TChan/Notebook/2007-3-20

BLAST SNP XML to OMIM

 * NOTE: This probably doesn't work yet - I will tweak it once I can get a sample XML file to work with.

import xml.dom.minidom
from xml.dom.minidom import parse, parseString

from Bio.EUtils import DBIdsClient


 * 1) From SNP BLAST xml to OMIM (using Xiaodi's code as a template...and then just using his code :))

class rsID:
    """Empty record type used like a C-style struct.

    The class declares no fields of its own; the code that fills in a record
    attaches attributes (for example ``rsIDnum``) to it directly.
    """
 * 1) C-style struct to pass parameters

def BLAST_SNP_search(sample_seq):
    """Search the "BLAST SNP" database for a sequence and fetch every hit as XML.

    Args:
        sample_seq: The search term, passed unchanged to ``client.search``.

    Returns:
        A list holding one ``efetch(rettype="xml")`` result per search hit.
    """
    # Create a client instance; the original bound the class object itself,
    # so search() had no instance to run on.
    client = DBIdsClient.DBIdsClient()
    # NOTE(review): "BLAST SNP" is kept exactly as written -- confirm it is a
    # database name the service actually accepts.
    query = client.search(sample_seq, "BLAST SNP")
    return [hit.efetch(rettype="xml") for hit in query]
 * 1) queries the database and returns all info in an XML format

def get_text(node_list):
    """Concatenate the character data of the text nodes in ``node_list``.

    Only nodes whose ``nodeType`` equals their ``TEXT_NODE`` constant
    contribute; every other node is skipped and is not descended into.

    Args:
        node_list: An iterable of DOM nodes (for example ``element.childNodes``).

    Returns:
        The text nodes' ``data`` joined in order, or ``""`` when there are none.
    """
    return "".join(
        node.data for node in node_list if node.nodeType == node.TEXT_NODE
    )
 * 1) basic text extraction from XML; based on http://docs.python.org/lib/dom-example.html

def extract_rsID(xml_text, tag_name="gn1|dbSNP|"):
    """Parse an XML string and collect one ``rsID`` record per matching element.

    Args:
        xml_text: The XML document as a string.
        tag_name: Name of the elements to collect. The default is the literal
            the notebook used originally.
            NOTE(review): the default looks like a BLAST defline prefix
            rather than an XML element name -- confirm against a sample file.

    Returns:
        A list of ``rsID`` instances, each with ``rsIDnum`` set to the text of
        the matched element, or ``None`` when nothing matches.
    """
    dom = parseString(xml_text)
    matches = dom.getElementsByTagName(tag_name)
    if not matches:
        return None

    parsed = []
    for rs in matches:
        # A fresh instance per match; assigning the class itself would make
        # every entry share (and overwrite) the same attributes.
        record = rsID()
        # The original read the text from the first same-named element nested
        # inside the match. Keep that when present, otherwise fall back to
        # the match itself instead of raising IndexError.
        nested = rs.getElementsByTagName(tag_name)
        source = nested[0] if nested else rs
        record.rsIDnum = get_text(source.childNodes)
        parsed.append(record)
    return parsed
 * 1) extracts the rsID numbers from the SNP BLAST XML, using the rsID struct above

# Read the query sequence as text so that the search receives a string, not
# an open file object. try/finally (rather than ``with``) keeps this runnable
# on the older Python 2 interpreters this notebook's print statements target.
seq_file = open('sample_sequence.txt')
try:
    seq = seq_file.read().strip()
finally:
    seq_file.close()

# Every rsID number found across all BLAST SNP results, in discovery order.
rsID_array = []

for i in BLAST_SNP_search(seq):
    # read() must be called to obtain the XML text, and the result is bound
    # to the name that the check below actually reads.
    rsIDs = extract_rsID(i.read())
    if rsIDs is not None:
        for b in rsIDs:
            rsID_array.append(b.rsIDnum)
            print(b.rsIDnum)

class AllelicVariant:
    """Empty record type used like a C-style struct.

    The class declares no fields of its own; the code that fills in a record
    attaches attributes (for example ``name``, ``mutation`` and
    ``description``) to it directly.
    """
 * 1) C-style struct to pass parameters

def omim_snp_search(dnsnp_id):
    """Search the "omim" database for an identifier and fetch every hit as XML.

    Args:
        dnsnp_id: The search term, passed unchanged to ``client.search``.

    Returns:
        A list holding one ``efetch(rettype="xml")`` result per search hit.
    """
    # Create a client instance; the original bound the class object itself,
    # so search() had no instance to run on.
    client = DBIdsClient.DBIdsClient()
    query = client.search(dnsnp_id, "omim")
    return [hit.efetch(rettype="xml") for hit in query]
 * 1) queries the database and returns all info in an XML format

def get_text(node_list):
    """Return the combined character data of the text nodes in ``node_list``.

    Nodes whose ``nodeType`` is not ``TEXT_NODE`` are ignored; their children
    are not visited.

    Args:
        node_list: An iterable of DOM nodes (for example ``element.childNodes``).

    Returns:
        The text nodes' ``data`` joined in order, or ``""`` when there are none.
    """
    text_parts = [
        node.data for node in node_list if node.nodeType == node.TEXT_NODE
    ]
    return "".join(text_parts)
 * 1) basic text extraction from XML; based on http://docs.python.org/lib/dom-example.html

def extract_allelic_variant_data(str):
    """Parse an XML string and collect its ``Mim-allelic-variant`` entries.

    Args:
        str: The XML document as a string. The name shadows the builtin but
            is kept so existing keyword callers continue to work.

    Returns:
        A list of ``AllelicVariant`` instances with ``name``, ``mutation`` and
        ``description`` set, or ``None`` when the document has no
        ``Mim-allelic-variant`` element. A field whose element is missing is
        set to ``""``.
    """
    def first_text(node, *tag_path):
        # Walk tag_path one element at a time, taking the first match at each
        # step; give up with "" as soon as a step has no match, so a variant
        # lacking an optional field does not abort the whole parse.
        for tag in tag_path:
            found = node.getElementsByTagName(tag)
            if not found:
                return ""
            node = found[0]
        return get_text(node.childNodes)

    dom = parseString(str)
    variants = dom.getElementsByTagName("Mim-allelic-variant")
    if not variants:
        return None

    parsed = []
    for v in variants:
        # A fresh instance per variant; assigning the class itself would make
        # every list entry share (and overwrite) the same attributes.
        a = AllelicVariant()
        a.name = first_text(v, "Mim-allelic-variant_name")
        a.mutation = first_text(
            v, "Mim-allelic-variant_mutation", "Mim-text_text")
        a.description = first_text(
            v, "Mim-allelic-variant_description", "Mim-text_text")
        parsed.append(a)
    return parsed
 * 1) extracts allelic variant data, as the name implies, using the struct above

# Look up each collected rsID in OMIM and print its allelic variants.
# Iterate over the values directly: the original indexed rsID_array with its
# own elements, which are rsID strings rather than positions.
for rs_id in rsID_array:
    for i in omim_snp_search(rs_id):
        # read() must be called to obtain the XML text.
        v = extract_allelic_variant_data(i.read())
        if v is not None:
            for a in v:
                print(a.name)
                print(a.mutation)
                print(a.description)