Harvard:Biophysics 101/2007/Notebook:Kaull/2007-5-2

OMIM workaround, now in workable form. Enjoy.

Specs: Takes any rs #. Returns AllelicVariant structs, which is how Xiaodi's code worked before, so life should not be complicated for those downstream (tell me if this is untrue!).

import pickle
import sys
import time
import urllib
import xml.dom.minidom
from threading import Thread
from xml.dom import minidom
from xml.dom.minidom import parse, parseString

from Bio import SeqIO
from Bio.Blast import NCBIWWW
from Bio.EUtils import DBIdsClient


# ===== Code taken from elsewhere, unedited =====

# Lines of the final report: the script section at the end of this file
# appends strings to this list and then prints every entry.
outputlist = []

class AllelicVariant:
    """Bare attribute container, used as a C-style struct to pass parameters."""
# C-style struct to pass parameters

def get_text(node_list):
    """Concatenate the character data of the text nodes in *node_list*.

    Basic text extraction from XML, based on the DOM example in the Python
    library documentation (http://docs.python.org/lib/dom-example.html).

    Only nodes whose ``nodeType`` equals ``TEXT_NODE`` contribute; any other
    node is skipped and is not descended into.

    Args:
        node_list: iterable of DOM nodes, e.g. ``element.childNodes``.

    Returns:
        The ``data`` of every text node, joined in document order, or ``""``
        when there are no text nodes.
    """
    # join() builds the result in one pass instead of repeated concatenation.
    return "".join(
        node.data for node in node_list if node.nodeType == node.TEXT_NODE
    )

def extract_allelic_variant_data(str):
    """Extract allelic variant data from one OMIM XML record.

    Args:
        str: XML text of the record.  (The parameter name shadows the
            builtin; it is kept unchanged for backward compatibility.)

    Returns:
        A list with one ``AllelicVariant`` per ``<Mim-allelic-variant>``
        element, each carrying ``name``, ``mutation`` and ``description``
        attributes, or ``None`` when the record contains no such element.

    Raises:
        IndexError: if a variant lacks one of the expected child elements.
    """
    dom = parseString(str)
    variants = dom.getElementsByTagName("Mim-allelic-variant")
    if not variants:
        return None

    def first(node, tag):
        # First descendant of *node* with the given tag name.
        return node.getElementsByTagName(tag)[0]

    parsed = []
    for v in variants:
        # Create a fresh instance per variant.  Binding the class itself
        # (no call) would make every list entry the same object, holding
        # only the data of the last variant processed.
        a = AllelicVariant()
        a.name = get_text(first(v, "Mim-allelic-variant_name").childNodes)
        a.mutation = get_text(
            first(
                first(v, "Mim-allelic-variant_mutation"), "Mim-text_text"
            ).childNodes
        )
        a.description = get_text(
            first(
                first(v, "Mim-allelic-variant_description"), "Mim-text_text"
            ).childNodes
        )
        parsed.append(a)
    return parsed

def omim_snp_search(dnsnp_id):
    """Query the OMIM database for *dnsnp_id* and fetch every hit as XML.

    Args:
        dnsnp_id: search term passed to the OMIM search, e.g. a dbSNP
            rs number.

    Returns:
        A list holding the ``efetch(rettype="xml")`` result of each search
        hit (presumably file-like handles -- confirm against the EUtils
        client); the list is empty when the search yields nothing.
    """
    # The client class has to be instantiated before search() can be called.
    client = DBIdsClient.DBIdsClient()
    query = client.search(dnsnp_id, "omim")
    return [hit.efetch(rettype="xml") for hit in query]


# ===== Code taken from elsewhere, edited =====

def omim_tag_search(tag_id):
    """Query the OMIM database for *tag_id* and fetch every hit as XML.

    Args:
        tag_id: search term passed to the OMIM search, e.g. a gene symbol
            obtained from a dbSNP record.

    Returns:
        A list holding the ``efetch(rettype="xml")`` result of each search
        hit (presumably file-like handles -- confirm against the EUtils
        client); the list is empty when the search yields nothing.
    """
    # The client class has to be instantiated before search() can be called.
    client = DBIdsClient.DBIdsClient()
    query = client.search(tag_id, "omim")
    return [hit.efetch(rettype="xml") for hit in query]


# ===== New code - from Kay =====

# Hard-coded sample rs number; the script section at the bottom of this file
# looks it up with snp_to_omim(snp_id).
snp_id = 'rs11200638'
# **** TEST DATA **** (delete me!)

def parse_geneID_tag(snp_id):
    """Query the SNP database and return the gene symbol tag as a string.

    Fetches the dbSNP record for *snp_id* as XML through the NCBI efetch
    URL and returns the text of its first ``<FxnSet_symbol>`` element.

    Currently, DBIdsClient does not support SNP parsing, so it is not used
    here.  A future update should switch to it when possible, for ease of
    reading.

    Args:
        snp_id: dbSNP identifier string; it is appended to the query URL
            as-is.

    Returns:
        The text found between the opening and closing ``FxnSet_symbol``
        tags of the first such element.

    Raises:
        IndexError: if the record contains no ``FxnSet_symbol`` element.
    """
    SNP_URL = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=snp'
    url = SNP_URL + '&id=' + snp_id + '&mode=xml'

    response = urllib.urlopen(url)
    try:
        dom = minidom.parse(response)
    finally:
        # Release the connection even when the XML cannot be parsed.
        response.close()

    symbol = dom.getElementsByTagName("FxnSet_symbol")
    if not symbol:
        # Same exception type as indexing the empty result, but with a
        # message that says what was missing.
        raise IndexError("no FxnSet_symbol element found for " + snp_id)

    # Serialise the element and cut out the text between the first '>' and
    # the following '<'.
    return symbol[0].toxml().split('>')[1].split('<')[0]


# Note: This code is a temporary solution to a dbSNP formatting issue.
#  - Older entries are best searched directly by ID #;
#    this is the first case covered.
#  - Newer entries are not indexed in this fashion, although SNP ID data is
#    available on the individual entry.  These contain Allelic Variant data,
#    which is located and extracted by the script;
#    this is the second case covered.

def snp_to_omim(snp_id):
    """Take a SNP ID and get search results from OMIM in XML format.

    The lookup is done in two stages:

    1. Search OMIM directly for *snp_id*.
    2. If that finds nothing, look up the gene symbol of the SNP in dbSNP
       and search OMIM for that symbol instead.

    Args:
        snp_id: dbSNP rs identifier string.

    Returns:
        The list of records returned by ``omim_snp_search`` or, when that
        list is empty, by ``omim_tag_search``.
    """
    records = omim_snp_search(snp_id)

    # Fall back only when the direct search came back empty.  Comparing the
    # result against the ``list`` type is always False, which would skip
    # the fallback every time.
    if not records:
        tag_id = parse_geneID_tag(snp_id)
        records = omim_tag_search(tag_id)

    return records


# I'm not yet happy with this bottom bit.  I'll keep the output steady
# for downstream, though.

# Look up the sample SNP and build the report in outputlist.
o = snp_to_omim(snp_id)

if not o:
    # nothing more to be done if no records can be found
    outputlist.append("No information found for " + snp_id + "\n")
else:
    # otherwise, find the allelic variant data
    outputlist.append(snp_id + " details:" + "\n")
    for i in o:
        # read() must be called so the parser receives the XML text rather
        # than the bound method.
        v = extract_allelic_variant_data(i.read())
        if v is not None:
            for a in v:
                outputlist.append(a.name + "\n")
                outputlist.append(a.mutation + "\n")
                outputlist.append(a.description + "\n")

# Printed at top level so the "No information found" message is shown too.
for item in outputlist:
    print(item)

# print '-' * 40
# print "\n"
# print "yay! we're done!"

'''
o = omim_snp_search(snp_id)
for i in o:
    dom = parseString(i.read())
    print dom.toxml()
'''