Harvard:Biophysics 101/2007/Notebook:Resmi Charalel/2007-4-26

Comments

 * I think I now have all of the pieces of code needed for a program that parses the XML of any returned OMIM entry to retrieve its PMIDs, looks those PMIDs up in PubMed, and then parses the PubMed entries for MeSH headings whose DescriptorName has its MajorTopicYN attribute set to 'Y'. However, there are still a few errors in the program, which I am trying to understand and fix.

Code Thus Far
from Bio.EUtils import DBIdsClient import xml.dom.minidom from xml.dom.minidom import parse, parseString

class AllelicVariant:
    """Bare attribute container (C-style struct) for one allelic variant.

    The class defines no fields of its own; the parsing code attaches
    ``name``, ``mutation`` and ``description`` after the fact.
    """
# C-style struct to pass parameters

class PubmedID:
    """Bare attribute container (C-style struct) for a PubMed identifier.

    The class defines no fields of its own; the parsing code attaches a
    ``pmid`` attribute after the fact.
    """

class MeshTerms:
    """Bare attribute container (C-style struct) reserved for MeSH terms.

    The class defines no fields of its own.
    """

def omim_snp_search(dnsnp_id):
    """Search OMIM for a dbSNP identifier and fetch every hit as XML.

    Args:
        dnsnp_id: Search term sent to the "omim" database,
            e.g. "rs11200638".

    Returns:
        A list holding one ``efetch(rettype="xml")`` result per search
        hit, in the order the search returned them.
    """
    # DBIdsClient.DBIdsClient is a class; it has to be instantiated before
    # search() can be called on it (the parentheses were missing).
    client = DBIdsClient.DBIdsClient()
    query = client.search(dnsnp_id, "omim")
    return [hit.efetch(rettype="xml") for hit in query]
# queries the database and returns all info in an XML format

def get_text(node_list):
    """Concatenate the character data of the text nodes in *node_list*.

    Based on http://docs.python.org/lib/dom-example.html

    Args:
        node_list: Iterable of DOM nodes, typically an element's
            ``childNodes``.

    Returns:
        The ``data`` of every node whose ``nodeType`` is ``TEXT_NODE``,
        joined in order. Other node types are ignored (they are not
        descended into). Returns "" when there are no text nodes.
    """
    # A single join avoids rebuilding the string once per node.
    return "".join(
        node.data for node in node_list if node.nodeType == node.TEXT_NODE
    )
# basic text extraction from XML; based on http://docs.python.org/lib/dom-example.html

def extract_allelic_variant_data(str):
    """Parse OMIM XML and return its allelic variants as structs.

    Args:
        str: XML text of an OMIM entry. (The name shadows the builtin; it
            is kept so existing callers are unaffected.)

    Returns:
        A list with one AllelicVariant instance per "Mim-allelic-variant"
        element, each carrying ``name``, ``mutation`` and ``description``
        text, or None when the document contains no such element.

    Raises:
        IndexError: If a variant lacks one of the expected child elements.
    """
    dom = parseString(str)
    variants = dom.getElementsByTagName("Mim-allelic-variant")
    if not variants:
        return None

    def text_at(node, *tags):
        # Follow the first matching element for each tag in turn, then
        # read the text directly under the last one.
        for tag in tags:
            node = node.getElementsByTagName(tag)[0]
        return get_text(node.childNodes)

    parsed = []
    for v in variants:
        # Instantiate the struct. Binding the class itself (no parentheses)
        # made every variant overwrite the same class attributes, so all
        # list entries ended up showing the last variant.
        a = AllelicVariant()
        a.name = text_at(v, "Mim-allelic-variant_name")
        a.mutation = text_at(
            v, "Mim-allelic-variant_mutation", "Mim-text_text")
        a.description = text_at(
            v, "Mim-allelic-variant_description", "Mim-text_text")
        parsed.append(a)
    return parsed
# extracts allelic variant data, as the name implies, using the struct above
#    print "variant:", variants

def extract_allelic_variant_pmid(str):
    """Parse OMIM XML and return the PubMed IDs of its references.

    Args:
        str: XML text of an OMIM entry. (The name shadows the builtin; it
            is kept so existing callers are unaffected.)

    Returns:
        A list of PubmedID instances whose ``pmid`` attribute holds the
        text of the reference's "Mim-reference_pubmedUID" element, or None
        when the document contains no "Mim-reference" element. References
        without a PubMed UID element are skipped.
    """
    dom = parseString(str)
    references = dom.getElementsByTagName("Mim-reference")
    if not references:
        return None

    ids = []
    for ref in references:
        uid_nodes = ref.getElementsByTagName("Mim-reference_pubmedUID")
        if not uid_nodes:
            # No PubMed UID on this reference: there is nothing to look
            # up, so skip it instead of failing with IndexError.
            continue
        # Instantiate the struct; binding the class itself made every
        # entry share (and overwrite) one ``pmid``.
        entry = PubmedID()
        entry.pmid = get_text(uid_nodes[0].childNodes)
        ids.append(entry)
    return ids


def extract_meshterms(str):
    """Parse PubMed XML and return its major-topic MeSH descriptors.

    Args:
        str: XML text of a PubMed record. (The name shadows the builtin;
            it is kept so existing callers are unaffected.)

    Returns:
        A list with the text of the first "DescriptorName" of every
        "MeshHeading" whose ``MajorTopicYN`` attribute equals "Y", or None
        when the document contains no "MeshHeading" element.
    """
    dom = parseString(str)
    meshheadings = dom.getElementsByTagName("MeshHeading")
    # The emptiness check must look at the headings just fetched; the
    # result list does not exist yet at this point.
    if not meshheadings:
        return None

    meshterms = []
    for heading in meshheadings:
        descriptors = heading.getElementsByTagName("DescriptorName")
        if not descriptors:
            continue
        descriptor = descriptors[0]
        # MajorTopicYN is an attribute of DescriptorName, not part of a
        # tag name, so it is read with getAttribute.
        if descriptor.getAttribute("MajorTopicYN") == "Y":
            meshterms.append(get_text(descriptor.childNodes))
    return meshterms

from Bio import PubMed from Bio import Medline import string

# PubMed.Dictionary is handed a parser object, so the RecordParser class
# has to be instantiated (the parentheses were missing).
rec_parser = Medline.RecordParser()
medline_dict = PubMed.Dictionary(parser=rec_parser)

# Driver: look up one SNP in OMIM, print its allelic variants, then print
# the major-topic MeSH terms of every PubMed reference of the entry.
for handle in omim_snp_search("rs11200638"):
    # read must be *called*; without the parentheses the bound method
    # itself was passed on to the XML parsers.
    result = handle.read()
    if result:
        v = extract_allelic_variant_data(result)
        p = extract_allelic_variant_pmid(result)
        if v is not None:
            for a in v:
                # Single-argument print(...) behaves the same on Python 2.
                print(a.name)
                print(a.mutation)
                print(a.description)
        if p is not None:
            for s in p:
                # Index by the PMID text, not by the PubmedID struct.
                cur_record = medline_dict[s.pmid]
                # NOTE(review): extract_meshterms feeds its argument to
                # parseString, i.e. it expects XML text; confirm that
                # medline_dict yields XML here rather than a parsed
                # Medline record.
                m = extract_meshterms(cur_record)
                if m is not None:
                    for mh in m:
                        print(mh)
    #if p != None:
    #    for i in p:
    #        print i.pmid


# disease = a.name
# search_term = "Review[ptyp] "+disease
# print search_term
# review_ids = PubMed.search_for(search_term)
# rec_parser = Medline.RecordParser
# medline_dict = PubMed.Dictionary(parser = rec_parser)
# count = 1
# for did in review_ids[0:5]:
#    cur_record = medline_dict[did]
#    print '\n', count, ')  ', string.rstrip(cur_record.title), cur_record.authors, string.strip(cur_record.source)
#    count=count+1
# for i in omim_snp_search("rs11200638"):
#    result = i.read
#    if result:
#        p = extract_allelic_variant_pmid(result)
#    if p != None:
#        key_source = PubMed.search_for(p[0])
#        key_rec = medline_dict[0]
#        print key_rec
#        keywords = get_text(key_rec.getElementsByTagName("MeshHeading")[0].childNodes)
#        print keywords
#    if p != None:
#        key_source = PubMed.search_for(p[0])
#        key_rec = medline_dict[0]
#        print key_rec
#        keywords = get_text(key_rec.getElementsByTagName("MeshHeading")[0].childNodes)
#        print keywords
#        print keywords