http://openwetware.org/wiki/Anugraha_Raman/usefultool


 * 1) Parse GWAS file; get unique traits and SNPs
 * 2) Get population diversity from NCBI's dbSNP

from Bio.Blast import NCBIWWW from Bio.EUtils import DBIdsClient
 * 1) from Bio import SeqIO

from xml.dom import minidom from xml.dom.minidom import parse, parseString import csv import os, urllib import sre
 * 1) import sre, urllib2, sys, BaseHTTPServer


 * 1) from threading import Thread
 * 2) import pickle, sys, time, urllib


 *      Functions defined in this script file are as follows:
 *      writeheader(myfile)     : Writes a specific HTML header using the myfile handle
 *      writefooter(myfile)     : Writes a specific HTML footer using the myfile handle
 *      get_snp_url(rsid)       : Takes a rsid and returns url for retrieving population diversity
 *      write_traithtml(mydict,myfile): writes out the truthmain.html file output
 *      get_trait(my_file)      : Does the main parsing of the tab delimited GWAS file and gets trait info

def writeheader(my_file):
    """Write the page-header banner to the open file handle my_file.

    NOTE(review): the wiki paste this file came from stripped the original
    HTML tags, leaving the near-empty strings below; they are reproduced
    byte-for-byte.
    """
    banner = str('  Biophysisc 101: Genomics, Computing and Economics >> ')
    banner += str(' TRUTH ')
    # Emit the banner followed by the (tag-stripped) trailing pieces.
    for piece in (banner, ' ', '', ' ', ' '):
        my_file.write(piece)

def writefooter(my_file):
    """Write the page footer (credits and script source) to my_file.

    NOTE(review): the HTML tags were stripped from these strings by the
    wiki paste; the literals are reproduced byte-for-byte.
    """
    credit_text = str('  BioPython Scripting By: Anugraha Raman ')
    source_text = str('Script Source:  Anugraha_gwas-dbsnp_r3.py ')
    my_file.write(credit_text + source_text)
    my_file.write(' ')

def get_snp_url(rsid):
    """Take an rsid (e.g. 'rs1234') and return the NCBI dbSNP URL for its
    population-diversity section."""
    SNP_URL = 'http://www.ncbi.nlm.nih.gov/projects/SNP/snp_ref.cgi?rs='
    # BUG FIX: the original used rsid.strip('rs'), but str.strip removes
    # any run of the characters 'r'/'s' from BOTH ends of the string, not
    # the 'rs' prefix.  Remove only the leading prefix explicitly.
    if rsid.startswith('rs'):
        snp_id_number = rsid[2:]
    else:
        snp_id_number = rsid
    url = SNP_URL + snp_id_number + '#Diversity'  # population diversity anchor
    return url

def write_traithtml(mydict, myfile):
    """Write one HTML fragment per trait to myfile.

    mydict maps trait name -> [rsid_collection, pubmed_id_collection]
    (the structure built by get_trait).

    NOTE(review): the wiki paste stripped the original HTML tags, leaving
    the near-empty write() strings below; they are preserved byte-for-byte.
    The loop nesting is reconstructed from the paste — confirm against a
    rendered copy of truthmain.html if available.
    """
    # BUG FIX: the original iterated over 'mydict.iteritems' without
    # calling it (missing parentheses), which raises TypeError.  .items()
    # works on both Python 2 and Python 3.
    for k, v in mydict.items():
        myfile.write('')
        myfile.write(k)  # write the trait
        myfile.write(' ')
        for l in v[0]:  # for the rsids
            myfile.write('')
            myfile.write(l)
            myfile.write('')
            myfile.write(' + ')
        myfile.write('  ')
        myfile.write('')
        myfile.write('More info')
        myfile.write(' ')
        for l in v[1]:  # for the pubmedids
            myfile.write(' pubmed: ')
            myfile.write(l)
            myfile.write('')
            myfile.write(' +  ')
        myfile.write('  ')
        myfile.write('')

def get_trait(my_file):
    """Parse the tab-delimited GWAS catalogue file 'gwas.txt' (current
    working directory) and return {trait: [set_of_rsids, set_of_pubmed_ids]}.

    my_file is accepted for interface compatibility with the call site but
    is not used by this function.
    """
    gwas_file = open('gwas.txt', 'r')
    try:
        reader = csv.DictReader(gwas_file, dialect='excel-tab')
        gwas_array = []
        trait_array = []
        trait_dict = {}
        gwas_file.seek(0)  # kept from the original; reader has not consumed anything yet
        for row in reader:
            # do not include GWAS records without rsid numbers
            if 'rs' in row['SNPs']:
                gwas_array.append([row['Disease/Trait'], row['SNPs'], row['PubMedID']])
                trait_array.append(row['Disease/Trait'])
        trait_set = set(trait_array)  # get unique set of traits
        print(trait_set)
        for i in trait_set:  # for every unique trait
            rs_array = []  # start a new set of rs associations with unique trait
            pubmed_array = []
            for j in gwas_array:
                # BUG FIX: the original tested 'j[0] in i' (substring
                # containment), which wrongly merges traits whose names
                # contain other trait names; the grouping key must match
                # exactly.
                if j[0] == i:
                    rs_array.append(j[1])
                    pubmed_array.append(j[2])
            trait_dict[i] = [set(rs_array), set(pubmed_array)]
        return trait_dict
    finally:
        # BUG FIX: the original never closed the file handle.
        gwas_file.close()

# ---------------------------------- Main ----------------------------------
# All the relevant output is written to truthmain.html in the current
# working directory.

# BUG FIX: os.getcwd is a function; the original passed the function object
# itself to os.path.join, which raises TypeError.  It must be called.
out_file_name = os.path.join(os.getcwd(), 'truthmain.html')
out_file = open(out_file_name, 'w')
writeheader(out_file)

thetrait_dict = get_trait(out_file)  # parse trait info details from GWAS file

write_traithtml(thetrait_dict, out_file)  # output the main html file

writefooter(out_file)

# BUG FIX: 'out_file.close' referenced the method without calling it, so the
# file was never explicitly closed/flushed (see the original trailing comment:
# "close the file handle so the file is actually written to disk").
out_file.close()

print('Completed run' + str(out_file_name))

# BUG FIX: the original launched 'explorer truth.htm', a file this script
# never writes; open the file actually produced (matching the original's own
# trailing suggestion: os.system('explorer ' + out_file_name)).
os.system('explorer ' + out_file_name)