Harvard:Biophysics 101/2007/Notebook:Xiaodi Wu/2007-4-5

Script for SNP processing:

import xml.dom.minidom
from xml.dom.minidom import parse, parseString

from Bio import SeqIO
from Bio.Blast import NCBIWWW

# Read the first record of example.fasta and print its sequence.
# The `with` block closes the file even if parsing raises.
with open("example.fasta") as file_handle:
    records = SeqIO.parse(file_handle, format="fasta")
    # The iterator has to be advanced by an actual call; binding the bare
    # `records.next` attribute would leave `record` as a method object
    # with no `.seq` attribute.
    record = next(records)

# NOTE(review): `.data` is the attribute this script has always read; newer
# Biopython releases may need `str(record.seq)` instead -- confirm against
# the installed version.
sequence = record.seq.data
print(sequence)

# Submit the sequence to NCBI's online BLAST service, running the "blastn"
# program against the "snp/human_9606/human_9606" database.
result_handle = NCBIWWW.qblast("blastn", "snp/human_9606/human_9606", sequence)

# read() must be called: without the parentheses `blast_results` would be
# the bound method itself rather than the text of the response.
blast_results = result_handle.read()
print(blast_results)

def get_text(node_list):
    """Return the concatenated text of the text nodes in *node_list*.

    Only nodes whose ``nodeType`` equals ``TEXT_NODE`` contribute; any other
    node is skipped and its children are not visited.

    Args:
        node_list: Iterable of DOM nodes, e.g. an element's ``childNodes``.

    Returns:
        The ``data`` of every text node joined in order, or ``""`` when the
        list holds no text nodes.
    """
    # A single join avoids rebuilding the string once per node.
    return "".join(
        node.data for node in node_list if node.nodeType == node.TEXT_NODE
    )
# Basic text extraction from XML; based on http://docs.python.org/lib/dom-example.html

def extract_snp_data(str):
    """Collect the accession text of every ``Hit`` element in an XML string.

    Each accession is printed as it is collected.

    Args:
        str: XML text to parse. The name shadows the builtin, but it is kept
            so that existing callers continue to work.

    Returns:
        A list with the text of the first ``Hit_accession`` element found
        under each ``Hit`` element, in document order. The list is empty
        when the document contains no ``Hit`` elements, so callers can
        always iterate over the result.

    Raises:
        IndexError: If a ``Hit`` element has no ``Hit_accession`` descendant.
    """
    dom = parseString(str)
    parsed = []
    for hit in dom.getElementsByTagName("Hit"):
        # Only the first Hit_accession under each hit is read.
        accession_nodes = hit.getElementsByTagName("Hit_accession")[0].childNodes
        accession = get_text(accession_nodes)
        parsed.append(accession)
        print(accession)
    return parsed
# Extracts SNP data.

# Run the extraction on the BLAST output; the returned list is not kept here.
extract_snp_data(blast_results)