Harvard:Biophysics 101/2007/Notebook:Michael Wang/2007-2-1
From OpenWetWare
Jump to navigationJump to search
February 1
#!/usr/bin/env python
"""Fetch one GenBank nucleotide record for "Xenopus AND notch" and analyse it.

Usage: python <script> ENTRY_NUMBER

ENTRY_NUMBER is a zero-based index into the list of search hits.  The script
prints the raw record, the parsed record, its id, the sequence length, the
translated sequence, and how often runs of 'T' of length 1..MAX_REPEAT occur,
counted two ways (non-overlapping and overlapping).
"""

import sys

SEARCH_TERMS = "Xenopus AND notch"
MAX_REPEAT = 9


def select_entry(gi_list, position):
    """Return ``gi_list[position]``, rejecting positions outside the list.

    Negative positions are rejected too, so a typo cannot silently select an
    entry counted from the end of the list.

    Raises:
        IndexError: if ``position`` is not in ``0 .. len(gi_list) - 1``.
    """
    if not 0 <= position < len(gi_list):
        raise IndexError(
            "entry number %d is out of range: the search returned %d entries"
            % (position, len(gi_list))
        )
    return gi_list[position]


def count_overlapping(sequence, motif):
    """Count occurrences of ``motif`` in ``sequence``, overlaps included."""
    count = 0
    pos = sequence.find(motif)
    while pos != -1:
        count += 1
        # Restart one character past the last hit (not past its end) so that
        # overlapping matches are found as well.
        pos = sequence.find(motif, pos + 1)
    return count


def poly_t_counts(sequence, max_repeat=MAX_REPEAT, overlapping=False):
    """Return ``[(motif, count), ...]`` for 'T', 'TT', ... of up to max_repeat.

    With ``overlapping=False`` the count is ``sequence.count(motif)``, which
    only counts non-overlapping matches; with ``overlapping=True`` every start
    position is counted.
    """
    results = []
    for length in range(1, max_repeat + 1):
        motif = "T" * length
        if overlapping:
            hits = count_overlapping(sequence, motif)
        else:
            hits = sequence.count(motif)
        results.append((motif, hits))
    return results


def main(argv=None):
    """Run the search, download the chosen record and print the analysis.

    ``argv`` defaults to ``sys.argv``; its last element is the entry number.
    Exits with a message if the entry number is missing, not an integer, or
    outside the list of search results.
    """
    # Biopython is imported here rather than at module level so that the
    # counting helpers above can be imported without it being installed.
    # NOTE(review): GenBank.search_for / GenBank.NCBIDictionary look like
    # legacy Biopython interfaces (newer releases use Bio.Entrez) -- confirm
    # against the installed Biopython version.
    from Bio.Seq import translate
    from Bio import GenBank, Seq

    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        sys.exit("usage: python <script> ENTRY_NUMBER")
    try:
        # The user chooses which search hit to retrieve.
        array_position = int(argv[-1])
    except ValueError:
        sys.exit("entry number must be an integer, got %r" % (argv[-1],))
    print("Retrieving entry number  %d" % array_position)

    # Search GenBank for the ids of all matching nucleotide entries.
    print("Searching for %s entries" % SEARCH_TERMS)
    gi_list = GenBank.search_for(SEARCH_TERMS)
    print(gi_list)
    print("Done searching")

    try:
        # Use the entry the user asked for (previously always entry 0).
        gi = select_entry(gi_list, array_position)
    except IndexError as err:
        sys.exit(str(err))
    print("Entry  %d" % array_position)

    # Without a parser the dictionary hands back the raw record; print it.
    ncbi_dict_alpha = GenBank.NCBIDictionary('nucleotide', 'genbank')
    print(ncbi_dict_alpha[gi])

    # With a FeatureParser the same lookup yields a parsed record, which
    # makes it easy to extract specific fields such as the id and sequence.
    record_parser = GenBank.FeatureParser()
    ncbi_dict = GenBank.NCBIDictionary('nucleotide', 'genbank',
                                       parser=record_parser)
    parsed_record = ncbi_dict[gi]
    print(parsed_record)
    print("GenBank id: %s" % parsed_record.id)

    # Extract the sequence from the parsed record as a plain string.
    s = parsed_record.seq.tostring()
    print("total sequence length: %d" % len(s))

    # Translate the raw nucleotide sequence.
    my_protein = translate(s)
    print("translated sequence: %s" % my_protein)

    print("multiple T analysis")
    print("method 1")
    for motif, hits in poly_t_counts(s, MAX_REPEAT):
        print("%s %d" % (motif, hits))

    print("\nmethod 2")
    for motif, hits in poly_t_counts(s, MAX_REPEAT, overlapping=True):
        print("%s %d" % (motif, hits))


if __name__ == "__main__":
    main()