Harvard:Biophysics 101/2007/Notebook:Christopher Nabel/2007-2-6
From OpenWetWare
Jump to navigation / Jump to search
Homework Due February 6
Here is my revision of the code assigned on February 1:
#!/usr/bin/env python
from Bio import GenBank, Seq
# Import the Seq class and the translate() function from Bio.Seq
from Bio.Seq import Seq,translate
# We can create a GenBank object that will parse a raw record
# This facilitates extracting specific information from the sequences
record_parser = GenBank.FeatureParser()
# NCBIDictionary is an interface to Genbank
ncbi_dict = GenBank.NCBIDictionary('nucleotide', 'genbank', parser = record_parser)
# If you pass NCBIDictionary a GenBank id, it will download that record
parsed_record = ncbi_dict['42740729']
print "GenBank id:", parsed_record.id
# Extract the sequence from the parsed_record
s = parsed_record.seq.tostring()
print "total sequence length:", len(s)
max_repeat = 9
print "method 1"
for i in range(max_repeat):
substr = ''.join(['T' for n in range(i+1)]) #note A changed to T
print substr, s.count(substr)
print "\nmethod 2"
for i in range(max_repeat):
substr = ''.join(['T' for n in range(i+1)]) # again, A changed to T
count = 0
pos = s.find(substr,0)
while not pos == -1:
count = count + 1
pos = s.find(substr,pos+1)
print substr, count
# Translate the DNA to a protein sequence and give the length
gp = translate(s)
print "The translated sequence for Ebola Zaire is %s" % gp
print "disclaimer: this translation starts before the open reading frame"
print "The length for this sequence is", len (gp)
# Print the raw record without parsing the subsets of data
ncbi_dict = GenBank.NCBIDictionary('nucleotide', 'genbank')
unparsed_record = ncbi_dict['42740729']
print "Raw Record:"
print unparsed_record