Harvard:Biophysics 101/2007/Notebook:Katie Fifer/2007-2-6
From OpenWetWare
Jump to navigation | Jump to search
#!/usr/bin/env python
# Added translate so that we can use it in finding the protein translation
from Bio import GenBank, Seq
from Bio.Seq import translate
# We can create a GenBank object that will parse a raw record
# This facilitates extracting specific information from the sequences
record_parser = GenBank.FeatureParser()
# NCBIDictionary is an interface to Genbank
ncbi_dict = GenBank.NCBIDictionary('nucleotide', 'genbank', parser = record_parser)
# If you pass NCBIDictionary a GenBank id, it will download that record
# PART 1: A different GenBank ID
parsed_record = ncbi_dict['116496513']
print "GenBank id:", parsed_record.id
# Extract the sequence from the parsed_record
s = parsed_record.seq.tostring()
print "total sequence length:", len(s)
max_repeat = 9
print "method 1"
for i in range(max_repeat):
# PART 2: Change A to T
substr = .join(['T' for n in range(i+1)])
print substr, s.count(substr)
print "\nmethod 2"
for i in range(max_repeat):
# PART 2: Change A to T
substr = .join(['T' for n in range(i + 1)])
count = 0
pos = s.find(substr, 0)
while not pos == -1:
count = count + 1
pos = s.find(substr, pos + 1)
print substr, count
# PART 3: Print the translated protein sequence
print "protein translation is:"
my_protein = translate(s)
print my_protein
print "its length is:", len(my_protein)
# PART 4: New dictionary without parser. Just print raw record.
ncbi_dict2 = GenBank.NCBIDictionary('nucleotide', 'genbank')
new_raw_record = ncbi_dict2['116496513']
print '\n'
print new_raw_record