# User:Anugraha Raman/problem123.txt
# BioPython Script By Anugraha Raman
# For BP 101 Problems 1,2,3 Due September 24, 2009
import os
import subprocess

from Bio import SeqIO
from Bio.Seq import Seq
# from Bio.Alphabet import IUPAC
# Functions defined in this script file are as follows:
# translate_dna(seq) : Translates a DNA sequence to an amino acid sequence using a standard table
# translate_frame_way1(seq,myfile) : translates a DNA sequence seq into its protein sequence in all 6 frames (+1, +2, +3, -1, -2, -3)
# translate_frame_way2(seq,myfile) : translates DNA seq into protein sequence in all 6 frames using default BioPython data
# writeheader(my_file) : Writes a specific HTML header using the my_file handle
# writefooter(my_file) : Writes a specific HTML footer using the my_file handle
#
# Standard translation table from Biophys101_assign3b.doc
# Codon -> one-letter amino acid lookup, keyed by lower-case DNA codon;
# '*' marks a stop codon.
#
# The table is generated instead of typed out. With the bases ordered
# t, c, a, g at every codon position, the 64 codons enumerate in the order
# ttt, ttc, tta, ttg, tct, ... ggg, so a 64-letter string with one amino
# acid per codon in that order describes the whole table.
_CODON_BASES = 'tcag'
_CODON_AMINO_ACIDS = (
    'FFLLSSSSYY**CC*W'    # codons starting with t
    'LLLLPPPPHHQQRRRR'    # codons starting with c
    'IIIMTTTTNNKKSSRR'    # codons starting with a
    'VVVVAAAADDEEGGGG'    # codons starting with g
)
standard3b = dict(zip(
    (first + second + third
     for first in _CODON_BASES
     for second in _CODON_BASES
     for third in _CODON_BASES),
    _CODON_AMINO_ACIDS
))
def translate_dna(seq):
    """Translate a DNA sequence into one-letter amino acid codes.

    The sequence is read three bases at a time, starting at its first base,
    and each codon is looked up in the module-level ``standard3b`` table.

    Args:
        seq: DNA sequence. Anything whose ``str()`` is the sequence text
            works (a plain string, for example).

    Returns:
        A string with one character per codon: the amino acid letter,
        ``'*'`` for a stop codon, or ``'?'`` for anything that is not in
        the table (a trailing partial codon, an ambiguous base, ...).
        An empty sequence gives an empty string.
    """
    # standard3b is keyed in lower case, so fold the case once up front;
    # otherwise an upper-case sequence would translate to nothing but '?'.
    bases = str(seq).lower()
    # range() instead of xrange() so this runs on Python 2 and 3 alike, and
    # join() instead of repeated '+' so long sequences are not quadratic.
    return "".join(
        standard3b.get(bases[start:start + 3], "?")
        for start in range(0, len(bases), 3)
    )
def translate_frame_way1(seq, myfile):
    """Translate ``seq`` in all six reading frames using ``translate_dna``.

    Method 1: the codon table from the Biophysics 101 assignment 3b
    document, applied through ``translate_dna``. Frames +1, +2 and +3 start
    at the first, second and third base of ``seq``; frames -1, -2 and -3 do
    the same on its reverse complement. Each translation is printed to the
    console and written to ``myfile``, in the order +1, +2, +3, -1, -2, -3.

    Args:
        seq: DNA sequence object that supports slicing and provides a
            ``reverse_complement()`` method.
        myfile: object with a ``write`` method that receives the report text.

    Returns:
        None.
    """
    r_seq = seq.reverse_complement()
    # (frame label, sequence to translate from its first base)
    frames = (
        ('+1', seq),
        ('+2', seq[1:]),      # drop the first base
        ('+3', seq[2:]),      # drop the first two bases
        ('-1', r_seq),
        ('-2', r_seq[1:]),
        ('-3', r_seq[2:]),
    )
    for label, frame_seq in frames:
        heading = '(%s) frame translation is: ' % label
        amino_seq = translate_dna(frame_seq)
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(heading)
        print(amino_seq)
        # NOTE(review): the original literal began with a bare line break,
        # which is plain whitespace in HTML -- possibly a '<br>' tag lost
        # in transcription; confirm against the original script.
        myfile.write('\n' + heading)
        myfile.write(amino_seq)
# end of function translate_frame_way1
# Another way of getting the translated result
def translate_frame_way2(seq, myfile):
    """Translate ``seq`` in all six reading frames using ``translate()``.

    Method 2: each frame is translated by the sequence object's own
    ``translate()`` method with no arguments (per the original comment,
    this uses the standard table from Bio.Data CodonTable.py). Frames +1,
    +2 and +3 start at the first, second and third base of ``seq``; frames
    -1, -2 and -3 do the same on its reverse complement. Each translation
    is printed to the console and written to ``myfile``, in the order
    +1, +2, +3, -1, -2, -3.

    Args:
        seq: DNA sequence object that supports slicing and provides
            ``reverse_complement()`` and ``translate()`` methods.
        myfile: object with a ``write`` method that receives the report text.

    Returns:
        None.
    """
    r_seq = seq.reverse_complement()
    # (frame label, sequence to translate from its first base)
    frames = (
        ('+1', seq),
        ('+2', seq[1:]),      # drop the first base
        ('+3', seq[2:]),      # drop the first two bases
        ('-1', r_seq),
        ('-2', r_seq[1:]),
        ('-3', r_seq[2:]),
    )
    for label, frame_seq in frames:
        heading = '(%s) frame translation is: ' % label
        # translate() returns a sequence object, not text; convert it so
        # that write() accepts it (the main script does the same with
        # str(rev_seq)).
        amino_seq = str(frame_seq.translate())
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(heading)
        print(amino_seq)
        # NOTE(review): the original literal began with a bare line break,
        # which is plain whitespace in HTML -- possibly a '<br>' tag lost
        # in transcription; confirm against the original script.
        myfile.write('\n' + heading)
        myfile.write(amino_seq)
# end of function translate_frame_way2
def writeheader(my_file):
    """Write the opening HTML of the report to ``my_file``.

    Emits, in order: the ``<html>`` tag with the course title inside a
    styled ``<font>`` element, the closing ``</font>``, the logo image tag,
    a closing ``</img>`` and two ``<p>`` tags. ``my_file`` only needs a
    ``write`` method; nothing is returned.
    """
    title = ''.join((
        '<html><font face="Trebuchet"size="3" color="#2171B7" > Biophysisc 101: Genomics, Computing and Economics >> ',
        ' Problems 1,2,3: ',
    ))
    # One write per fragment, in document order.
    fragments = (
        title,
        '</font>',
        '<img border="2" src="2009BP101-logo1.png">',
        '</img>',
        '<p><p>',
    )
    for fragment in fragments:
        my_file.write(fragment)
# end of function writeheader
def writefooter(my_file):
    """Write the closing HTML of the report to ``my_file``.

    Emits the author credit and a link to the script source inside a
    styled ``<font>`` element, then closes the ``<font>`` and ``<html>``
    tags. ``my_file`` only needs a ``write`` method; nothing is returned.
    """
    credit = '<p><p><font face="Trebuchet"size="1" color="#2171B7" > BioPython Scripting By: Anugraha Raman '
    source_link = 'Script Source: <a href=Problems123.py> Problems123.py </a>'
    closing_tags = '</font> </html>'
    # Credit and link go out as one write, the closing tags as a second.
    for fragment in (credit + source_link, closing_tags):
        my_file.write(fragment)
# end of function writefooter
# Begin my script
# Reads the FASTA file 'p53seg.txt' from the current directory and writes an
# HTML report with the answers to problems 1-3 for every record in it.
output_file_name = os.path.join(os.getcwd(), 'anugraha-092409-prob123.html')

# 'with' closes both files even if a step below raises, so whatever has been
# written still reaches the disk.
with open('p53seg.txt', 'r') as input_file:
    # The report is opened once, before the loop: reopening it in 'w' mode
    # for every record would keep only the last record's results.
    with open(output_file_name, 'w') as output_file:
        writeheader(output_file)

        for cur_record in SeqIO.parse(input_file, "fasta"):
            my_seq = cur_record.seq
            # Uncomment to echo the input sequence:
            # print('DNA sequence is: ')
            # print(my_seq)

            # Problem #1: GC content, counted explicitly.
            # Fold the case first so 'G'/'C' count as well as 'g'/'c'.
            bases = str(my_seq).lower()
            g_count = bases.count('g')    # number of guanines
            c_count = bases.count('c')    # number of cytosines
            seq_count = len(bases)        # length of the sequence
            if seq_count:
                # float() in the denominator gives a decimal answer on
                # Python 2, where int / int would truncate.
                gc_percent = ((g_count + c_count) / float(seq_count)) * 100
            else:
                # An empty record has no GC content; avoid dividing by zero.
                gc_percent = 0.0
            print('GC % is: ' + str(gc_percent))
            # NOTE(review): this literal and the one below began with a bare
            # line break in the original, which is plain whitespace in HTML
            # -- possibly a '<br>' tag lost in transcription; confirm.
            output_file.write('\nGC % is: ' + str(gc_percent))

            # Problem #2: reverse complement of the sequence.
            rev_seq = my_seq.reverse_complement()
            print('DNA reverse complement of p53seg is: ')
            output_file.write('\nDNA reverse complement of p53seg is: ')
            print(rev_seq)
            output_file.write(str(rev_seq))

            # Problem #3: six-frame translation using the table from the
            # 3b assignment.
            translate_frame_way1(my_seq, output_file)
            # Alternative using the standard table in Bio.Data CodonTable.py:
            # translate_frame_way2(my_seq, output_file)

        writefooter(output_file)

print('Completed Problems 1 2 3 run .... finished writing > ' + str(output_file_name))

# Display the report. The path is passed as its own argument rather than
# pasted into a shell string, so directories containing spaces work.
try:
    subprocess.call(['explorer', output_file_name])
except OSError:
    # 'explorer' only exists on Windows; the report is still on disk.
    print('Could not start explorer; open this file manually: ' + str(output_file_name))