User:Anugraha Raman/problem123.txt

# BioPython Script By Anugraha Raman
# For BP 101 Problems 1,2,3 Due September 24, 2009

import os

from Bio import SeqIO
from Bio.Alphabet import IUPAC
from Bio.Seq import Seq

# Functions defined in this script file are as follows:
# translate_dna(seq) : translates a DNA sequence to an amino acid sequence using the standard table below
# translate_frame_way1(seq, myfile) : translates a DNA sequence seq into its protein sequence in all 6 frames (+1, +2, +3, -1, -2, -3)
# translate_frame_way2(seq, myfile) : translates DNA seq into its protein sequence in all 6 frames using default BioPython data
# writeheader(myfile) : writes a specific HTML header using the myfile handle
# writefooter(myfile) : writes a specific HTML footer using the myfile handle

# Standard translation table from Biophys101_assign3b.doc

# Codon -> one-letter amino acid table ('*' marks a stop codon).
# Codons are generated with the first base varying slowest and the third base
# fastest, each position running through t, c, a, g; the residue string below
# lists the matching amino acids in exactly that order, one row per first base.
standard3b = dict(zip(
    (first + second + third
     for first in 'tcag'
     for second in 'tcag'
     for third in 'tcag'),
    'FFLLSSSSYY**CC*W'    # t..
    'LLLLPPPPHHQQRRRR'    # c..
    'IIIMTTTTNNKKSSRR'    # a..
    'VVVVAAAADDEEGGGG'    # g..
))

def translate_dna(seq):
    """Translate a DNA sequence into one-letter amino acid codes.

    The sequence is read as consecutive, non-overlapping triplets starting at
    index 0, and each triplet is looked up in the module-level ``standard3b``
    table.

    seq -- a sliceable sequence whose slices convert to text with ``str()``.

    Returns a string with one character per triplet.  A triplet that is not
    in the table (for example a trailing partial codon) is reported as '?'.
    """
    residues = []
    # range() rather than xrange() so the loop runs on Python 2 and Python 3.
    for codon_loc in range(0, len(seq), 3):
        # The table keys are lower case; normalise the codon so upper-case
        # input is translated instead of collapsing to '?'.
        codon = str(seq[codon_loc:codon_loc + 3]).lower()
        residues.append(standard3b.get(codon, "?"))
    # Join once at the end instead of growing a string inside the loop.
    return "".join(residues)

def translate_frame_way1(seq, myfile):
    """Translate seq in all six reading frames using translate_dna().

    Method 1: uses the standard table from the Biophysics101_assign3b
    document through translate_dna().

    seq    -- sequence object supporting slicing and reverse_complement().
    myfile -- writable handle; each frame's label and translation is written
              to it as well as printed.

    Frames +1, +2, +3 are the sequence starting at offsets 0, 1 and 2;
    frames -1, -2, -3 are the reverse complement at the same offsets.
    """
    r_seq = seq.reverse_complement()
    frames = (
        ('+1', seq),
        ('+2', seq[1:]),    # drop the first nucleotide
        ('+3', seq[2:]),    # drop the first two nucleotides
        ('-1', r_seq),
        ('-2', r_seq[1:]),
        ('-3', r_seq[2:]),
    )
    for label, frame_seq in frames:
        amino_seq = translate_dna(frame_seq)
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('(%s) frame translation is: ' % label)
        print(amino_seq)
        # NOTE(review): the markup that opened this literal was lost when the
        # source was extracted (it shows up as a blank line); a paragraph tag
        # is assumed here -- confirm against the original script.
        myfile.write('<p>(%s) frame translation is: ' % label)
        myfile.write(amino_seq)

# end of function translate_frame_way1

# Another way of getting the translated result

def translate_frame_way2(seq, myfile):
    """Translate seq in all six reading frames using the sequence's translate().

    Method 2: calls translate() on each frame with its default arguments
    (per the original comments, BioPython's standard table from
    Bio.Data CodonTable.py).

    seq    -- sequence object supporting slicing, reverse_complement() and
              translate().
    myfile -- writable handle; each frame's label and translation is written
              to it as well as printed.

    Frames +1, +2, +3 are the sequence starting at offsets 0, 1 and 2;
    frames -1, -2, -3 are the reverse complement at the same offsets.
    """
    r_seq = seq.reverse_complement()
    frames = (
        ('+1', seq),
        ('+2', seq[1:]),    # drop the first nucleotide
        ('+3', seq[2:]),    # drop the first two nucleotides
        ('-1', r_seq),
        ('-2', r_seq[1:]),
        ('-3', r_seq[2:]),
    )
    for label, frame_seq in frames:
        # Call translate() on the instance rather than through the Seq class.
        amino_seq = frame_seq.translate()
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('(%s) frame translation is: ' % label)
        print(amino_seq)
        # NOTE(review): the markup that opened this literal was lost when the
        # source was extracted (it shows up as a blank line); a paragraph tag
        # is assumed here -- confirm against the original script.
        myfile.write('<p>(%s) frame translation is: ' % label)
        # translate() returns a sequence object; write() needs plain text.
        myfile.write(str(amino_seq))

# end of function translate_frame_way2

def writeheader(my_file):
    """Write the opening HTML of the report to my_file.

    my_file -- writable handle for the HTML output.

    Writes the opening <html> tag, the course/problem title text and the
    course logo image tag.
    """
    # NOTE(review): the original also issued two empty writes around the
    # image; they presumably carried markup that was lost when the source was
    # extracted -- confirm against the original script.
    header = ('<html> Biophysics 101: Genomics, Computing and Economics >> '
              ' Problems 1,2,3: ')
    # <img> is a void element, so no closing </img> tag is emitted.
    logo = '<img border="2" src="2009BP101-logo1.png">'
    my_file.write(header)
    my_file.write(logo)

# end of function writeheader

def writefooter(my_file):
    """Write the closing HTML of the report to my_file.

    my_file -- writable handle for the HTML output.

    Writes the author credit, a link to the script source and the closing
    </html> tag.
    """
    footer = (' BioPython Scripting By: Anugraha Raman '
              'Script Source: <a href=Problems123.py> Problems123.py </a>')
    my_file.write(footer)
    my_file.write(' </html>')

# end of function writefooter

# Begin my script

# Read the FASTA input; try/finally closes the handle even if parsing fails.
input_file = open('p53seg.txt', 'r')
try:
    records = list(SeqIO.parse(input_file, "fasta"))
finally:
    input_file.close()

if not records:
    raise SystemExit('No FASTA records found in p53seg.txt')

# NOTE(review): the original indentation was lost, so it is unclear how much
# of the script sat inside the record loop.  The output file name is fixed,
# so only one record's report can survive; the last record is used -- confirm.
cur_record = records[-1]
my_seq = cur_record.seq

output_file_name = os.path.join(os.getcwd(), 'anugraha-092409-prob123.html')
output_file = open(output_file_name, 'w')
try:
    writeheader(output_file)

    # Single-argument print(...) behaves the same on Python 2 and 3.
    print('DNA sequence is: ')
    print(my_seq)

    # GC count done explicitly, i.e. problem #1 in this assignment set.
    # Count on lower-cased text so upper-case FASTA input is handled too.
    seq_text = str(my_seq).lower()
    g_count = seq_text.count('g')    # number of guanines
    c_count = seq_text.count('c')    # number of cytosines
    seq_count = len(seq_text)        # length of the sequence

    # float() in the denominator gives a decimal GC%; an empty sequence
    # reports 0.0 instead of dividing by zero.
    if seq_count:
        gc_percent = ((g_count + c_count) / float(seq_count)) * 100
    else:
        gc_percent = 0.0
    print('GC % is: ' + str(gc_percent))
    # NOTE(review): the markup opening the literals written below was lost
    # when the source was extracted (it shows up as a blank line); a
    # paragraph tag is assumed -- confirm against the original script.
    output_file.write('<p>GC % is: ' + str(gc_percent))

    # Reverse complement of the sequence, i.e. problem #2.
    rev_seq = my_seq.reverse_complement()
    print('DNA reverse complement of p53seg is: ')
    output_file.write('<p>DNA reverse complement of p53seg is: ')
    print(rev_seq)
    output_file.write(str(rev_seq))

    # Six-frame translation, i.e. problem #3.
    # Using the table from the 3b assignment:
    translate_frame_way1(my_seq, output_file)
    # Using the standard table in Bio.Data CodonTable.py.  The function
    # requires the output handle as its second argument.
    translate_frame_way2(my_seq, output_file)

    writefooter(output_file)
finally:
    # Close the handle so the file is actually written to disk.
    output_file.close()

print('Completed Problems 1 2 3 run .... finished writing > ' + str(output_file_name))

# Open the report with Windows Explorer; the path is quoted so directories
# containing spaces are passed as a single argument.
os.system('explorer "%s"' % output_file_name)

User:Anugraha Raman/problem123.txt

Navigation menu

Page actions

Page actions

Personal tools

Navigation

Search

research

Tools