# User:Anugraha Raman/problem123.txt


# BioPython Script By Anugraha Raman
# For BP 101 Problems 1,2,3 Due September 24, 2009

import os

from Bio import SeqIO
from Bio.Seq import Seq
# from Bio.Alphabet import IUPAC


# Functions defined in this script file are as follows:
#   translate_dna(seq)                : translates a DNA sequence to an amino acid sequence using a standard table
#   translate_frame_way1(seq, myfile) : translates a DNA sequence seq into its protein sequence in all 6 frames (+1, +2, +3, -1, -2, -3)
#   translate_frame_way2(seq, myfile) : translates DNA seq into protein sequence in all 6 frames using default BioPython data
#   writeheader(my_file)              : writes a specific HTML header using the my_file handle
#   writefooter(my_file)              : writes a specific HTML footer using the my_file handle


# Standard translation table from Biophys101_assign3b.doc

# Codon -> one-letter amino acid lookup ('*' marks a stop codon).
# Codons are generated first base, then third base, then second base (each
# running t, c, a, g), which is the order the amino-acid string is written in:
# every group of four letters below covers second base t, c, a, g for one
# (first base, third base) pair, and each row covers one first base.
standard3b = dict(zip(
    (first + second + third
     for first in 'tcag'
     for third in 'tcag'
     for second in 'tcag'),
    'FSYCFSYCLS**LS*W'    # t..
    'LPHRLPHRLPQRLPQR'    # c..
    'ITNSITNSITKRMTKR'    # a..
    'VADGVADGVAEGVAEG'    # g..
))

def translate_dna(seq):
    """Translate a DNA sequence into one-letter amino acid codes.

    The sequence is read three bases at a time starting at position 0 and
    each codon is looked up in the module-level ``standard3b`` table.

    Args:
        seq: the DNA sequence; anything whose ``str()`` is the base string
            (a plain string or a Seq-like object).

    Returns:
        A string with one character per codon.  A codon that is not in the
        table -- including a trailing partial codon -- is reported as '?'.
    """
    # Normalise once: the table keys are lower-case, so lower-casing here
    # lets upper-case input translate instead of coming back as all '?'.
    bases = str(seq).lower()
    # range() (not xrange) keeps this runnable on both Python 2 and 3;
    # join() avoids rebuilding the result string for every codon.
    return "".join(
        standard3b.get(bases[start:start + 3], "?")
        for start in range(0, len(bases), 3)
    )

def translate_frame_way1(seq, myfile):
    """Translate ``seq`` in all six reading frames using ``translate_dna``.

    Method 1: uses the standard table defined in this file (as in the
    Biophysics101_assign3b word document) through ``translate_dna``.

    Frames +1, +2 and +3 start at offsets 0, 1 and 2 of ``seq``; frames
    -1, -2 and -3 start at the same offsets of its reverse complement.
    Each translation is printed and also written to ``myfile``.

    Args:
        seq: sequence object that supports slicing and
            ``reverse_complement()``.
        myfile: open, writable file-like object that receives the report.
    """
    # reverse_complement must be *called*: without the parentheses the
    # minus frames received the bound method instead of a sequence.
    r_seq = seq.reverse_complement()

    frames = (
        ('+1', seq),         # original sequence
        ('+2', seq[1:]),     # minus the first nucleic acid
        ('+3', seq[2:]),     # minus the first two nucleic acids
        ('-1', r_seq),       # reverse complement
        ('-2', r_seq[1:]),   # reverse complement minus the first nucleic acid
        ('-3', r_seq[2:]),   # reverse complement minus the first two
    )
    for label, frame_seq in frames:
        amino_seq = translate_dna(frame_seq)
        print('(%s) frame translation is: ' % label)
        print(amino_seq)
        myfile.write(' (%s) frame translation is: ' % label)
        myfile.write(amino_seq)
# end of function translate_frame_way1

def translate_frame_way2(seq, myfile):
    """Translate ``seq`` in all six reading frames with its own ``translate()``.

    Method 2: another way of getting the translated result, relying on the
    sequence object's default ``translate()`` (per the original notes, the
    standard table in Bio.Data CodonTable.py) instead of the table defined
    in this file.

    Frames +1, +2 and +3 start at offsets 0, 1 and 2 of ``seq``; frames
    -1, -2 and -3 start at the same offsets of its reverse complement.
    Each translation is printed and also written to ``myfile``.

    Args:
        seq: sequence object that supports slicing, ``translate()`` and
            ``reverse_complement()``.
        myfile: open, writable file-like object that receives the report.
    """
    # reverse_complement must be *called*: without the parentheses the
    # minus frames received the bound method instead of a sequence.
    r_seq = seq.reverse_complement()

    frames = (
        ('+1', seq),         # original sequence
        ('+2', seq[1:]),     # minus the first nucleic acid
        ('+3', seq[2:]),     # minus the first two nucleic acids
        ('-1', r_seq),       # reverse complement
        ('-2', r_seq[1:]),   # reverse complement minus the first nucleic acid
        ('-3', r_seq[2:]),   # reverse complement minus the first two
    )
    for label, frame_seq in frames:
        # translate() returns a sequence object; file.write() needs text,
        # so convert once and use the same text for both outputs.
        amino_text = str(frame_seq.translate())
        print('(%s) frame translation is: ' % label)
        print(amino_text)
        myfile.write(' (%s) frame translation is: ' % label)
        myfile.write(amino_text)
# end of function translate_frame_way2

def writeheader(my_file):
    """Write the report header text to ``my_file``.

    Args:
        my_file: open, writable file-like object.
    """
    header = ('  Biophysisc 101: Genomics, Computing and Economics >> '
              ' Problems 1,2,3: ')
    my_file.write(header)
    # NOTE(review): these whitespace-only / empty writes look like
    # placeholders where HTML markup was lost from the script -- confirm
    # against the original source.  They are kept so the output is unchanged.
    for filler in (' ', '', ' ', ' '):
        my_file.write(filler)
# end of function writeheader

def writefooter(my_file):
    """Write the report footer text to ``my_file``.

    Args:
        my_file: open, writable file-like object.
    """
    footer = ('  BioPython Scripting By: Anugraha Raman '
              'Script Source:  Problems123.py ')
    my_file.write(footer)
    # NOTE(review): the single-space write looks like a placeholder where
    # HTML markup was lost -- confirm.  Kept so the output is unchanged.
    my_file.write(' ')
# end of function writefooter

# Begin my script

# Read the DNA sequence from the FASTA file.  The `with` block closes the
# handle as soon as parsing is done (`input_file.close` was never called).
# NOTE(review): the original indentation was lost; this assumes only the
# assignment below belonged to the loop, so the last record wins -- confirm.
cur_record = None
with open('p53seg.txt', 'r') as input_file:
    for cur_record in SeqIO.parse(input_file, "fasta"):
        my_seq = cur_record.seq
if cur_record is None:
    # Fail with a clear message instead of a NameError further down.
    raise SystemExit('No FASTA records found in p53seg.txt')

# os.getcwd must be called; passing the function object to join() fails.
output_file_name = os.path.join(os.getcwd(), 'anugraha-092409-prob123.html')

# The `with` block closes the handle so the file is actually written to disk
# (`output_file.close` was never called).
with open(output_file_name, 'w') as output_file:
    writeheader(output_file)

    # print('DNA sequence is: ')
    # print(my_seq)

    # GC count done explicitly, i.e. problem #1 in this assignment set.
    # Lower-case first so upper-case FASTA input is counted as well.
    seq_text = str(my_seq).lower()
    g_count = seq_text.count('g')   # number of Guanines in the sequence
    c_count = seq_text.count('c')   # number of Cytosines in the sequence
    seq_count = len(seq_text)       # length of the sequence
    # use float in denominator to get the decimal answer for GC%;
    # an empty sequence reports 0.0 rather than dividing by zero
    gc_percent = ((g_count + c_count) / float(seq_count)) * 100 if seq_count else 0.0
    print('GC % is: ' + str(gc_percent))
    output_file.write('  GC % is: ' + str(gc_percent))

    # get the reversed complement of the sequence, i.e. problem #2 in this
    # assignment set (reverse_complement must be called, not just referenced)
    rev_seq = my_seq.reverse_complement()
    print('DNA reverse complement of p53seg is: ')
    output_file.write('  DNA reverse complement of p53seg is: ')
    print(rev_seq)
    output_file.write(str(rev_seq))

    # using table in 3b assignment, i.e. problem #3 in this assignment set
    translate_frame_way1(my_seq, output_file)

    # using Standard table in Bio.Data CodonTable.py
    # translate_frame_way2(my_seq, output_file)

    writefooter(output_file)

print('Completed Problems 1 2 3 run .... finished writing > ' + str(output_file_name))
# Open Windows Explorer to display the file; the path is quoted so that a
# working directory containing spaces is passed as a single argument.
os.system('explorer "%s"' % output_file_name)