User talk:Anugraha Raman/problem123.txt

From OpenWetWare

Jump to: navigation, search

<syntax type="python">

# BioPython Script By Anugraha Raman
# For BP 101 Problems 1,2,3 Due September 24, 2009

import os

from Bio import SeqIO
from Bio.Seq import Seq
# from Bio.Alphabet import IUPAC

# Functions defined in this script file are as follows:
# translate_dna(seq)  : translates a DNA sequence to an amino acid sequence using a standard table
# translate_frame_way1(seq, myfile) : translates a DNA sequence seq into its protein sequence in all 6 frames (+1, +2, +3, -1, -2, -3)
# translate_frame_way2(seq, myfile) : translates DNA seq into protein sequence in all 6 frames using default BioPython data
# writeheader(my_file)  : writes a specific HTML header using the my_file handle
# writefooter(my_file)  : writes a specific HTML footer using the my_file handle


# Standard translation table from Biophys101_assign3b.doc

# Codon -> one-letter amino acid code; '*' marks a stop codon.
# Keys are lowercase tri-nucleotides, grouped here by first base (t, c, a, g).
standard3b = {
    'ttt': 'F', 'tct': 'S', 'tat': 'Y', 'tgt': 'C',
    'ttc': 'F', 'tcc': 'S', 'tac': 'Y', 'tgc': 'C',
    'tta': 'L', 'tca': 'S', 'taa': '*', 'tga': '*',
    'ttg': 'L', 'tcg': 'S', 'tag': '*', 'tgg': 'W',

    'ctt': 'L', 'cct': 'P', 'cat': 'H', 'cgt': 'R',
    'ctc': 'L', 'ccc': 'P', 'cac': 'H', 'cgc': 'R',
    'cta': 'L', 'cca': 'P', 'caa': 'Q', 'cga': 'R',
    'ctg': 'L', 'ccg': 'P', 'cag': 'Q', 'cgg': 'R',

    'att': 'I', 'act': 'T', 'aat': 'N', 'agt': 'S',
    'atc': 'I', 'acc': 'T', 'aac': 'N', 'agc': 'S',
    'ata': 'I', 'aca': 'T', 'aaa': 'K', 'aga': 'R',
    'atg': 'M', 'acg': 'T', 'aag': 'K', 'agg': 'R',

    'gtt': 'V', 'gct': 'A', 'gat': 'D', 'ggt': 'G',
    'gtc': 'V', 'gcc': 'A', 'gac': 'D', 'ggc': 'G',
    'gta': 'V', 'gca': 'A', 'gaa': 'E', 'gga': 'G',
    'gtg': 'V', 'gcg': 'A', 'gag': 'E', 'ggg': 'G',
}


def translate_dna(seq):
    """Translate a DNA sequence into one-letter amino acid codes.

    The sequence is read in consecutive, non-overlapping codons starting at
    index 0 and each codon is looked up in ``standard3b``.

    Args:
        seq: the DNA sequence; anything whose ``str()`` is the base string
            (a plain string works). Upper- and lower-case bases are treated
            alike.

    Returns:
        A string with one character per codon. A codon that is not in the
        table (a trailing partial codon, or one containing a character other
        than a/c/g/t) is reported as ``'?'``; stop codons are ``'*'``.
    """
    # The table keys are lowercase, so normalise once instead of missing
    # every codon of an upper-case sequence.
    bases = str(seq).lower()
    # range() rather than xrange() keeps this working on Python 2 and 3;
    # join() avoids rebuilding the result string for every codon.
    return "".join(
        standard3b.get(bases[start:start + 3], "?")
        for start in range(0, len(bases), 3)
    )

def translate_frame_way1(seq, myfile):
    """Translate *seq* in all six reading frames using ``translate_dna``.

    Frames +1, +2 and +3 start at offsets 0, 1 and 2 of *seq*; frames -1, -2
    and -3 start at offsets 0, 1 and 2 of its reverse complement. For each
    frame a label line and the translation are printed, and the same label
    (prefixed with ``<p> ``) and translation are written to *myfile*.

    Args:
        seq: the DNA sequence. It must support slicing and provide a
            ``reverse_complement()`` method.
        myfile: writable handle that receives the HTML fragments.
    """
    strand = seq
    for sign in ('+', '-'):
        if sign == '-':
            # The reverse strand is only needed once the forward frames
            # have been reported.
            strand = seq.reverse_complement()
        for offset in range(3):
            # Dropping the first `offset` bases shifts the reading frame.
            amino_seq = translate_dna(strand[offset:])
            heading = '(%s%d) frame translation is: ' % (sign, offset + 1)
            print(heading)
            print(amino_seq)
            myfile.write('<p> ' + heading)
            myfile.write(amino_seq)

# end of function translate_frame_way1
# Another way of getting the translated result
def translate_frame_way2(seq, myfile):
    """Translate *seq* in all six reading frames using its own ``translate()``.

    Same report as the table-driven variant, but each frame is translated by
    calling the sequence object's ``translate()`` method with no arguments,
    so the library's default translation settings apply. Frames +1, +2 and +3
    start at offsets 0, 1 and 2 of *seq*; frames -1, -2 and -3 start at
    offsets 0, 1 and 2 of its reverse complement.

    Args:
        seq: the DNA sequence. It must support slicing and provide
            ``reverse_complement()`` and ``translate()`` methods
            (presumably a Biopython ``Seq`` - confirm against callers).
        myfile: writable handle that receives the HTML fragments.
    """
    strand = seq
    for sign in ('+', '-'):
        if sign == '-':
            strand = seq.reverse_complement()
        for offset in range(3):
            # translate() returns a sequence object, not a string; convert
            # it before handing it to write(), which only accepts strings.
            amino_seq = str(strand[offset:].translate())
            heading = '(%s%d) frame translation is: ' % (sign, offset + 1)
            print(heading)
            print(amino_seq)
            myfile.write('<p> ' + heading)
            myfile.write(amino_seq)
# end of function translate_frame_way2
def writeheader(my_file):
    """Write the report's opening text to *my_file*.

    Args:
        my_file: writable handle for the HTML report.
    """
    # NOTE(review): the spelling 'Biophysisc' is what the script emits; it is
    # left untouched so the generated page does not change.
    header = ' Biophysisc 101: Genomics, Computing and Economics >> '
    header = header + ' Problems 1,2,3: '
    my_file.write(header)
    # NOTE(review): these writes are empty in the available copy of the
    # script; presumably they carried HTML markup that was lost - confirm
    # against the original file.
    my_file.write('')
    my_file.write('')
    my_file.write('')
    # NOTE(review): this literal was broken across lines in the available
    # copy; presumably it was a '<p>' paragraph tag - TODO confirm.
    my_file.write('<p>')


def writefooter(my_file):
    """Write the report's closing credit text to *my_file*.

    Args:
        my_file: writable handle for the HTML report.
    """
    # NOTE(review): the leading '<p> ' restores a literal that was broken
    # across lines in the available copy - TODO confirm.
    footer = '<p> BioPython Scripting By: Anugraha Raman '
    footer = footer + 'Script Source: Problems123.py '
    my_file.write(footer)
    my_file.write(' ')

# end of function writefooter
# Begin my script
# Script body: read the FASTA input, then write an HTML report containing the
# GC percentage (problem 1), the reverse complement (problem 2) and the
# six-frame translation (problem 3) of each record.
output_file_name = os.path.join(os.getcwd(), 'anugraha-092409-prob123.html')

# Read every record up front so the input handle is released even if
# parsing fails, and before any output is produced.
input_file = open('p53seg.txt', 'r')
try:
    records = list(SeqIO.parse(input_file, "fasta"))
finally:
    input_file.close()

output_file = open(output_file_name, 'w')
try:
    writeheader(output_file)
    for cur_record in records:
        my_seq = cur_record.seq
        # print('DNA sequence is: ')
        # print(my_seq)

        # GC count done explicitly, i.e. problem #1 in this assignment set.
        # Lower-casing first makes the count independent of the input case.
        bases = str(my_seq).lower()
        g_count = bases.count('g')
        c_count = bases.count('c')
        seq_count = len(bases)
        # float() in the denominator gives a decimal answer for GC%; an
        # empty sequence is reported as 0.0 instead of dividing by zero.
        if seq_count:
            gc_percent = ((g_count + c_count) / float(seq_count)) * 100
        else:
            gc_percent = 0.0
        print('GC % is: ' + str(gc_percent))
        output_file.write('<p> GC % is: ' + str(gc_percent))

        # Reverse complement of the sequence, i.e. problem #2.
        rev_seq = my_seq.reverse_complement()
        print('DNA reverse complement of p53seg is: ')
        output_file.write('<p> DNA reverse complement of p53seg is: ')
        print(rev_seq)
        output_file.write(str(rev_seq))

        # Six-frame translation with the table from assignment 3b,
        # i.e. problem #3.
        translate_frame_way1(my_seq, output_file)
        # Alternative using the standard table in Bio.Data CodonTable.py:
        # translate_frame_way2(my_seq, output_file)
    writefooter(output_file)
finally:
    # Close the file handle so the file is actually written to disk,
    # even when one of the steps above fails.
    output_file.close()

print('Completed Problems 1 2 3 run .... finished writing > ' + str(output_file_name))
# Open internet explorer and display the file
os.system('explorer ' + output_file_name)
</syntax>

Personal tools