User talk:Anugraha Raman/AbnormalStops.py

From OpenWetWare

Jump to: navigation, search

<syntax type="python>

  1. BiPython Script Created By Anugraha Raman
  2. For BP 101 September 24, 2009 Problem #4

from Bio import SeqIO from Bio.Seq import Seq from Bio.Alphabet import IUPAC from random import * import os

  1. Functions defined in this script file are as follows:
  2. writeheader(myfile)  : Writes a specific HTML header using the myfile

handle

  1. writefooter(myfile)  : Writes a specific HTML footer using the myfile

handle

  1. writerunsummary(myfile) : Writes a specific set of sumamry information

using the myfile handle

  1. mutatesinglebp(seq, random_Seed, forevery, prob) : Mutates a single

base pair for forevery

  1. location range using a probability of prob;

returns mutated sequence

  1. writeAA(myfile, seq,stop_locs) : Writes the amino acids using the myfile

handle

  1. findstops(seq)  : Finds the stop locations in the given DNA

sequence

def writeheader(my_file):

  #Write an extra cool header file
  header = str ('
Biophysisc 101: Genomics, Computing and Economics >>  ')
   header = header + str(' Problem 4: ')
   my_file.write(header)
   my_file.write('')
   my_file.write('')
   my_file.write('')
   my_file.write('

') # end of function writeheader def writefooter(my_file): # write the footer on the html file footer = str ('

BioPython Scripting By: Anugraha Raman ') footer = footer + str ('Script Source: AbnormalStops.py ') my_file.write(footer) my_file.write(' ')

  1. end of function writefooter

def writerunsummary (runs):

output_file.write('

[Total # of Runs: ' + str(runs)) output_file.write('] ') output_file.write('Normal Stops are noted in blue ') output_file.write('Abnormal Stops are noted in red or ') output_file.write(str(' green </font>'))

  1. end of function writerunheader
  1. mutate_singlebp mutates a single base pair for forevery location range
using a
  1. probability of prob
def mutate_singlebp(seq, random_seed=0, forevery=100, prob=0.01): # reset seed if random_seed == 0: seed() else: seed(random_seed) for x in range(0,forevery): r=randrange(0,1/prob) if r == 0: mutate_pos = randrange(0,len(seq)-1) old_base = seq[mutate_pos] if old_base == 'a': mutated_base = choice(['c', 't', 'g']) elif old_base == 'c': mutated_base = choice(['a', 't', 'g']) elif old_base == 't': mutated_base = choice(['a', 'c', 'g']) else: # 'g' mutated_base = choice(['a', 'c', 't']) # mutable sequence seq is updated seq[mutate_pos] = mutated_base # end if # end for
  1. end of function mutate_singlebp
def writeAA(my_file, seq,stop_locs): my_file.write('<p>') my_file.write(Seq.translate(seq).tostring()) my_file.write('<p><p>') #my_file.write('<p>') #my_file.write(str(stop_locs)) #my_file.write('<p>')
  1. end of function writeAA
def findstops(seq): stop_array = [] start = 0 stop_pos = 0 while stop_pos != -1: stop_pos = Seq.translate(seq).find('*',start) start = stop_pos + 1 stop_array = stop_array + [stop_pos] stop_array.remove(-1) return stop_array # end of while
  1. end of function findstops
  1. Start my script
input_file = open('p53seg.txt', 'r') output_file_name = os.path.join(os.getcwd(), 'anugraha-092409-prob4.html') output_file = open(output_file_name, 'w') writeheader(output_file)
  1. only one record for now but can expand this later to include multiple
records for cur_record in SeqIO.parse(input_file, "fasta"): my_seq = cur_record.seq a = findstops(my_seq) writeAA(output_file, my_seq, a) freq_count = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
  1. number of simulations
sim_num = 800 for sim_count in range(0, sim_num): # number of runs within a simulation num_of_runs = 100 pt_array = [] premature_termination_count = 0 # not counting the missing stop abnormality currently only premature terminations #ms_array = [] #missing_stop_count = 0 for run_num in range (0,num_of_runs): # I have to create a mutable sequence to use the mutate_singlebp function rand_seq = my_seq.tomutable() # calling the function that will do msot of the heavy lifting mutate_singlebp(rand_seq) # I have to convert the mutable sequence back to a normal DNA sequence in order to use the transcribe() method new_seq = Seq(rand_seq.toseq().tostring(), IUPAC.unambiguous_dna) b = findstops(new_seq) if a != b: notification_str = str('<p>[Sim #: ') + str(sim_count) + str(' |Run #: ') + str(run_num) notification_str = notification_str + str('] Stop Locations ') for loc in b: if a.count(loc) == 0:# write the premature termination Stop location in Red notification_str = notification_str + str(' Premature Termination @: ') notification_str = notification_str + str(loc) + str('') premature_termination_count = premature_termination_count + 1 else: # write the normal Stop locations in Blue notification_str = notification_str + str(' ') notification_str = notification_str + str(loc) + str('') # end For loop # actually write out the string created above output_file.write(notification_str) for loc in a: if b.count(loc) == 0:# write the missing Stop locations in Green output_file.write(' Missing Stop Mutation @: ') output_file.write(str(loc) + str('')) #missing_stop_count = missing_stop_count + 1 writeAA(output_file, new_seq, b) # count the number of premature terminations in current simulation pt_array = pt_array + [premature_termination_count] # ms_array = ms_array + [missing_stop_count] # exit the for(run_num) loop # Gather the frequency distribution for i in range(0,10): freq_count[i] = freq_count[i] + pt_array.count(i)
  1. end for sim_count loop
writerunsummary(num_of_runs) writefooter(output_file) input_file.close()
  1. close the file handle so the file is actually written to disk
output_file.close() print 'Completed Mutation runs .... finished writing' + str(output_file_name)
  1. Open internet explorer and display the file
os.system('explorer ' + output_file_name)
  1. Print final distribution count
print freq_count import numpy as np import matplotlib.pyplot as plt from pylab import * t1 = np.arange(1.0, 11.0, 1.0) title_string = 'BP 101: 9/24, #4 aggregated results By Anugraha Raman: ' title_string += str(sim_num) title_string += ' simulations of ' title_string += str(num_of_runs) title_string += ' runs of p53 seg sequence(1020 bp) mutation set at 1% rate' title(title_string)
  1. plot #1
subplot(111) for i in range (0,10): s1 = freq_count plt.plot(t1, s1, 'r^') grid(True) xlabel('# of Early Termination Mutations ') ylabel('Frequency of Occurrence') plt.show() </syntax>

Personal tools