User talk:Anugraha Raman/AbnormalStops.py


 * 1) BiPython Script Created By Anugraha Raman
 * 2) For BP 101 September 24, 2009 Problem #4
 * 1) For BP 101 September 24, 2009 Problem #4

from Bio import SeqIO from Bio.Seq import Seq from Bio.Alphabet import IUPAC from random import * import os

handle handle using the myfile handle base pair for forevery returns mutated sequence handle sequence
 * 1)   Functions defined in this script file are as follows:
 * 2)   writeheader(myfile)     : Writes a specific HTML header using the myfile
 * 1)   Functions defined in this script file are as follows:
 * 2)   writeheader(myfile)     : Writes a specific HTML header using the myfile
 * 1)   writeheader(myfile)     : Writes a specific HTML header using the myfile
 * 1)   writefooter(myfile)     : Writes a specific HTML footer using the myfile
 * 1)   writerunsummary(myfile) : Writes a specific set of sumamry information
 * 1)   mutatesinglebp(seq, random_Seed, forevery, prob) : Mutates  a single
 * 1)                             location range using a probability of prob;
 * 1)   writeAA(myfile, seq,stop_locs) : Writes the amino acids using the myfile
 * 1)   findstops(seq)          : Finds the stop locations in the given DNA

def writeheader(my_file): #Write an extra cool header file header = str ('  Biophysisc 101: Genomics, Computing and Economics >> ') header = header + str(' Problem 4: ') my_file.write(header) my_file.write(' ') my_file.write('') my_file.write(' ') my_file.write(' ')


 * 1) end of function writeheader

def writefooter(my_file): # write the footer on the html file footer = str ('  BioPython Scripting By: Anugraha Raman ') footer = footer + str ('Script Source:  AbnormalStops.py ') my_file.write(footer) my_file.write(' ')


 * 1) end of function writefooter

def writerunsummary (runs): output_file.write(' [Total # of Runs: ' + str(runs)) output_file.write('] ') output_file.write('Normal Stops are noted in  blue ') output_file.write('Abnormal Stops are noted in  red or ') output_file.write(str(' green '))


 * 1) end of function writerunheader

using a def mutate_singlebp(seq, random_seed=0, forevery=100, prob=0.01): # reset seed if random_seed == 0: seed else: seed(random_seed)
 * 1) mutate_singlebp mutates  a single base pair for forevery location range
 * 1) probability of prob

for x in range(0,forevery): r=randrange(0,1/prob) if r == 0: mutate_pos = randrange(0,len(seq)-1) old_base = seq[mutate_pos] if old_base == 'a': mutated_base = choice(['c', 't', 'g']) elif old_base == 'c': mutated_base = choice(['a', 't', 'g']) elif old_base == 't': mutated_base = choice(['a', 'c', 'g']) else: # 'g'              mutated_base = choice(['a', 'c', 't']) # mutable sequence seq is updated seq[mutate_pos] = mutated_base # end if  # end for


 * 1) end of function mutate_singlebp

def writeAA(my_file, seq,stop_locs): my_file.write(' ') my_file.write(Seq.translate(seq).tostring) my_file.write(' ') #my_file.write(' ') #my_file.write(str(stop_locs)) #my_file.write(' ')


 * 1) end of function writeAA

def findstops(seq): stop_array = [] start = 0 stop_pos = 0 while stop_pos != -1: stop_pos = Seq.translate(seq).find('*',start) start = stop_pos + 1 stop_array = stop_array + [stop_pos] stop_array.remove(-1) return stop_array # end of while


 * 1) end of function findstops

input_file = open('p53seg.txt', 'r') output_file_name = os.path.join(os.getcwd, 'anugraha-092409-prob4.html') output_file = open(output_file_name, 'w')
 * 1) Start my script

writeheader(output_file)

records for cur_record in SeqIO.parse(input_file, "fasta"): my_seq = cur_record.seq
 * 1) only one record for now but can expand this later to include multiple

a = findstops(my_seq) writeAA(output_file, my_seq, a)

freq_count = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

sim_num = 800 for sim_count in range(0, sim_num):
 * 1) number of simulations

# number of runs within a simulation num_of_runs = 100 pt_array = [] premature_termination_count = 0 # not counting the missing stop abnormality currently only premature terminations #ms_array = [] #missing_stop_count = 0

for run_num in range (0,num_of_runs):

# I have to create a mutable sequence to use the mutate_singlebp function rand_seq = my_seq.tomutable

# calling the function that will do msot of the heavy lifting mutate_singlebp(rand_seq)

# I have to convert the mutable sequence back to a normal DNA sequence in order to use the transcribe method new_seq = Seq(rand_seq.toseq.tostring, IUPAC.unambiguous_dna)

b = findstops(new_seq)

if a != b:          notification_str = str(' [Sim #: ') + str(sim_count) + str(' |Run #: ') + str(run_num) notification_str = notification_str + str(']  Stop Locations ') for loc in b:              if a.count(loc) == 0:# write the premature termination Stop location in Red notification_str = notification_str + str(' Premature Termination @: ') notification_str = notification_str + str(loc) + str(' ') premature_termination_count = premature_termination_count + 1

else: # write the normal Stop locations in Blue notification_str = notification_str + str(' ') notification_str = notification_str + str(loc) + str(' ') # end For loop # actually write out the string created above output_file.write(notification_str)

for loc in a:              if b.count(loc) == 0:# write the missing Stop locations in Green output_file.write(' Missing Stop Mutation @: ') output_file.write(str(loc) + str(' ')) #missing_stop_count = missing_stop_count + 1

writeAA(output_file, new_seq, b)

# count the number of premature terminations in current simulation pt_array = pt_array + [premature_termination_count] # ms_array = ms_array + [missing_stop_count]

# exit the for(run_num) loop

# Gather the frequency distribution for i in range(0,10): freq_count[i] = freq_count[i] + pt_array.count(i)

writerunsummary(num_of_runs)
 * 1) end for sim_count loop

writefooter(output_file)

input_file.close

output_file.close
 * 1) close the file handle so the file is actually written to disk

print 'Completed Mutation runs .... finished writing' + str(output_file_name) os.system('explorer ' + output_file_name)
 * 1) Open internet explorer and display the file

print freq_count
 * 1) Print final distribution count

import numpy as np import matplotlib.pyplot as plt from pylab import *

t1 = np.arange(1.0, 11.0, 1.0) title_string = 'BP 101: 9/24, #4 aggregated results By Anugraha Raman: ' title_string += str(sim_num) title_string += ' simulations of ' title_string += str(num_of_runs) title_string += ' runs of p53 seg sequence(1020 bp) mutation set at 1% rate'

title(title_string)

subplot(111) for i in range (0,10): s1 = freq_count
 * 1) plot #1

plt.plot(t1, s1, 'r^') grid(True) xlabel('# of Early Termination Mutations ') ylabel('Frequency of Occurrence')

plt.show