From OpenWetWare
<syntax type="python>
-
- BiPython Script Created By Anugraha Raman
- For BP 101 September 24, 2009 Problem #4
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.Alphabet import IUPAC
from random import *
import os
-
-
- Functions defined in this script file are as follows:
-
- writeheader(myfile) : Writes a specific HTML header using the myfile
handle
- writefooter(myfile) : Writes a specific HTML footer using the myfile
handle
- writerunsummary(myfile) : Writes a specific set of sumamry information
using the myfile handle
- mutatesinglebp(seq, random_Seed, forevery, prob) : Mutates a single
base pair for forevery
- location range using a probability of prob;
returns mutated sequence
- writeAA(myfile, seq,stop_locs) : Writes the amino acids using the myfile
handle
- findstops(seq) : Finds the stop locations in the given DNA
sequence
-
-
-
-
def writeheader(my_file):
#Write an extra cool header file
header = str ('
Biophysisc 101: Genomics, Computing and Economics >> ')
header = header + str(' Problem 4: ')
my_file.write(header)
my_file.write('')
my_file.write('
')
my_file.write('')
my_file.write('')
# end of function writeheader
def writefooter(my_file):
# write the footer on the html file
footer = str ('
BioPython Scripting By: Anugraha Raman ')
footer = footer + str ('Script Source:
AbnormalStops.py ')
my_file.write(footer)
my_file.write(' ')
- end of function writefooter
def writerunsummary (runs):
output_file.write('
[Total # of Runs: ' + str(runs))
output_file.write('] ')
output_file.write('Normal Stops are noted in blue
')
output_file.write('Abnormal Stops are noted in red
or ')
output_file.write(str(' green </font>'))
- end of function writerunheader
- mutate_singlebp mutates a single base pair for forevery location range
using a
- probability of prob
def mutate_singlebp(seq, random_seed=0, forevery=100, prob=0.01):
# reset seed
if random_seed == 0:
seed()
else:
seed(random_seed)
for x in range(0,forevery):
r=randrange(0,1/prob)
if r == 0:
mutate_pos = randrange(0,len(seq)-1)
old_base = seq[mutate_pos]
if old_base == 'a':
mutated_base = choice(['c', 't', 'g'])
elif old_base == 'c':
mutated_base = choice(['a', 't', 'g'])
elif old_base == 't':
mutated_base = choice(['a', 'c', 'g'])
else: # 'g'
mutated_base = choice(['a', 'c', 't'])
# mutable sequence seq is updated
seq[mutate_pos] = mutated_base
# end if
# end for
- end of function mutate_singlebp
def writeAA(my_file, seq,stop_locs):
my_file.write('<p>')
my_file.write(Seq.translate(seq).tostring())
my_file.write('<p><p>')
#my_file.write('<p>')
#my_file.write(str(stop_locs))
#my_file.write('<p>')
- end of function writeAA
def findstops(seq):
stop_array = []
start = 0
stop_pos = 0
while stop_pos != -1:
stop_pos = Seq.translate(seq).find('*',start)
start = stop_pos + 1
stop_array = stop_array + [stop_pos]
stop_array.remove(-1)
return stop_array
# end of while
- end of function findstops
- Start my script
input_file = open('p53seg.txt', 'r')
output_file_name = os.path.join(os.getcwd(), 'anugraha-092409-prob4.html')
output_file = open(output_file_name, 'w')
writeheader(output_file)
- only one record for now but can expand this later to include multiple
records
for cur_record in SeqIO.parse(input_file, "fasta"):
my_seq = cur_record.seq
a = findstops(my_seq)
writeAA(output_file, my_seq, a)
freq_count = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
- number of simulations
sim_num = 800
for sim_count in range(0, sim_num):
# number of runs within a simulation
num_of_runs = 100
pt_array = []
premature_termination_count = 0
# not counting the missing stop abnormality currently only premature
terminations
#ms_array = []
#missing_stop_count = 0
for run_num in range (0,num_of_runs):
# I have to create a mutable sequence to use the mutate_singlebp
function
rand_seq = my_seq.tomutable()
# calling the function that will do msot of the heavy lifting
mutate_singlebp(rand_seq)
# I have to convert the mutable sequence back to a normal DNA
sequence in order to use the transcribe() method
new_seq = Seq(rand_seq.toseq().tostring(), IUPAC.unambiguous_dna)
b = findstops(new_seq)
if a != b:
notification_str = str('<p>[Sim #: ') + str(sim_count) +
str(' |Run #: ') + str(run_num)
notification_str = notification_str + str('] Stop
Locations ')
for loc in b:
if a.count(loc) == 0:# write the premature termination Stop
location in Red
notification_str = notification_str + str(' Premature Termination @: ')
notification_str = notification_str + str(loc) +
str('')
premature_termination_count =
premature_termination_count + 1
else: # write the normal Stop locations in Blue
notification_str = notification_str + str(' ')
notification_str = notification_str + str(loc) +
str('')
# end For loop
# actually write out the string created above
output_file.write(notification_str)
for loc in a:
if b.count(loc) == 0:# write the missing Stop locations in
Green
output_file.write(' Missing Stop Mutation @: ')
output_file.write(str(loc) + str(''))
#missing_stop_count = missing_stop_count + 1
writeAA(output_file, new_seq, b)
# count the number of premature terminations in current simulation
pt_array = pt_array + [premature_termination_count]
# ms_array = ms_array + [missing_stop_count]
# exit the for(run_num) loop
# Gather the frequency distribution
for i in range(0,10):
freq_count[i] = freq_count[i] + pt_array.count(i)
- end for sim_count loop
writerunsummary(num_of_runs)
writefooter(output_file)
input_file.close()
- close the file handle so the file is actually written to disk
output_file.close()
print 'Completed Mutation runs .... finished writing' +
str(output_file_name)
- Open internet explorer and display the file
os.system('explorer ' + output_file_name)
- Print final distribution count
print freq_count
import numpy as np
import matplotlib.pyplot as plt
from pylab import *
t1 = np.arange(1.0, 11.0, 1.0)
title_string = 'BP 101: 9/24, #4 aggregated results By Anugraha Raman: '
title_string += str(sim_num)
title_string += ' simulations of '
title_string += str(num_of_runs)
title_string += ' runs of p53 seg sequence(1020 bp) mutation set at 1%
rate'
title(title_string)
- plot #1
subplot(111)
for i in range (0,10):
s1 = freq_count
plt.plot(t1, s1, 'r^')
grid(True)
xlabel('# of Early Termination Mutations ')
ylabel('Frequency of Occurrence')
plt.show()
</syntax>