TChan/Notebook/2007-2-27
From OpenWetWare
Jump to navigationJump to search
Code So Far
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import string
from Bio import Clustalw, Seq, SeqRecord, SubsMat
from Bio.Align import AlignInfo
from Bio.Align.FormatConvert import FormatConverter
cline = Clustalw.MultipleAlignCL(os.path.join(os.curdir, 'apoe.fasta'))
cline.set_output('test.aln')
alignment = Clustalw.do_alignment(cline)
#for s in alignment.get_all_seqs():
# sequence = s.seq.tostring()
align_info = AlignInfo.SummaryInfo(alignment)
sequence = align_info.dumb_consensus()
#make a reverse sequence
temp = list(sequence)
temp.reverse()
reverse_sequence = string.join(temp, '')
#make all possible reading frames
var1 = Seq.translate(sequence[0:])
var2 = Seq.translate(sequence[1:])
var3 = Seq.translate(sequence[2:])
var4 = Seq.translate(reverse_sequence[0:])
var5 = Seq.translate(reverse_sequence[1:])
var6 = Seq.translate(reverse_sequence[2:])
var_list = [var1, var2, var3, var4, var5, var6]
#make a table of all start and stop aa locations
start_list = [[],[],[],[],[],[]]
stop_list = [[],[],[],[],[],[]]
for var in range(len(var_list)):
for aa in range(len(var_list[var])):
if var_list[var][aa] == 'M':
start_list[var].append(aa)
elif var_list[var][aa] == '*':
stop_list[var].append(aa)
print start_list
RF_length_list = [[],[],[],[],[],[]]
for var in range(len(var_list)):
if start_list[var] != []:
c = 0
while start_list[var][0] > stop_list[var][c]:
c += 1
RF_length_list[var] = stop_list[var][c] - start_list[var][0]
else:
RF_length_list[var] = 'NA'
start_list[var][0] = 'NA'
lists = [start_list, stop_list, RF_length_list]
for w in range(len(lists)):
for x in range(len(lists[w]):
for y in range(len(lists[w][x])):
lists[w][x][y] = str(lists[w][x][y])
for variable in range(len(var_list)):
if variable < 3:
print '5->3 frame %i: start codon position: %s. stop codon position: %s. Transcript length: %s.' % (variable, start_list[variable][0], stop_list[variable][c], RF_length_list[variable])
else:
print '3->5 frame %i: start codon position: %s. stop codon position: %s. Transcript length: %s.' % (variable, start_list[variable][0], stop_list[variable][c], RF_length_list[variable])