TChan/Notebook/2007-2-27

From OpenWetWare
Jump to navigationJump to search

Code So Far

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import os
import string
from Bio import Clustalw, Seq, SeqRecord, SubsMat
from Bio.Align import AlignInfo
from Bio.Align.FormatConvert import FormatConverter

cline = Clustalw.MultipleAlignCL(os.path.join(os.curdir, 'apoe.fasta'))
cline.set_output('test.aln')
alignment = Clustalw.do_alignment(cline)

#for s in alignment.get_all_seqs():
#    sequence = s.seq.tostring()

align_info = AlignInfo.SummaryInfo(alignment)
sequence = align_info.dumb_consensus()

#make a reverse sequence
temp = list(sequence)
temp.reverse()
reverse_sequence = string.join(temp, '')

#make all possible reading frames
var1 = Seq.translate(sequence[0:])
var2 = Seq.translate(sequence[1:])
var3 = Seq.translate(sequence[2:])

var4 = Seq.translate(reverse_sequence[0:])
var5 = Seq.translate(reverse_sequence[1:])
var6 = Seq.translate(reverse_sequence[2:])

var_list = [var1, var2, var3, var4, var5, var6]

#make a table of all start and stop aa locations 
start_list = [[],[],[],[],[],[]]
stop_list = [[],[],[],[],[],[]]


for var in range(len(var_list)):
    for aa in range(len(var_list[var])):
        if var_list[var][aa] == 'M':
            start_list[var].append(aa)
        elif var_list[var][aa] == '*':
            stop_list[var].append(aa)

print start_list


RF_length_list = [[],[],[],[],[],[]]

for var in range(len(var_list)):
    if start_list[var] != []:
        c = 0
        while start_list[var][0] > stop_list[var][c]:
            c += 1 
        RF_length_list[var] = stop_list[var][c] - start_list[var][0]
    else:
        RF_length_list[var] = 'NA'
        start_list[var][0] = 'NA'

lists = [start_list, stop_list, RF_length_list]

for w in range(len(lists)):
    for x in range(len(lists[w]):
        for y in range(len(lists[w][x])):
            lists[w][x][y] = str(lists[w][x][y])

for variable in range(len(var_list)):               
    if variable < 3:
        print '5->3 frame %i: start codon position: %s. stop codon position: %s. Transcript length: %s.' % (variable, start_list[variable][0], stop_list[variable][c], RF_length_list[variable])
    else:
        print '3->5 frame %i: start codon position: %s. stop codon position: %s. Transcript length: %s.' % (variable, start_list[variable][0], stop_list[variable][c], RF_length_list[variable])