TChan/Notebook/2007-2-27

Code So Far

 * 1) !/usr/bin/env python
 * 2) -*- coding: utf-8 -*-

import os import string from Bio import Clustalw, Seq, SeqRecord, SubsMat from Bio.Align import AlignInfo from Bio.Align.FormatConvert import FormatConverter

cline = Clustalw.MultipleAlignCL(os.path.join(os.curdir, 'apoe.fasta')) cline.set_output('test.aln') alignment = Clustalw.do_alignment(cline)


 * 1) for s in alignment.get_all_seqs:
 * 2)    sequence = s.seq.tostring

align_info = AlignInfo.SummaryInfo(alignment) sequence = align_info.dumb_consensus

temp = list(sequence) temp.reverse reverse_sequence = string.join(temp, '')
 * 1) make a reverse sequence

var1 = Seq.translate(sequence[0:]) var2 = Seq.translate(sequence[1:]) var3 = Seq.translate(sequence[2:])
 * 1) make all possible reading frames

var4 = Seq.translate(reverse_sequence[0:]) var5 = Seq.translate(reverse_sequence[1:]) var6 = Seq.translate(reverse_sequence[2:])

var_list = [var1, var2, var3, var4, var5, var6]

start_list = ],[],[],[],[],[ stop_list = ],[],[],[],[],[
 * 1) make a table of all start and stop aa locations

for var in range(len(var_list)): for aa in range(len(var_list[var])): if var_list[var][aa] == 'M': start_list[var].append(aa) elif var_list[var][aa] == '*': stop_list[var].append(aa)

print start_list

RF_length_list = ],[],[],[],[],[

for var in range(len(var_list)): if start_list[var] != []: c = 0 while start_list[var][0] > stop_list[var][c]: c += 1 RF_length_list[var] = stop_list[var][c] - start_list[var][0] else: RF_length_list[var] = 'NA' start_list[var][0] = 'NA'

lists = [start_list, stop_list, RF_length_list]

for w in range(len(lists)): for x in range(len(lists[w]):       for y in range(len(lists[w][x])):            lists[w][x][y] = str(lists[w][x][y])

for variable in range(len(var_list)): if variable < 3: print '5->3 frame %i: start codon position: %s. stop codon position: %s. Transcript length: %s.' % (variable, start_list[variable][0], stop_list[variable][c], RF_length_list[variable]) else: print '3->5 frame %i: start codon position: %s. stop codon position: %s. Transcript length: %s.' % (variable, start_list[variable][0], stop_list[variable][c], RF_length_list[variable])