TChan/Notebook/2007-2-27
From OpenWetWare
Jump to navigationJump to search
Code So Far
#!/usr/bin/env python # -*- coding: utf-8 -*- import os import string from Bio import Clustalw, Seq, SeqRecord, SubsMat from Bio.Align import AlignInfo from Bio.Align.FormatConvert import FormatConverter cline = Clustalw.MultipleAlignCL(os.path.join(os.curdir, 'apoe.fasta')) cline.set_output('test.aln') alignment = Clustalw.do_alignment(cline) #for s in alignment.get_all_seqs(): # sequence = s.seq.tostring() align_info = AlignInfo.SummaryInfo(alignment) sequence = align_info.dumb_consensus() #make a reverse sequence temp = list(sequence) temp.reverse() reverse_sequence = string.join(temp, '') #make all possible reading frames var1 = Seq.translate(sequence[0:]) var2 = Seq.translate(sequence[1:]) var3 = Seq.translate(sequence[2:]) var4 = Seq.translate(reverse_sequence[0:]) var5 = Seq.translate(reverse_sequence[1:]) var6 = Seq.translate(reverse_sequence[2:]) var_list = [var1, var2, var3, var4, var5, var6] #make a table of all start and stop aa locations start_list = [[],[],[],[],[],[]] stop_list = [[],[],[],[],[],[]] for var in range(len(var_list)): for aa in range(len(var_list[var])): if var_list[var][aa] == 'M': start_list[var].append(aa) elif var_list[var][aa] == '*': stop_list[var].append(aa) print start_list RF_length_list = [[],[],[],[],[],[]] for var in range(len(var_list)): if start_list[var] != []: c = 0 while start_list[var][0] > stop_list[var][c]: c += 1 RF_length_list[var] = stop_list[var][c] - start_list[var][0] else: RF_length_list[var] = 'NA' start_list[var][0] = 'NA' lists = [start_list, stop_list, RF_length_list] for w in range(len(lists)): for x in range(len(lists[w]): for y in range(len(lists[w][x])): lists[w][x][y] = str(lists[w][x][y]) for variable in range(len(var_list)): if variable < 3: print '5->3 frame %i: start codon position: %s. stop codon position: %s. Transcript length: %s.' % (variable, start_list[variable][0], stop_list[variable][c], RF_length_list[variable]) else: print '3->5 frame %i: start codon position: %s. stop codon position: %s. Transcript length: %s.' % (variable, start_list[variable][0], stop_list[variable][c], RF_length_list[variable])