# Benjamin Leibowicz
# Biophysics 101
# Assignment 3

# Exercise 1
# Raw p53 segment as pasted, 60 bases per line.  The line breaks inside the
# triple-quoted literal are part of the string, which is why len(p53segraw)
# is larger than the number of bases: each break contributes one character.
p53segraw = """cggagcagctcactattcacccgatgagaggggaggagagagagagaaaatgtcctttag
gccggttcctcttacttggcagagggaggctgctattctccgcctgcatttctttttctg
gattacttagttatggcctttgcaaaggcaggggtatttgttttgatgcaaacctcaatc
cctccccttctttgaatggtgtgccccaccccccgggtcgcctgcaacctaggcggacgc
taccatggcgtagacagggagggaaagaagtgtgcagaaggcaagcccggaggcactttc
aagaatgagcatatctcatcttcccggagaaaaaaaaaaaagaatggtacgtctgagaat
gaaattttgaaagagtgcaatgatgggtcgtttgataatttgtcgggaaaaacaatctac
ctgttatctagctttgggctaggccattccagttccagacgcaggctgaacgtcgtgaag
cggaaggggcgggcccgcaggcgtccgtgtggtcctccgtgcagccctcggcccgagccg
gttcttcctggtaggaggcggaactcgaattcatttctcccgctgccccatctcttagct
cgcggttgtttcattccgcagtttcttcccatgcacctgccgcgtaccggccactttgtg
ccgtacttacgtcatctttttcctaaatcgaggtggcatttacacacagcgccagtgcac
acagcaagtgcacaggaagatgagttttggcccctaaccgctccgtgatgcctaccaagt
cacagacccttttcatcgtcccagaaacgtttcatcacgtctcttcccagtcgattcccg
accccacctttattttgatctccataaccattttgcctgttggagaacttcatatagaat
ggaatcaggatgggcgctgtggctcacgcctgcactttggctcacgcctgcactttggga
ggccgaggcgggcggattacttgaggataggagttccagaccagcgtggccaacgtggtg"""

rawlength = len(p53segraw)

# Keep only the four nucleotide letters.  This drops the separator
# characters between lines (and anything else that is not a base) in a
# single pass instead of growing the string one character at a time.
p53seg = "".join(base for base in p53segraw if base in "acgt")

# With the separators gone, str.count gives the per-base totals directly.
numC = p53seg.count("c")
numG = p53seg.count("g")
numT = p53seg.count("t")
numA = p53seg.count("a")

length = len(p53seg)

# GC content as a percentage of the cleaned sequence length.
GCcontent = (float(numC + numG) / float(length)) * 100

# One pre-formatted argument keeps this statement valid under both the
# Python 2 print statement and the Python 3 print function.
print('The GC content of p53seg is  %s %%.' % GCcontent)
# Exercise 2
# Reverse the strand first, then swap every base for its pairing partner.
rc = p53seg[::-1]

# Base-pairing table used to complement the reversed strand.
complement = {'a': 't', 't': 'a', 'c': 'g', 'g': 'c'}

# Characters that are not one of the four bases are skipped, not rejected,
# so the result is simply shorter if p53seg contains anything else.
reversecomplement = "".join(complement.get(base, "") for base in rc)

# One concatenated argument keeps this statement valid under both the
# Python 2 print statement and the Python 3 print function.
print('\nThe reverse complement of p53seg is:\n' + reversecomplement)

# Exercise 3
# Codon table: lowercase DNA codon -> one-letter amino acid, '*' for stop.
standard = {
    'ttt': 'F', 'tct': 'S', 'tat': 'Y', 'tgt': 'C',
    'ttc': 'F', 'tcc': 'S', 'tac': 'Y', 'tgc': 'C',
    'tta': 'L', 'tca': 'S', 'taa': '*', 'tga': '*',
    'ttg': 'L', 'tcg': 'S', 'tag': '*', 'tgg': 'W',

    'ctt': 'L', 'cct': 'P', 'cat': 'H', 'cgt': 'R',
    'ctc': 'L', 'ccc': 'P', 'cac': 'H', 'cgc': 'R',
    'cta': 'L', 'cca': 'P', 'caa': 'Q', 'cga': 'R',
    'ctg': 'L', 'ccg': 'P', 'cag': 'Q', 'cgg': 'R',

    'att': 'I', 'act': 'T', 'aat': 'N', 'agt': 'S',
    'atc': 'I', 'acc': 'T', 'aac': 'N', 'agc': 'S',
    'ata': 'I', 'aca': 'T', 'aaa': 'K', 'aga': 'R',
    'atg': 'M', 'acg': 'T', 'aag': 'K', 'agg': 'R',

    'gtt': 'V', 'gct': 'A', 'gat': 'D', 'ggt': 'G',
    'gtc': 'V', 'gcc': 'A', 'gac': 'D', 'ggc': 'G',
    'gta': 'V', 'gca': 'A', 'gaa': 'E', 'gga': 'G',
    'gtg': 'V', 'gcg': 'A', 'gag': 'E', 'ggg': 'G',
}

# Translate the DNA sequence into a protein sequence in all 6 frames.
import random


def _translate(sequence, frame):
    """Translate a lowercase DNA string into one-letter amino-acid codes.

    Reading starts at the 0-based offset ``frame`` and advances three bases
    at a time, looking each codon up in the module-level ``standard`` table.
    Bases left over after the last complete codon are ignored.  The bound is
    derived from ``len(sequence)`` rather than a hard-coded sequence length.

    Raises KeyError if a codon is not present in ``standard``.
    """
    last_start = len(sequence) - 2  # first index with no full codon left
    return "".join(standard[sequence[pos:pos + 3]]
                   for pos in range(frame, last_start, 3))


# Forward strand, reading frames +1 to +3.  Each print receives a single
# concatenated string so it works as a Python 2 statement and a Python 3
# function call alike.
proteinsequenceplus1 = _translate(p53seg, 0)
print('\nThe protein sequence in the +1 frame is:\n' + proteinsequenceplus1)

proteinsequenceplus2 = _translate(p53seg, 1)
print('\nThe protein sequence in the +2 frame is:\n' + proteinsequenceplus2)

proteinsequenceplus3 = _translate(p53seg, 2)
print('\nThe protein sequence in the +3 frame is:\n' + proteinsequenceplus3)

# Reverse-complement strand, reading frames -1 to -3.
proteinsequenceminus1 = _translate(reversecomplement, 0)
print('\nThe protein sequence in the -1 frame is:\n' + proteinsequenceminus1)

proteinsequenceminus2 = _translate(reversecomplement, 1)
print('\nThe protein sequence in the -2 frame is:\n' + proteinsequenceminus2)

proteinsequenceminus3 = _translate(reversecomplement, 2)
print('\nThe protein sequence in the -3 frame is:\n' + proteinsequenceminus3)

# Exercise 4
# Apply 12 random point substitutions to a copy of the sequence.  The drawn
# base may equal the one already present, in which case nothing changes.
p53segmut = p53seg
for _ in range(12):
    # randrange excludes its upper bound, so the position is always a valid
    # index.  random.randint(0, length) could return `length` itself, which
    # appended a base instead of replacing one and lengthened the sequence.
    rand = random.randrange(len(p53segmut))
    p53segmut = p53segmut[:rand] + random.choice('atgc') + p53segmut[rand + 1:]

proteinsequencemut = _translate(p53segmut, 0)
print('\nThe mutated protein sequence in the +1 frame is:\n' + proteinsequencemut)

# Report every position where the +1 translation differs after mutation.
# The values printed are amino-acid letters (the translated codons).
for normal, mutated in zip(proteinsequenceplus1, proteinsequencemut):
    if normal != mutated:
        print("\nNormal codon: %s Mutation changes to: %s ." % (normal, mutated))