IGEM:Harvard/2006/Adaptamers/Code/avoidv1
From OpenWetWare
Jump to navigation | Jump to search
import random
import re
import sys

# Sequences are handled internally as strings of digits so that the
# complement of a base is simply (5 - code):  A=1, C=2, G=3, T=4.
_BASE_TO_DIGIT = {'A': '1', 'C': '2', 'G': '3', 'T': '4'}
_DIGIT_TO_BASE = {'1': 'A', '2': 'C', '3': 'G', '4': 'T'}

# Length of the sliding window used when counting conflicts.
_WINDOW = 4


def replace(string):
    """Encode an upper-case DNA string as digits (A=1, C=2, G=3, T=4).

    Characters other than 'A', 'C', 'G' and 'T' are silently dropped.
    """
    return ''.join(_BASE_TO_DIGIT.get(base, '') for base in string)


def getseq(stri):
    """Decode a digit string back to bases (1=A, 2=C, 3=G, 4=T).

    Characters other than '1'..'4' are silently dropped.
    """
    return ''.join(_DIGIT_TO_BASE.get(digit, '') for digit in stri)


def find(string):
    """Unfinished stub kept for interface compatibility.

    The original body only looped over the encoded sequence and created an
    unused dict; it computes nothing and always returns None.
    """
    for _ in range(len(replace(string))):
        pass
    return None


def getrevcomp(st):
    """Return the reverse complement of the digit-encoded sequence `st`."""
    return ''.join(str(5 - int(digit)) for digit in reversed(st))


def getcomp(st):
    """Return the (non-reversed) complement of the digit-encoded `st`."""
    return ''.join(str(5 - int(digit)) for digit in st)


def getrev(asd):
    """Return `asd` with its characters in reverse order."""
    return ''.join(reversed(asd))


def returnrandstring(lent):
    """Return a random digit-encoded sequence of `lent` bases."""
    return ''.join(str(random.randint(1, 4)) for _ in range(lent))


def mutate(sts, pos, free):
    """Replace one position in `sts[pos:pos+free]` with a random base.

    `free` is how many positions, starting at `pos`, are candidates for the
    mutation.  The new base may equal the old one.
    """
    place = pos + random.randint(0, free - 1)
    return sts[0:place] + str(random.randint(1, 4)) + sts[place + 1:]


def mutatenoG(sts, pos, free):
    """Like `mutate`, but the replacement base is only ever A or T.

    4 ** 0 == 1 (A) and 4 ** 1 == 4 (T), so the mutation can never
    introduce a G or a C.
    """
    place = pos + random.randint(0, free - 1)
    return sts[0:place] + str(4 ** random.randint(0, 1)) + sts[place + 1:]


def noGs(word):
    """Make one left-to-right pass mutating inside every 'GGG' run found.

    The replacement base is random, so a run is not guaranteed to be
    removed (the mutation may pick G again).
    """
    for q in range(len(word) - 2):
        if word[q:q + 3] == '333':
            word = mutate(word, q, 3)
    return word


def noCs(word):
    """Make one left-to-right pass breaking every 'CCC' run found.

    Uses `mutatenoG`, so the replacement is A or T and cannot create a new
    run of G's or C's.
    """
    for q in range(len(word) - 2):
        if word[q:q + 3] == '222':
            word = mutatenoG(word, q, 3)
    return word


def GetGCcontent(work):
    """Return the fraction of C/G bases in the digit-encoded `work`.

    Returns 0.0 for an empty sequence instead of dividing by zero.
    """
    if not work:
        return 0.0
    count = sum(1 for digit in work if digit in ('2', '3'))
    return float(count) / len(work)


def countself(tester, thebadstring):
    """Count 4-base windows of `tester` that conflict with `thebadstring`.

    A window of `tester` conflicts with a window of `thebadstring` when the
    latter equals the window itself, its reverse complement, or its
    complement; each of the three tests scores one point independently.
    Windows at the same index in both strings are skipped (the original
    note: "don't care about matching up to exact same spot").

    Both arguments are digit-encoded sequences.

    Returns:
        [total, index] where `total` is the sum of conflicts over all
        windows of `tester` and `index` is the start of the first window
        with the highest conflict count (0 if there are no conflicts).
    """
    bad_windows = [thebadstring[b:b + _WINDOW]
                   for b in range(len(thebadstring) - _WINDOW + 1)]
    tot = 0
    greates = 0    # greatest number of conflicts any single window has
    greatasso = 0  # start index of that window
    for a in range(len(tester) - _WINDOW + 1):
        window = tester[a:a + _WINDOW]
        # Loop-invariant for the inner scan, so compute once per window.
        targets = (window, getrevcomp(window), getcomp(window))
        subtotal = 0
        for b, bad in enumerate(bad_windows):
            if b == a:
                continue
            for target in targets:
                if bad == target:
                    subtotal += 1
        if subtotal > greates:  # a part that is causing lots of problems
            greates = subtotal
            greatasso = a
        tot += subtotal
    return [tot, greatasso]


def getleast(badstring, length, iterations=1000):
    """Search for a sequence of `length` bases with few conflicts.

    Starts from a random sequence and repeatedly mutates the best candidate
    so far, keeping a mutant whenever its conflict total (see `countself`)
    is lower.  One time in three the mutation site is random; otherwise it
    is the window that `countself` last reported as the worst offender.

    Args:
        badstring: sequence of 'A'/'C'/'G'/'T' to avoid matching.
        length: number of bases to generate; must be at least 4.
        iterations: number of mutation rounds (default 1000).

    Returns:
        [best_total, gc_fraction, best_sequence]; the sequence is
        digit-encoded (use `getseq` to turn it back into bases).

    Raises:
        ValueError: if `length` is smaller than the 4-base window.
    """
    if length < _WINDOW:
        raise ValueError('length must be at least %d' % _WINDOW)

    badstring = replace(badstring)
    beststring = noCs(noGs(returnrandstring(length)))
    besttotal = 10000000
    greatassoc = 0
    for _ in range(iterations):
        if random.randint(1, 3) == 1:
            # 1/3 of the time we mutate randomly.  randint is inclusive on
            # both ends, so window starts run from 0 to length - 4.
            work = mutate(beststring,
                          random.randint(0, length - _WINDOW), _WINDOW)
        else:
            work = mutate(beststring, greatassoc, _WINDOW)
        work = noGs(work)
        # noCs only substitutes A/T, so it cannot re-create a GGG run.
        work = noCs(work)
        # NOTE(review): the candidate is scored against the current best
        # string plus the bad string, not against itself -- confirm this
        # is the intended self-conflict measure.
        worsestring = beststring + badstring
        output = countself(work, worsestring)
        greatassoc = output[1]
        if output[0] < besttotal:
            besttotal = output[0]
            beststring = work
    return [besttotal, GetGCcontent(beststring), beststring]


def multleast(bads, lenh):
    """Run `getleast(bads, lenh)` ten times and print results, best first.

    Each printed line is a [total, gc_fraction, sequence] list; the lists
    are sorted ascending, so the lowest conflict total comes first.
    """
    alist = [getleast(bads, lenh) for _ in range(10)]
    alist.sort()
    for x in alist:
        print(x)