# IGEM:Harvard/2006/Adaptamers/Code/avoidv1

import sys, re, random

# Nucleotide letter -> digit code used throughout this file.
_BASE_TO_DIGIT = {'A': '1', 'C': '2', 'G': '3', 'T': '4'}


def replace(string):
    """Encode a DNA sequence as a string of digits.

    'A' -> '1', 'C' -> '2', 'G' -> '3', 'T' -> '4'.  Any other character
    (including lowercase letters) is silently dropped, so the result may be
    shorter than the input.

    string: iterable of single characters, normally an uppercase DNA string.
    Returns the digit-encoded sequence as a str.
    """
    # join() builds the result in one pass instead of repeated concatenation.
    return ''.join(
        _BASE_TO_DIGIT[base] for base in string if base in _BASE_TO_DIGIT
    )

# Digit code -> nucleotide letter (inverse of the encoding used by replace()).
_DIGIT_TO_BASE = {'1': 'A', '2': 'C', '3': 'G', '4': 'T'}


def getseq(stri):
    """Decode a digit-encoded sequence back into DNA letters.

    '1' -> 'A', '2' -> 'C', '3' -> 'G', '4' -> 'T'.  Any other character is
    silently dropped.

    stri: iterable of single characters, normally a string of digits 1-4.
    Returns the decoded sequence as a str.
    """
    # join() builds the result in one pass instead of repeated concatenation.
    return ''.join(
        _DIGIT_TO_BASE[digit] for digit in stri if digit in _DIGIT_TO_BASE
    )

def find(string):
    """Unfinished placeholder: encodes *string* and loops over its length.

    The input is passed through replace(); for every position of the encoded
    result an empty dict is created and immediately discarded.  Nothing is
    accumulated and the function returns None.
    """
    encoded = replace(string)
    for _ in range(len(encoded)):
        scratch = {}

def getrevcomp(st):
    """Return the reverse complement of a digit-encoded sequence.

    Each digit d is replaced by 5 - d (1 <-> 4, 2 <-> 3) and the order of the
    digits is reversed.

    st: iterable of digit characters.
    Returns the reverse complement as a str.
    Raises ValueError if an element cannot be converted with int().
    """
    complemented = [str(5 - int(digit)) for digit in st]
    complemented.reverse()
    return ''.join(complemented)

def getcomp(st):
    """Return the complement of a digit-encoded sequence, order unchanged.

    Each digit d is replaced by 5 - d (1 <-> 4, 2 <-> 3).

    st: iterable of digit characters.
    Returns the complemented sequence as a str.
    Raises ValueError if an element cannot be converted with int().
    """
    return ''.join(str(5 - int(digit)) for digit in st)

def getrev(asd):
    """Return the characters of *asd* in reverse order as a str.

    asd: a string (or any iterable of strings).
    """
    pieces = list(asd)
    pieces.reverse()
    return ''.join(pieces)

def returnrandstring(lent):
    """Return a random digit-encoded sequence of length *lent*.

    Every position is drawn independently and uniformly from '1'..'4' using
    random.randint, one call per position.  A *lent* of zero or less yields
    the empty string.
    """
    return ''.join(str(random.randint(1, 4)) for _ in range(lent))

def mutate(sts, pos, free):
    """Return a copy of *sts* with one position overwritten by a random digit.

    sts:  digit-encoded sequence (str).
    pos:  first index that may be overwritten.
    free: how many spots forward (starting at pos) aren't good; the index to
          overwrite is picked uniformly from pos .. pos + free - 1.

    The new digit is drawn uniformly from 1-4, so it can equal the digit it
    replaces.  Callers are expected to keep pos + free <= len(sts): if the
    chosen index lies past the end, the digit is appended and the result is
    one character longer than the input.

    Raises ValueError (from random.randint) when free < 1.
    """
    place = pos + random.randint(0, free - 1)
    return sts[:place] + str(random.randint(1, 4)) + sts[place + 1:]

def mutatenoG(sts, pos, free):
    """Return a copy of *sts* with one position overwritten by '1' or '4'.

    sts:  digit-encoded sequence (str).
    pos:  first index that may be overwritten.
    free: how many spots forward (starting at pos) aren't good; the index to
          overwrite is picked uniformly from pos .. pos + free - 1.

    The replacement is 4 ** randint(0, 1), i.e. '1' or '4' with equal
    probability, so this never writes a '2' or a '3'.  Callers are expected
    to keep pos + free <= len(sts): if the chosen index lies past the end,
    the digit is appended and the result is one character longer.

    Raises ValueError (from random.randint) when free < 1.
    """
    place = pos + random.randint(0, free - 1)
    new_digit = str(4 ** random.randint(0, 1))
    return sts[:place] + new_digit + sts[place + 1:]

def noGs(word):
    """Return *word* with every run of three consecutive '3' digits broken.

    The sequence is scanned left to right; whenever the three digits starting
    at the current index are all '3', one of those three positions is
    re-drawn with mutate().

    word: digit-encoded sequence (str).  The result has the same length.
    """
    for start in range(len(word) - 2):
        # mutate() draws from 1-4 and can pick '3' again, which would leave
        # the run untouched, so keep re-drawing until this window is broken.
        # Turning a '3' into another digit cannot create a new '333' window,
        # so one left-to-right pass is enough.
        while word[start:start + 3] == '333':
            word = mutate(word, start, 3)
    return word

def noCs(word):
    """Return *word* with runs of three consecutive '2' digits mutated.

    Scans left to right; whenever the three digits starting at the current
    index are all '2', one of those three positions is overwritten via
    mutatenoG().

    word: digit-encoded sequence (str).
    """
    for start in range(len(word) - 2):
        if word[start:start + 3] == '222':
            word = mutatenoG(word, start, 3)
    return word

def GetGCcontent(work):
    """Return the fraction of positions in *work* that are '2' or '3'.

    work: digit-encoded sequence (str).
    Returns a float between 0.0 and 1.0; an empty sequence yields 0.0
    instead of raising ZeroDivisionError.
    """
    if not work:
        return 0.0
    gc_count = sum(1 for digit in work if digit in ('2', '3'))
    # float() keeps true division under Python 2 as well.
    return float(gc_count) / len(work)

def countself(tester, thebadstring):
    """Count 4-letter window conflicts between *tester* and *thebadstring*.

    For every 4-character window of *tester* (start index a) and every
    4-character window of *thebadstring* (start index b, b != a), one
    conflict is counted for each of these that holds:
      * the two windows are identical,
      * the reverse complement of the tester window equals the bad window,
      * the complement of the tester window equals the bad window.

    tester, thebadstring: digit-encoded sequences (str).

    Returns [tot, greatasso]: the total number of conflicts, and the start
    index in *tester* of the first window with the highest conflict count
    (0 when no window has any conflict).
    """
    tot = 0
    greates = 0    # greatest number of conflicts any 4 letter window has
    greatasso = 0  # start index in tester of that window

    bad_windows = [thebadstring[b:b + 4] for b in range(len(thebadstring) - 3)]
    if not bad_windows:
        # Nothing to compare against: every subtotal would be zero.
        return [tot, greatasso]

    for a in range(len(tester) - 3):
        # These depend only on a, so compute them once per tester window
        # rather than once per (a, b) pair.
        window = tester[a:a + 4]
        window_revcomp = getrevcomp(window)
        window_comp = getcomp(window)

        subtotal = 0
        for b, bad in enumerate(bad_windows):
            # don't care about matching up to exact same spot
            # NOTE(review): the nesting was reconstructed from source whose
            # indentation was lost; confirm that the two complement checks
            # belong under this guard as well.
            if b == a:
                continue
            if window == bad:
                subtotal += 1
            if window_revcomp == bad:  # complement bad
                subtotal += 1
            if window_comp == bad:     # same strand bad
                subtotal += 1

        if subtotal > greates:  # identify a part that is causing lots of problems
            greates = subtotal
            greatasso = a
        tot += subtotal
    return [tot, greatasso]

def getleast(badstring, length, iterations=1000):
    """Randomly search for a sequence with few conflicts against *badstring*.

    Starts from a random digit-encoded sequence of the given length, then
    repeatedly mutates the best candidate so far, scores the mutant with
    countself() against (current best + encoded badstring), and keeps the
    mutant when its score is lower than the best score seen.

    badstring:  DNA letters (A/C/G/T); encoded with replace() before use.
    length:     length of the sequence to design; must be at least 4, the
                window size used by countself() and the mutation span.
    iterations: number of mutate-and-score rounds (default 1000).

    Returns [besttotal, GC fraction of the best sequence, best sequence],
    with the sequence still in digit encoding.

    Raises ValueError if length < 4.
    """
    if length < 4:
        raise ValueError("length must be at least 4, got %r" % (length,))

    badstring = replace(badstring)
    work = noCs(noGs(returnrandstring(length)))

    beststring = work
    besttotal = 10000000  # sentinel: the first scored candidate always wins
    greatassoc = 0        # start of the worst window reported by the last score
    for _ in range(iterations):
        if random.randint(1, 3) == 1:
            # 1/3 of the time we mutate randomly.  Window starts run from 0
            # to length - 4, so draw from that whole range; starting at 1
            # would never pick the first window and fails for length 4.
            work = mutate(beststring, random.randint(0, length - 4), 4)
        else:
            # Otherwise target the window that caused the most conflicts.
            work = mutate(beststring, greatassoc, 4)
        work = noGs(work)
        # noCs only ever writes '1' or '4', so it cannot mutate to a
        # situation with 3 G's in a row after noGs has run.
        work = noCs(work)

        # NOTE(review): the candidate is scored against the current best
        # plus the bad string, not against itself; confirm this is intended.
        worsestring = beststring + badstring
        output = countself(work, worsestring)

        greatassoc = output[1]
        if output[0] < besttotal:
            besttotal = output[0]
            beststring = work
    return [besttotal, GetGCcontent(beststring), beststring]

def multleast(bads, lenh):
    """Run getleast() ten times and print the results, best first.

    bads: DNA letters to avoid, passed straight to getleast().
    lenh: length of the sequence to design, passed straight to getleast().

    Each printed line is one getleast() result:
    [conflict total, GC fraction, digit-encoded sequence].
    Returns None.
    """
    alist = [getleast(bads, lenh) for _ in range(10)]
    # The original referenced alist.sort without calling it, so nothing was
    # sorted.  Lists compare element by element, so this orders the results
    # by conflict total first.
    alist.sort()

    for x in alist:
        # Parenthesised form prints the same on Python 2 and Python 3.
        print(x)