IGEM:Harvard/2006/Adaptamers/Code/avoidv1
From OpenWetWare
Jump to navigation | Jump to search
import sys, re, random
def replace(string):
    """Encode a DNA sequence as a string of digits.

    'A' -> '1', 'C' -> '2', 'G' -> '3', 'T' -> '4'.  Any other character
    (including lowercase letters) is silently dropped.
    """
    digit_for_base = {'A': '1', 'C': '2', 'G': '3', 'T': '4'}
    encoded = [digit_for_base[base] for base in string if base in digit_for_base]
    return ''.join(encoded)
def getseq(stri):
    """Decode a digit-encoded sequence back into DNA letters.

    '1' -> 'A', '2' -> 'C', '3' -> 'G', '4' -> 'T'.  Any other character
    is silently dropped.
    """
    base_for_digit = {'1': 'A', '2': 'C', '3': 'G', '4': 'T'}
    decoded = [base_for_digit[digit] for digit in stri if digit in base_for_digit]
    return ''.join(decoded)
def find(string):
    """Unfinished stub: encode `string` and loop over the encoded length.

    The loop body only rebinds a local empty dict, so nothing is computed
    and the function implicitly returns None.
    """
    encoded = replace(string)
    for _ in range(len(encoded)):
        dicta = {}
def getrevcomp(st):
    """Return the reverse complement of a digit-encoded sequence.

    Every character is read with int() and replaced by str(5 - value), so
    1 <-> 4 and 2 <-> 3; the results are emitted in reverse order.
    """
    pieces = [str(5 - int(digit)) for digit in st]
    pieces.reverse()
    return ''.join(pieces)
def getcomp(st):
    """Return the complement of a digit-encoded sequence, order preserved.

    Every character is read with int() and replaced by str(5 - value), so
    1 <-> 4 and 2 <-> 3.
    """
    return ''.join([str(5 - int(digit)) for digit in st])
def getrev(asd):
    """Return the characters of `asd` joined in reverse order."""
    chars = list(asd)
    chars.reverse()
    return ''.join(chars)
def returnrandstring(lent):
    """Return a random digit-encoded sequence of `lent` characters.

    Each character is drawn independently with random.randint(1, 4), one
    draw per position, left to right.
    """
    digits = [str(random.randint(1, 4)) for _ in range(lent)]
    return ''.join(digits)
def mutate(sts, pos, free):  # free: how many spots forward aren't good
    """Replace one character of `sts` with a random digit '1'..'4'.

    The position is chosen uniformly from the `free` positions starting at
    `pos` (pos, pos+1, ..., pos+free-1).  The new digit is drawn without
    regard to the old one, so the result may equal the input.

    The returned string always has the same length as `sts`: a position
    that falls outside the string is clamped to the nearest valid index,
    and an empty string is returned unchanged.  (Previously an
    out-of-range position appended a character and grew the sequence.)

    Raises ValueError (from random.randint) when `free` is less than 1.
    """
    # Keep the draw order (position first, then digit) so seeded runs that
    # stay in range reproduce the original results.
    place = pos + random.randint(0, free - 1)
    new_digit = str(random.randint(1, 4))
    if not sts:
        return sts
    place = max(0, min(place, len(sts) - 1))
    return sts[:place] + new_digit + sts[place + 1:]
def mutatenoG(sts, pos, free):  # free: how many spots forward aren't good
    """Replace one character of `sts` with '1' or '4' (never '2' or '3').

    The position is chosen uniformly from the `free` positions starting at
    `pos`.  The replacement is 4 ** randint(0, 1), i.e. '1' or '4', so the
    substitution can never introduce a '2' or a '3'.

    The returned string always has the same length as `sts`: a position
    that falls outside the string is clamped to the nearest valid index,
    and an empty string is returned unchanged.  (Previously an
    out-of-range position appended a character and grew the sequence.)

    Raises ValueError (from random.randint) when `free` is less than 1.
    """
    # Keep the draw order (position first, then digit) so seeded runs that
    # stay in range reproduce the original results.
    place = pos + random.randint(0, free - 1)
    new_digit = str(pow(4, random.randint(0, 1)))
    if not sts:
        return sts
    place = max(0, min(place, len(sts) - 1))
    return sts[:place] + new_digit + sts[place + 1:]
def noGs(word):
    """Break every run of three consecutive '3' digits in `word`.

    The string is scanned left to right; whenever positions q..q+2 are all
    '3', one of those three positions (chosen at random) is replaced by a
    random digit other than '3'.  Because a replacement never writes a
    '3', a run that has been broken cannot re-form, so the result is
    guaranteed to contain no "333".  (The previous version substituted a
    digit from 1..4, which redrew '3' a quarter of the time and left the
    run in place.)

    The replacement may be '2', so a run of '2's can be created here.
    The length of the string is never changed.
    """
    for q in range(len(word) - 2):
        if word[q:q + 3] == '333':
            place = q + random.randint(0, 2)
            word = word[:place] + random.choice('124') + word[place + 1:]
    return word
def noCs(word):
    """Break runs of three consecutive '2' digits in `word`.

    Scans start positions left to right (the range is fixed from the
    length of the incoming string) and, wherever the three characters
    starting there are all '2', hands the string to mutatenoG to rewrite
    one of those three positions.
    """
    for start in range(len(word) - 2):
        triplet = word[start:start + 3]
        if triplet != '222':
            continue
        word = mutatenoG(word, start, 3)
    return word
def GetGCcontent(work):
    """Return the fraction of characters in `work` that are '2' or '3'.

    The result is a float between 0.0 and 1.0.  An empty sequence yields
    0.0 instead of raising ZeroDivisionError.
    """
    if not work:
        return 0.0
    gc_count = sum(1 for digit in work if digit in ('2', '3'))
    return float(gc_count) / len(work)
def countself(tester, thebadstring):
    """Count 4-character window conflicts between two digit-encoded strings.

    For every 4-character window of `tester` (start index a) and every
    4-character window of `thebadstring` (start index b, b != a), one
    conflict is counted for each of the following that holds:

      * the two windows are identical,
      * the reverse complement of the tester window equals the other window,
      * the complement of the tester window equals the other window.

    Windows with the same start index are skipped (the original comment:
    "don't care about matching up to exact same spot").

    Returns [total, worst_start]: the total number of conflicts, and the
    start index in `tester` of the window with the most conflicts (the
    first such window on ties; 0 when there are no conflicts at all).

    NOTE(review): the source this was recovered from had lost its
    indentation; all three comparisons are assumed to sit under the
    `b != a` guard -- confirm against the original script.
    """
    tot = 0
    greates = 0  # greatest number of conflicts seen for any single window
    greatasso = 0  # start index of the window that produced `greates`

    # Slice the comparison windows once instead of once per tester window.
    bad_windows = [thebadstring[b:b + 4] for b in range(len(thebadstring) - 3)]

    for a in range(len(tester) - 3):
        window = tester[a:a + 4]
        # These depend only on `a`, so compute them outside the inner loop.
        patterns = (window, getrevcomp(window), getcomp(window))

        subtotal = 0
        for b, bad_window in enumerate(bad_windows):
            if b == a:
                continue
            for pattern in patterns:
                if pattern == bad_window:
                    subtotal += 1

        if subtotal > greates:  # identify a part that is causing lots of problems
            greates = subtotal
            greatasso = a
        tot += subtotal
    return [tot, greatasso]
def getleast(badstring, length, iterations=1000):
    """Search for a sequence with few 4-mer conflicts with itself and `badstring`.

    Starts from a random digit-encoded sequence of `length` characters and
    hill-climbs: each iteration mutates the current best sequence, passes
    it through noGs and noCs, scores it with countself, and keeps it only
    if the score is strictly lower.

    badstring  -- DNA letters (A/C/G/T) to avoid; encoded with replace().
    length     -- length of the sequence to design; must be at least 4.
    iterations -- number of mutation attempts (default 1000).

    Returns [best_score, GC_fraction, best_sequence]; the sequence is in
    the digit encoding.

    Raises ValueError when `length` is less than 4, since no 4-character
    mutation window fits.

    Changes from the previous version:
      * A candidate is scored against itself plus `badstring`.  It used to
        be scored against the *previous* best string, so the returned
        score did not describe the returned sequence.
      * The random mutation window may start at index 0 (it used to start
        at 1 at the earliest, and length == 4 raised ValueError).
      * The starting sequence is scored, so the first mutation must
        actually be an improvement to be accepted.
    """
    if length < 4:
        raise ValueError("length must be at least 4")

    badstring = replace(badstring)
    beststring = noCs(noGs(returnrandstring(length)))
    besttotal, greatassoc = countself(beststring, beststring + badstring)

    for _ in range(iterations):
        if random.randint(1, 3) == 1:  # 1/3 of the time we mutate randomly
            start = random.randint(0, length - 4)
        else:  # otherwise target the window reported as worst last time
            start = greatassoc
        work = mutate(beststring, start, 4)
        work = noGs(work)
        work = noCs(work)  # noCs only writes '1' or '4', so it cannot recreate 3 G's in a row

        output = countself(work, work + badstring)
        greatassoc = output[1]
        if output[0] < besttotal:
            besttotal = output[0]
            beststring = work
    return [besttotal, GetGCcontent(beststring), beststring]
def multleast(bads, lenh, runs=10):
    """Run getleast several times and print the results, best score first.

    bads -- DNA letters to avoid, passed through to getleast.
    lenh -- length of the sequence to design, passed through to getleast.
    runs -- number of independent getleast runs (default 10).

    Each result is a [score, GC_fraction, sequence] list; the list of
    results is sorted ascending and printed one result per line.
    Returns None.
    """
    results = [getleast(bads, lenh) for _ in range(runs)]
    results.sort()
    for result in results:
        # Parenthesised single-argument form prints identically on
        # Python 2 and is valid syntax on Python 3.
        print(result)