IGEM:Harvard/2006/Adaptamers/Code/avoidv1

From OpenWetWare
Jump to navigation | Jump to search

import sys, re, random

def replace(string):
    """Encode a DNA sequence as a string of digits.

    Each base is mapped 'A' -> '1', 'C' -> '2', 'G' -> '3', 'T' -> '4'.
    Every other character (lowercase bases included) is dropped, so the
    result can be shorter than the input.
    """
    digit_for_base = {'A': '1', 'C': '2', 'G': '3', 'T': '4'}
    return ''.join(digit_for_base.get(base, '') for base in string)

def getseq(stri):
    """Decode a digit string back into DNA letters.

    Maps '1' -> 'A', '2' -> 'C', '3' -> 'G', '4' -> 'T'. Any other
    character is dropped from the output.
    """
    base_for_digit = {'1': 'A', '2': 'C', '3': 'G', '4': 'T'}
    return ''.join(base_for_digit.get(digit, '') for digit in stri)

def find(string):
    """Unfinished placeholder.

    Encodes *string* with replace() and, once per encoded character,
    creates an empty dict that is immediately discarded. Nothing is
    computed from it and the function implicitly returns None.
    """
    encoded_length = len(replace(string))
    for _ in range(encoded_length):
        dicta = {}

def getrevcomp(st):
    """Return the reverse complement of a digit-encoded sequence.

    Each digit d is replaced by 5 - d (so 1 <-> 4 and 2 <-> 3) and the
    order of the positions is reversed.
    """
    complemented = [str(5 - int(digit)) for digit in st]
    return ''.join(reversed(complemented))

def getcomp(st):
    """Return the complement of a digit-encoded sequence, order unchanged.

    Each digit d is replaced by 5 - d (so 1 <-> 4 and 2 <-> 3).
    """
    return ''.join([str(5 - int(digit)) for digit in st])

def getrev(asd):
    """Return the characters of *asd* joined in reverse order."""
    characters = list(asd)
    characters.reverse()
    return ''.join(characters)

def returnrandstring(lent):
    """Return a random string of *lent* digits, each drawn from 1..4.

    One random.randint(1, 4) call is made per position, left to right.
    """
    digits = [str(random.randint(1, 4)) for _ in range(lent)]
    return ''.join(digits)

def mutate(sts, pos, free):
    """Overwrite one position of *sts* with a random digit 1..4.

    sts  -- digit-encoded sequence to change.
    pos  -- first index eligible for the change.
    free -- how many spots forward (starting at pos) aren't good; the
            changed index is drawn uniformly from pos .. pos + free - 1.

    The new digit may equal the old one, so the result can be identical
    to the input.
    """
    offset = random.randint(0, free - 1)
    place = pos + offset
    new_digit = str(random.randint(1, 4))
    head, tail = sts[:place], sts[place + 1:]
    return head + new_digit + tail

def mutatenoG(sts, pos, free):
    """Overwrite one position of *sts* with '1' or '4' (never '2' or '3').

    sts  -- digit-encoded sequence to change.
    pos  -- first index eligible for the change.
    free -- how many spots forward (starting at pos) aren't good; the
            changed index is drawn uniformly from pos .. pos + free - 1.
    """
    place = pos + random.randint(0, free - 1)
    # Index 0 -> '1', index 1 -> '4'; same values as 4 ** 0 and 4 ** 1.
    substitute = '14'[random.randint(0, 1)]
    return ''.join([sts[:place], substitute, sts[place + 1:]])

def noGs(word):
    """Break up every run of three consecutive '3' digits (GGG) in *word*.

    The sequence is scanned left to right. Whenever the three digits
    starting at the current index are all '3', one of those three
    positions (chosen at random) is overwritten with a digit other than
    '3'.

    The replacement is drawn from '1', '2' and '4' only. Drawing from all
    four digits could put a '3' straight back and leave the run intact.
    Because no '3' is ever written, a window that has been broken stays
    broken, so the returned string contains no '333'.

    Returns the repaired string; its length equals that of the input.
    """
    for start in range(len(word) - 2):
        if word[start:start + 3] != '333':
            continue
        place = start + random.randint(0, 2)
        word = word[:place] + random.choice('124') + word[place + 1:]
    return word

def noCs(word):
    """Break up runs of three consecutive '2' digits (CCC) in *word*.

    The sequence is scanned left to right; whenever the three digits
    starting at the current index are all '2', mutatenoG() rewrites one
    of those three positions. Returns the resulting string.
    """
    for start in range(len(word) - 2):
        if word[start:start + 3] != '222':
            continue
        word = mutatenoG(word, start, 3)
    return word

def GetGCcontent(work):
    """Return the fraction of positions in *work* that are '2' or '3'.

    In the digit encoding produced by replace(), '2' is C and '3' is G,
    so this is the GC fraction of the sequence as a float in [0.0, 1.0].
    An empty sequence yields 0.0 instead of dividing by zero.
    """
    if not work:
        return 0.0
    gc_count = sum(1 for digit in work if digit in ('2', '3'))
    return float(gc_count) / len(work)

def countself(tester, thebadstring):
    """Count 4-letter window conflicts between *tester* and *thebadstring*.

    Both arguments are digit-encoded sequences (digits 1..4, where the
    complement of digit d is 5 - d). For every 4-character window of
    *tester*, each 4-character window of *thebadstring* adds one conflict
    for each of the following that holds:

    * it equals the tester window itself, unless both windows start at
      the same index (matching up to the exact same spot is ignored);
    * it equals the reverse complement of the tester window;
    * it equals the complement of the tester window.

    Returns [total, worst_start]: the total number of conflicts, and the
    start index of the tester window with the most conflicts (the first
    such window on ties, 0 when there are no conflicts at all).
    """
    bad_windows = [thebadstring[b:b + 4] for b in range(len(thebadstring) - 3)]
    if not bad_windows:
        # Nothing to compare against.
        return [0, 0]

    total = 0
    worst_count = 0   # greatest number of conflicts any 4-letter window has
    worst_start = 0   # start index of that window

    for a in range(len(tester) - 3):
        probe = tester[a:a + 4]
        # Same transformation as getcomp()/getrevcomp(), computed once per
        # tester window instead of once per window pair.
        pieces = [str(5 - int(digit)) for digit in probe]
        comp = ''.join(pieces)
        revcomp = ''.join(reversed(pieces))

        subtotal = 0
        for b, target in enumerate(bad_windows):
            if b != a and probe == target:
                subtotal += 1
            if revcomp == target:   # complement bad
                subtotal += 1
            if comp == target:      # same strand bad
                subtotal += 1

        # Compare once per window, after all three kinds of conflict have
        # been counted, so a window whose conflicts are direct or
        # reverse-complement matches can also be reported as the worst one.
        if subtotal > worst_count:
            worst_count = subtotal
            worst_start = a
        total += subtotal

    return [total, worst_start]

def getleast(badstring, length, iterations=1000):
    """Search for a sequence of *length* digits with few 4-mer conflicts.

    badstring  -- DNA sequence (letters A/C/G/T) to avoid; it is converted
                  with replace() before use.
    length     -- length of the sequence to design; must be at least 5.
    iterations -- number of mutate-and-score rounds (default 1000).

    Starting from a random digit string, each round mutates the best
    string found so far: one time in three at a random position,
    otherwise in the window that countself() last reported as the worst.
    Runs of GGG / CCC are then repaired with noGs() / noCs(). The
    candidate is scored with countself() and kept if its score is
    strictly lower than the best so far.

    Returns [best score, GC fraction of the best string, best string].
    The best string is returned in the digit encoding.

    Raises ValueError if *length* is smaller than 5.
    """
    if length < 5:
        # random.randint(1, length - 4) needs a non-empty range, and
        # mutate() with free=4 must not reach past the end of the string.
        raise ValueError("length must be at least 5, got %r" % (length,))

    badstring = replace(badstring)

    beststring = noCs(noGs(returnrandstring(length)))
    besttotal = 10000000
    greatassoc = 0

    for _ in range(iterations):
        if random.randint(1, 3) == 1:    # 1/3 of the time we mutate randomly
            start = random.randint(1, length - 4)
        else:
            start = greatassoc
        work = mutate(beststring, start, 4)

        work = noGs(work)
        work = noCs(work)  # noCs never mutates to a situation with 3 G's in a row

        # Score the candidate against itself plus the sequence to avoid.
        # Scoring against the previous best string would compare the
        # mutated windows with text that is no longer in the candidate.
        score, greatassoc = countself(work, work + badstring)

        if score < besttotal:
            besttotal = score
            beststring = work

    return [besttotal, GetGCcontent(beststring), beststring]

def multleast(bads, lenh, runs=10):
    """Run getleast() several times and print the results, best first.

    bads -- sequence to avoid, passed through to getleast().
    lenh -- length of the sequence to design, passed through to getleast().
    runs -- number of independent getleast() runs (default 10).

    Each result is the list returned by getleast(); the results are
    sorted in ascending order (so lowest score first) and printed one
    per line. Returns None.
    """
    alist = [getleast(bads, lenh) for _ in range(runs)]
    alist.sort()

    for x in alist:
        # Parenthesised single-argument form prints the same on Python 2
        # and Python 3.
        print(x)