IGEM:Harvard/2006/Container Design 4/Python Code/Full Code Final

From OpenWetWare
Jump to navigationJump to search

Modified Versions of William's Code - Finalized 7/11/06

Main

#!/usr/bin/python

import sys
# for regular expression matching
import re
# for reading structures in from files
import pickle
from honeycomb_pointers_v1 import *
from pointers_to_sequences_v1 import *

filename = raw_input('Enter the filename for the text-based node lattice array: ')
num_zones = int(raw_input('Enter the number of 42bp zones per double helix: '))
#filename = 'lid_1_ascii.txt'
#num_zones = 3
periodic_structure_flag = False

node_ra = read_text_format_node_array(filename)
print_node_lattice_array(node_ra)
TPP_ra = token_pointer_pair_array(node_ra, num_zones, periodic_structure_flag)
path_ra = token_pointer_path_array(TPP_ra)
OTP_ra = oligo_token_pointer_array(path_ra)
check_token_representation(OTP_ra, TPP_ra)


# Print out the longest path, just for fun
longest_path = []
for path in path_ra:
	if len(path) > len(longest_path):
		longest_path = path
print_path(longest_path, TPP_ra)

#####
# Oligo splitting - this time reading from a file and not asking for user
# input
#####
fin_barrel = None
fin_lid = None

try:
	fin_barrel = open("barrel_oligos_to_split.txt", "r")
	fin_lid = open("lid_oligos_to_split.txt", "r")
except IOError, e:
	print "Error in file IO: ", e

# Ask the user if they are running a lid or a barrel

shape = int(raw_input("Enter 1 if you are running a barrel, 2 if lid: "))
if (shape == 1):
	oligos_to_split = pickle.load(fin_barrel)
elif (shape == 2):
	oligos_to_split = pickle.load(fin_lid)
else:
	print 'Please modify code or run with lid or 30hb barrel'
	

new_OTP_ra = OTP_ra[:]
for pair in oligos_to_split:
	oligo_num = pair[0]
	print oligo_num
	num_toks = pair[1]
	print num_toks
	
       	new_OTP_ra = split_oligo(new_OTP_ra, oligo_num, num_toks)

       	print new_OTP_ra
     	print len(OTP_ra)
       	print len(new_OTP_ra)

# if it's the barrel design and so that all the numbers aren't messed
# up, the 7bp token on the start of strand 10 needs to be removed
# because it's going to be left unpaired

if (shape == 1):
	new_OTP_ra = new_OTP_ra[:61] + new_OTP_ra[62:]

if fin_barrel: fin_barrel.close()
if fin_lid: fin_lid.close()




####
# generate and print the oligo token grid
####

# Initialize the grid with all periods
num_strands = len(TPP_ra)
num_subzones = len(TPP_ra[0])

sub_token_visit_ra = ['.' for subzone_num in range(num_subzones)]
grid_ra = [sub_token_visit_ra[:] for strand_num in range(num_strands)]

oligo_num = 0
for oligo in new_OTP_ra:
	grid_ra = generate_oligo_path(oligo, oligo_num, grid_ra) 
	oligo_num = oligo_num + 1
print grid_ra

print_all_oligos(grid_ra, num_strands, num_subzones)

####
# Generate oligo sequences
####
strand_ra = strand_array('lid_1_scaffold.txt', TPP_ra)
token_ra = token_array(strand_ra)
oligo_ra = oligo_array(new_OTP_ra, token_ra)

# The array of oligos that does have the oligos split but does not have the
# aptamers or latches added.
original_oligo_ra = oligo_ra[:]
latch_2_oligo_ra = oligo_ra[:]

#####
# Add apts this time using file input instead of user input
#####

# Constants
apt_seq = 'GGTTGGTGTGGTTGG'
T_linker = 'TTT'

fin_barrel = None
        
try:
        fin_barrel = open("barrel_apts_to_add.txt", "r")
except IOError, e:
        print "Error in file IO: ", e
        
if (shape == 1):
    	apts_to_add = pickle.load(fin_barrel)

	for apt_specs in apts_to_add:
		oligo_num = apt_specs[0]
		type = apt_specs[1]
		if (type == 1):
			# apt is pointing in so add 'I' as a flag at the end
			oligo_ra[oligo_num] = oligo_ra[oligo_num] + T_linker + apt_seq + 'I'
		elif (type == 2):
			# apt is pointing out so add 'O' as a flag
			oligo_ra[oligo_num] = oligo_ra[oligo_num] + T_linker + apt_seq + 'O'
		else:
			# incorrect type
			print 'Bad input - aptamer needs to be pointing in or out'

#####
# Add latches
#####

fin_barrel = None
fin_lid1 = None
fin_lid2 = None
fin_barrel_design2 = None
fin_lid1_design2 = None
fin_lid2_design2 = None
                
try:
        fin_barrel = open("barrel_latches_to_add.txt", "r")
	fin_lid1 = open("lid1_latches_to_add.txt", "r")
	fin_lid2 = open("lid2_latches_to_add.txt", "r")
	fin_barrel_design2 = open("barrel_latches_to_add_design2.txt", "r")
	fin_lid1_design2 = open("lid1_latches_to_add_design2.txt", "r")
	fin_lid2_design2 = open("lid2_latches_to_add_design2.txt", "r")
except IOError, e:
        print "Error in file IO: ", e


if (shape == 1):
        latches_to_add = pickle.load(fin_barrel)
	latches_to_add_d2 = pickle.load(fin_barrel_design2)
elif (shape == 2):
	# Ask which lid is being run
	print 'What lid are you running?'
	type = int(raw_input('Enter 1 if lid1, 2 if lid2: '))
	if (type == 1):
		latches_to_add = pickle.load(fin_lid1)
		latches_to_add_d2 = pickle.load(fin_lid1_design2)
	elif (type == 2):
		latches_to_add = pickle.load(fin_lid2)
		latches_to_add_d2 = pickle.load(fin_lid2_design2)

        
# note: latch sequence includes any linker Ts already
for latch_specs in latches_to_add:
	oligo_num = latch_specs[0]
        latch = latch_specs[1]
	oligo_ra[oligo_num] = oligo_ra[oligo_num] + latch

for latch_specs in latches_to_add_d2:
	oligo_num = latch_specs[0]
	latch = latch_specs[1]
	latch_2_oligo_ra[oligo_num] = latch_2_oligo_ra[oligo_num] + latch


#####
# oligo sorting
#####
#
barrel_core = [0, 3, 4, 5, 6, 8, 9, 10, 12, 13, 14, 15, 16, 18, 19, 20, 22, 24, 25, 26, 28, 29, 30, 31, 32, 33, 34, 35, 37, 38, 39, 40, 41, 42, 44, 45, 46, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77]
barrel_latch = [21, 27, 43, 60] 
barrel_aptamer_out = [2, 7, 17, 23] 
barrel_aptamer_in = [1, 11, 36, 47]
lid_core = [0, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 30, 31, 32, 33, 34]
lid_latch = [2, 29]

# output all oligos
output_file = file('lid_1_all_oligos.txt', 'w')
for oligo in oligo_ra:
	output_file.write(oligo + '\n')
output_file.close()

if (shape == 1):
	# barrel output
	
	barrel_oligos_file = file('barrel_oligos_sorted.txt', 'w')
	# output barrel core oligos
	barrel_oligos_file.write('Core Oligos' + '\n')
	barrel_oligos_file.close()
	# to append the file needs to be reopened in append mode.
	barrel_oligos_file = file('barrel_oligos_sorted.txt', 'a')

	for oligo_num in barrel_core:
		barrel_oligos_file.write(str(oligo_num) + ' : ' + oligo_ra[oligo_num] + '\n')

	# out + apts
	barrel_oligos_file.write('\n' + 'Out + Apts' + '\n')
	for oligo_num in barrel_aptamer_out:
		barrel_oligos_file.write(str(oligo_num) + ' : ' + oligo_ra[oligo_num] + '\n')
	# out - apts
	barrel_oligos_file.write('\n' + 'Out - Apts' + '\n')
	for oligo_num in barrel_aptamer_out:
		barrel_oligos_file.write(str(oligo_num) + ' : ' + original_oligo_ra[oligo_num] + '\n')
	
	# in - apts
	barrel_oligos_file.write('\n' + 'In - Apts' + '\n')
	for oligo_num in barrel_aptamer_in:
		barrel_oligos_file.write(str(oligo_num) + ' : ' + original_oligo_ra[oligo_num] + '\n')

	# in + apts
	barrel_oligos_file.write('\n' + 'In + Apts' +'\n')
	for oligo_num in barrel_aptamer_in:
		barrel_oligos_file.write(str(oligo_num) + ' : ' + oligo_ra[oligo_num] + '\n')
	
	# barrel oligos with latches - two oligo design
	barrel_oligos_file.write('\n' + 'Oligos with Latches - Design 1' + '\n')
	for oligo_num in barrel_latch:
		barrel_oligos_file.write(str(oligo_num) + ' : ' + oligo_ra[oligo_num] + '\n')
	
	# barrel oligos with latches - four oligo design
	barrel_oligos_file.write('\n' + 'Oligos with Latches - Design 2\n')
	for oligo_num in barrel_latch:
		barrel_oligos_file.write(str(oligo_num) + ' Design 2: ' + latch_2_oligo_ra[oligo_num] + '\n')

	# barrel oligos replacing all latches
	barrel_oligos_file.write('\n' + 'Oligos replacing all latches\n')
	for oligo_num in barrel_latch:
		barrel_oligos_file.write(str(oligo_num) + ' : ' + original_oligo_ra[oligo_num] + '\n')

	
	barrel_oligos_file.close()


if (shape == 2):
	# lid output

	# output lid core oligos
	# we first open the file for writing because we want to clear 
	# anything that was there before. Then we close it and reopen it
	# in append mode.
	lid_oligos_file = file('lid1_oligos_sorted.txt', 'w')
	lid_oligos_file.write('Lid Core' + '\n')
	lid_oligos_file.close()
	lid_oligos_file = file('lid1_oligos_sorted.txt', 'a')
	for oligo_num in lid_core:
		lid_oligos_file.write(str(oligo_num) + ' : ' + oligo_ra[oligo_num] + '\n')

	# output lid oligos with latches -- 2 oligo design
	lid_oligos_file.write('\nLid Latch Oligos - 2 Oligo Design\n')
	for oligo_num in lid_latch:
		lid_oligos_file.write(str(oligo_num) + ' : ' + oligo_ra[oligo_num] + '\n')

	# output lid oligos with latches -- 4 oligo design
	lid_oligos_file.write('\nLid Latch Oligos - 4 Oligo Design\n')
	for oligo_num in lid_latch:
		lid_oligos_file.write(str(oligo_num) + ' Design 2 : ' + latch_2_oligo_ra[oligo_num] + '\n')

	# output lid oligos that replace those with latches
	lid_oligos_file.write('\nLid Replacement Oligos - No Latches\n')
	for oligo_num in lid_latch:
		lid_oligos_file.write(str(oligo_num) + ' : ' + original_oligo_ra[oligo_num] + '\n')
	
	lid_oligos_file.close()	

sys.stdout.write('\n\n\n\n\n\n')

Honeycomb_pointers_v1.py

#!/usr/bin/python

import sys






######
# This function reads in the node array from a text file
# It adds a border of nodes automatically
######
def read_text_format_node_array(filename):
	# Read in file
	input_file = file(filename, 'r')
	lines = input_file.readlines()
	input_file.close()
	row_string_ra = [line[:-1] for line in lines]

	
	# Check to make sure each line is the same length
	no_length_violation = True
	length = len(row_string_ra[0])
	for row_string in row_string_ra:
		if len(row_string) != length and len(row_string) != 0:
			sys.stdout.write('ERROR: Not all lines of inputted node lattice array are the same length.\n')
			sys.stdout.write('Length is ' + str(len(row_string)) + '\n')
			no_length_violation = False

		
	# Parse into pre node array
	pre_node_ra = []
	for row_string in row_string_ra:
		num_row_nodes = len(row_string)/3
		sub_pre_node_ra = []
		for row_node_num in range(num_row_nodes):
			sub_pre_node_ra.append(row_string[row_node_num*3:row_node_num*3 + 3])
		if sub_pre_node_ra != []:
			pre_node_ra.append(sub_pre_node_ra)	

	
	# Parse pre node array into node array
	num_rows = len(pre_node_ra)/2
	num_row_nodes = len(pre_node_ra[0])
	node_ra = [['.' for row_node_num in range(num_row_nodes + num_row_nodes%2 + 2)]]
	for row_num in range(num_rows):
		sub_node_ra = ['.']
		for row_node_num in range(num_row_nodes):
			pre_node_string = pre_node_ra[row_num*2 + (row_node_num + row_num)%2][row_node_num]
			if pre_node_string == '...':
				node = '.'
			else:
				node = int(pre_node_string)
			sub_node_ra.append(node)
		sub_node_ra.append('.')
		if num_row_nodes%2 == 1:
			sub_node_ra.append('.')			
		node_ra.append(sub_node_ra)
	node_ra.append(['.' for row_node_num in range(num_row_nodes + num_row_nodes%2 + 2)])

	
	# Check for parity violations
	num_parity_violations = 0
	for row_num in range(num_rows):
		for row_node_num in range(num_row_nodes):
			node = node_ra[row_num][row_node_num]
			if node != '.':
				if (node + row_num + row_node_num)%2 == 1:
					sys.stdout.write('ERROR: Parity violation for strand ' + str(node) + '.\n')
					sys.stdout.write('Parity is the row number plus the row-node number.\n')
					sys.stdout.write('Make sure even-numbered strands are on even-parity nodes.\n')
					sys.stdout.write('Make sure odd-numbered strands are on odd-parity nodes.\n')
					num_parity_violations += 1

	
	if num_parity_violations > 0 and no_length_violations == True:
		return
	else:
		return node_ra






######
# This function prints the node lattice array in a honeycomb format
######
def print_node_lattice_array(node_ra):
	sys.stdout.write('\nNODE LATTICE ARRAY\n')
	zeroes = '000'
	for y in range(len(node_ra)):
		even_row_string = ''
		odd_row_string = '   '
		for x in range(len(node_ra[0])):
			string_element = str(node_ra[y][x])
			if string_element == '.':
				string_element += '..'
			else:
				string_element = zeroes[:3 - len(string_element)] + string_element
			if x%2 == 0:
				even_row_string += string_element + '   '
			else:
				odd_row_string += string_element + '   '
		if y%2 == 0:
			sys.stdout.write(even_row_string + '\n')
			sys.stdout.write(odd_row_string + '\n')
		else:
			sys.stdout.write(odd_row_string + '\n')
			sys.stdout.write(even_row_string + '\n')
		sys.stdout.write('\n')
	sys.stdout.write('\n\n')

	return






######
# This function inputs the node array and the number of 42bp zones
# and returns a token pointer pair array
######
def token_pointer_pair_array(node_ra, num_zones, periodic_structure_flag):
	# Initialize the offset array
	even_offset_ra = [[ 0,  1], [ 0, -1], [-1, 0]]
	odd_offset_ra  = [[ 0, -1], [ 0,  1], [ 1, 0]]
	offset_ra = [even_offset_ra, odd_offset_ra]

			
	#Initialize the token pointer pair array
	strand_list_ra = []
	for sub_node_ra in node_ra:
		for node in sub_node_ra:
			if node != '.' and strand_list_ra.count(node) == 0:
				strand_list_ra.append(node)
	num_strands = len(strand_list_ra)

	TPP_ra = []
	num_subzones = num_zones*6
	for strand_num in range(num_strands):
		sub_TPP_ra = []
		for token_num in range(num_subzones):
			if strand_num%2 == 0:
				previous_TP = [strand_num, (token_num + 1)%num_subzones]
				if previous_TP[1] == 0 and periodic_structure_flag == False:
					previous_TP[1] = -1
				next_TP = [strand_num, (token_num - 1)%num_subzones]
				if next_TP[1] == (num_subzones - 1) and periodic_structure_flag == False:
					next_TP[1] = -1
				sub_TPP_ra.append([previous_TP, next_TP])
			else:
				previous_TP = [strand_num, (token_num - 1)%num_subzones]
				if previous_TP[1] == (num_subzones - 1) and periodic_structure_flag == False:
					previous_TP[1] = -1
				next_TP = [strand_num, (token_num + 1)%num_subzones]
				if next_TP[1] == 0 and periodic_structure_flag == False:
					next_TP[1] = -1
				sub_TPP_ra.append([previous_TP, next_TP])
		TPP_ra.append(sub_TPP_ra)


	# Introduce crossovers based on the node array
	for donor_y in range(len(node_ra)):
		for donor_x in range(len(node_ra[0])):
			donor_strand_num = node_ra[donor_y][donor_x]
			if donor_strand_num != '.':
				for position in range(3):
					parity = (donor_y + donor_x)%2
					acceptor_y = donor_y + offset_ra[parity][position][0]
					acceptor_x = donor_x + offset_ra[parity][position][1]
					acceptor_strand_num = node_ra[acceptor_y][acceptor_x]
					if acceptor_strand_num != '.':
						for zone_num in range(num_zones):
							subzone_num = zone_num*6 + position*2 + 1 - parity
							TPP_ra[donor_strand_num][subzone_num][1] = [acceptor_strand_num, subzone_num]
							TPP_ra[acceptor_strand_num][subzone_num][0] = [donor_strand_num, subzone_num]
							
	return TPP_ra






######
# This function inputs the token pointer pair array
# and returns an array of token_pointer_paths
######
def token_pointer_path_array(TPP_ra):
	sub_visits_ra = [0 for i in range(len(TPP_ra[0]))]
	visits_ra = [sub_visits_ra[:] for i in range(len(TPP_ra))]
	path_ra = []


	for strand_num in range(len(TPP_ra)):
		for subzone_num in range(len(TPP_ra[0])):
			if visits_ra[strand_num][subzone_num] == 0:	
				previous_TP = TPP_ra[strand_num][subzone_num][0]
				next_TP = TPP_ra[strand_num][subzone_num][1]
				num_visits = 100 * len(path_ra) + 1
				visits_ra[strand_num][subzone_num] = num_visits
				sub_path_ra = [[strand_num, subzone_num]]
							
				upstream_done = False
				while not upstream_done:
					if previous_TP[1] == -1:
						upstream_done = True
					elif visits_ra[previous_TP[0]][previous_TP[1]] > 0:
						upstream_done = True
					else:
						sub_path_ra.insert(0, previous_TP)
						num_visits += 1
						visits_ra[previous_TP[0]][previous_TP[1]] = num_visits
						previous_TP = TPP_ra[previous_TP[0]][previous_TP[1]][0]
			
				downstream_done = False
				while not downstream_done:
					if next_TP[1] == -1:
						downstream_done = True
					elif visits_ra[next_TP[0]][next_TP[1]] > 0:
						downstream_done = True
					else:
						sub_path_ra.append(next_TP)
						num_visits += 1
						visits_ra[next_TP[0]][next_TP[1]] = num_visits
						next_TP = TPP_ra[next_TP[0]][next_TP[1]][1]
			
				# Make sure that the path begins between subzones, not in the middle of a subzone
				if (sub_path_ra[0][0] + sub_path_ra[0][1])%2 == 0:
					sub_path_ra = sub_path_ra[1:] + sub_path_ra[:1]
				path_ra.append(sub_path_ra)



	num_tokens_visited = 0
	for sub_path_ra in path_ra:
		num_tokens_visited += len(sub_path_ra)

	sys.stdout.write('The number of tokens visited is ' + str(num_tokens_visited) + '.\n')
			

	path_length_ra = [0 for i in range(1100)]
	for sub_path_ra in path_ra:
		path_length_ra[len(sub_path_ra)] += 1

	for length in range(1100):
		if path_length_ra[length] != 0:
			sys.stdout.write('The number of paths with length ' + str(length) + ' is ' + str(path_length_ra[length]) + '.\n')
	
	return path_ra	






######
# This function inputs the token pointer path array
# and returns a list of oligos as lists of six token pointers
######
def oligo_token_pointer_array(path_ra):
	OTP_ra = []
	for sub_path_ra in path_ra:
		for oligo_num in range(len(sub_path_ra)/6):
			OTP_ra.append(sub_path_ra[oligo_num*6:oligo_num*6 + 6])
		# Take care of the remainders
		if len(sub_path_ra)%6 != 0:
			OTP_ra.append(sub_path_ra[-(len(sub_path_ra)%6):])
	return OTP_ra






######
# This function checks to make sure each token is represented once and only once
# in the oligo token pointer lists
######
def check_token_representation(OTP_ra, TPP_ra):
	CTP_ra = []
	for sub_OTP_ra in OTP_ra:
		for TP in sub_OTP_ra:
			CTP_ra.append(TP)

	num_strands = len(TPP_ra)
	num_subzones = len(TPP_ra[0])
	problems = False

	# Check each token for single presence
	for strand_num in range(num_strands):
		for token_num in range(num_subzones):
			if CTP_ra.count([strand_num, token_num]) == 0:
				sys.stdout.write(str([strand_num, token_num]) + ' not present.\n')
				problems = True
			elif CTP_ra.count([strand_num, token_num]) > 1:
				sys.stdout.write(str([strand_num, token_num]) + ' present more than once.\n')
				problems = True
	
	if problems == False:
		sys.stdout.write('Each token is represented once and only once in the oligo token pointer lists.\n')

	return
	





######
# This function prints the oligo path on the strand token lattice
# It is fun to see how the paths twist around the lattice
# You can use this function to help debug your program
######
def print_path(sub_path_ra, TPP_ra):
	sys.stdout.write('\nONE PATH ARRAY\n')

	num_strands = len(TPP_ra)
	num_subzones = len(TPP_ra[0])
	num_path_tokens = len(sub_path_ra)
	
	# Initialize strand token lattice
	sub_token_visit_ra = ['.' for subzone_num in range(num_subzones)]
	token_visit_ra = [sub_token_visit_ra[:] for strand_num in range(num_strands)]


	# Assign visits
	for path_token_num in range(num_path_tokens):
		token = sub_path_ra[path_token_num]
		strand = token[0]
		subzone = token[1]
		token_visit_ra[strand][subzone] = path_token_num

	# Print out strand token lattice
	spacer = '   '
	for strand_num in range(num_strands):
		for subzone_num in range(num_subzones):
			visitor_string = str(token_visit_ra[strand_num][subzone_num])
			sys.stdout.write(visitor_string)
			sys.stdout.write(spacer[:4 - len(visitor_string)])
		sys.stdout.write('\n')
	
	sys.stdout.write('\n')
	
	return
	
# The idea here is to have a function that adds the numbers of one oligo path 
# to the appropriate places in the big grid array. Eventually this will be printed
# in main. Also it needs to be initialized in main. Oligo_path is the path of 
# one oligo, while grid_ra is the grid that is constantly being updated until 
# it is printed in main. oligo_num is number that will be inputed to the grid_ra.

def generate_oligo_path(oligo_path, oligo_num, grid_ra):
	num_path_tokens = len(oligo_path)
        
# Assign visits
        for path_token_num in range(num_path_tokens):
                token = oligo_path[path_token_num]
                strand = token[0]
                subzone = token[1]
                grid_ra[strand][subzone] = oligo_num
                
        
	return grid_ra


def print_all_oligos(grid_ra, num_strands, num_subzones):
        spacer = '   '
        for strand_num in range(num_strands):
                for subzone_num in range(num_subzones):
                        visitor_string = str(grid_ra[strand_num][subzone_num])
                        sys.stdout.write(visitor_string)
                        sys.stdout.write(spacer[:4 - len(visitor_string)])
                sys.stdout.write('\n')
        


####
# given an oligo to split, split it and return the new list of oligos
####
def split_oligo(new_OTP_ra, oligo_num, num_toks):

	print new_OTP_ra[oligo_num]

	original_oligo = new_OTP_ra[oligo_num]

	oligo_1 = original_oligo[:num_toks]
	oligo_2 = original_oligo[num_toks:]
	print oligo_1
	print'\n'
	print oligo_2
	
	new_OTP_ra[oligo_num] = oligo_1
	new_OTP_ra.insert(oligo_num + 1, oligo_2)
	return new_OTP_ra


sys.stdout.write('Honeycomb pointers module installed.\n')