IGEM:Harvard/2006/Container Design 4/Python Code/Full Code Final

= Modified Versions of William's Code - Finalized 7/11/06 =

Main

 * 1) !/usr/bin/python

import sys import re import pickle from honeycomb_pointers_v1 import * from pointers_to_sequences_v1 import *
 * 1) for regular expression matching
 * 1) for reading structures in from files

filename = raw_input('Enter the filename for the text-based node lattice array: ') num_zones = int(raw_input('Enter the number of 42bp zones per double helix: ')) periodic_structure_flag = False
 * 1) filename = 'lid_1_ascii.txt'
 * 2) num_zones = 3

node_ra = read_text_format_node_array(filename) print_node_lattice_array(node_ra) TPP_ra = token_pointer_pair_array(node_ra, num_zones, periodic_structure_flag) path_ra = token_pointer_path_array(TPP_ra) OTP_ra = oligo_token_pointer_array(path_ra) check_token_representation(OTP_ra, TPP_ra)

longest_path = [] for path in path_ra: if len(path) > len(longest_path): longest_path = path print_path(longest_path, TPP_ra)
 * 1) Print out the longest path, just for fun

fin_barrel = None fin_lid = None
 * 1) Oligo splitting - this time reading from a file and not asking for user
 * 2) input
 * 1) input

try: fin_barrel = open("barrel_oligos_to_split.txt", "r") fin_lid = open("lid_oligos_to_split.txt", "r") except IOError, e:	print "Error in file IO: ", e


 * 1) Ask the user if they are running a lid or a barrel

shape = int(raw_input("Enter 1 if you are running a barrel, 2 if lid: ")) if (shape == 1): oligos_to_split = pickle.load(fin_barrel) elif (shape == 2): oligos_to_split = pickle.load(fin_lid) else: print 'Please modify code or run with lid or 30hb barrel'

new_OTP_ra = OTP_ra[:] for pair in oligos_to_split: oligo_num = pair[0] print oligo_num num_toks = pair[1] print num_toks new_OTP_ra = split_oligo(new_OTP_ra, oligo_num, num_toks)

print new_OTP_ra print len(OTP_ra) print len(new_OTP_ra)


 * 1) if it's the barrel design and so that all the numbers aren't messed
 * up, the 7bp token on the start of strand 10 needs to be removed
 * 1) because it's going to be left unpaired

if (shape == 1): new_OTP_ra = new_OTP_ra[:61] + new_OTP_ra[62:]

if fin_barrel: fin_barrel.close if fin_lid: fin_lid.close


 * 1) generate and print the oligo token grid
 * 1) generate and print the oligo token grid

num_strands = len(TPP_ra) num_subzones = len(TPP_ra[0])
 * 1) Initialize the grid with all periods

sub_token_visit_ra = ['.' for subzone_num in range(num_subzones)] grid_ra = [sub_token_visit_ra[:] for strand_num in range(num_strands)]

oligo_num = 0 for oligo in new_OTP_ra: grid_ra = generate_oligo_path(oligo, oligo_num, grid_ra) oligo_num = oligo_num + 1 print grid_ra

print_all_oligos(grid_ra, num_strands, num_subzones)

strand_ra = strand_array('lid_1_scaffold.txt', TPP_ra) token_ra = token_array(strand_ra) oligo_ra = oligo_array(new_OTP_ra, token_ra)
 * 1) Generate oligo sequences
 * 1) Generate oligo sequences

original_oligo_ra = oligo_ra[:] latch_2_oligo_ra = oligo_ra[:]
 * 1) The array of oligos that does have the oligos split but does not have the
 * 2) aptamers or latches added.


 * 1) Add apts this time using file input instead of user input
 * 1) Add apts this time using file input instead of user input

apt_seq = 'GGTTGGTGTGGTTGG' T_linker = 'TTT'
 * 1) Constants

fin_barrel = None try: fin_barrel = open("barrel_apts_to_add.txt", "r") except IOError, e:       print "Error in file IO: ", e if (shape == 1): apts_to_add = pickle.load(fin_barrel)

for apt_specs in apts_to_add: oligo_num = apt_specs[0] type = apt_specs[1] if (type == 1): # apt is pointing in so add 'I' as a flag at the end oligo_ra[oligo_num] = oligo_ra[oligo_num] + T_linker + apt_seq + 'I'		elif (type == 2): # apt is pointing out so add 'O' as a flag oligo_ra[oligo_num] = oligo_ra[oligo_num] + T_linker + apt_seq + 'O'		else: # incorrect type print 'Bad input - aptamer needs to be pointing in or out'


 * 1) Add latches
 * 1) Add latches

fin_barrel = None fin_lid1 = None fin_lid2 = None fin_barrel_design2 = None fin_lid1_design2 = None fin_lid2_design2 = None try: fin_barrel = open("barrel_latches_to_add.txt", "r") fin_lid1 = open("lid1_latches_to_add.txt", "r") fin_lid2 = open("lid2_latches_to_add.txt", "r") fin_barrel_design2 = open("barrel_latches_to_add_design2.txt", "r") fin_lid1_design2 = open("lid1_latches_to_add_design2.txt", "r") fin_lid2_design2 = open("lid2_latches_to_add_design2.txt", "r") except IOError, e:       print "Error in file IO: ", e

if (shape == 1): latches_to_add = pickle.load(fin_barrel) latches_to_add_d2 = pickle.load(fin_barrel_design2) elif (shape == 2): # Ask which lid is being run print 'What lid are you running?' type = int(raw_input('Enter 1 if lid1, 2 if lid2: ')) if (type == 1): latches_to_add = pickle.load(fin_lid1) latches_to_add_d2 = pickle.load(fin_lid1_design2) elif (type == 2): latches_to_add = pickle.load(fin_lid2) latches_to_add_d2 = pickle.load(fin_lid2_design2)

for latch_specs in latches_to_add: oligo_num = latch_specs[0] latch = latch_specs[1] oligo_ra[oligo_num] = oligo_ra[oligo_num] + latch
 * 1) note: latch sequence includes any linker Ts already

for latch_specs in latches_to_add_d2: oligo_num = latch_specs[0] latch = latch_specs[1] latch_2_oligo_ra[oligo_num] = latch_2_oligo_ra[oligo_num] + latch

barrel_core = [0, 3, 4, 5, 6, 8, 9, 10, 12, 13, 14, 15, 16, 18, 19, 20, 22, 24, 25, 26, 28, 29, 30, 31, 32, 33, 34, 35, 37, 38, 39, 40, 41, 42, 44, 45, 46, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77] barrel_latch = [21, 27, 43, 60] barrel_aptamer_out = [2, 7, 17, 23] barrel_aptamer_in = [1, 11, 36, 47] lid_core = [0, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 30, 31, 32, 33, 34] lid_latch = [2, 29]
 * 1) oligo sorting
 * 1) oligo sorting

output_file = file('lid_1_all_oligos.txt', 'w') for oligo in oligo_ra: output_file.write(oligo + '\n') output_file.close
 * 1) output all oligos

if (shape == 1): # barrel output barrel_oligos_file = file('barrel_oligos_sorted.txt', 'w') # output barrel core oligos barrel_oligos_file.write('Core Oligos' + '\n') barrel_oligos_file.close # to append the file needs to be reopened in append mode. barrel_oligos_file = file('barrel_oligos_sorted.txt', 'a')

for oligo_num in barrel_core: barrel_oligos_file.write(str(oligo_num) + ' : ' + oligo_ra[oligo_num] + '\n')

# out + apts barrel_oligos_file.write('\n' + 'Out + Apts' + '\n') for oligo_num in barrel_aptamer_out: barrel_oligos_file.write(str(oligo_num) + ' : ' + oligo_ra[oligo_num] + '\n') # out - apts barrel_oligos_file.write('\n' + 'Out - Apts' + '\n') for oligo_num in barrel_aptamer_out: barrel_oligos_file.write(str(oligo_num) + ' : ' + original_oligo_ra[oligo_num] + '\n') # in - apts barrel_oligos_file.write('\n' + 'In - Apts' + '\n') for oligo_num in barrel_aptamer_in: barrel_oligos_file.write(str(oligo_num) + ' : ' + original_oligo_ra[oligo_num] + '\n')

# in + apts barrel_oligos_file.write('\n' + 'In + Apts' +'\n') for oligo_num in barrel_aptamer_in: barrel_oligos_file.write(str(oligo_num) + ' : ' + oligo_ra[oligo_num] + '\n') # barrel oligos with latches - two oligo design barrel_oligos_file.write('\n' + 'Oligos with Latches - Design 1' + '\n') for oligo_num in barrel_latch: barrel_oligos_file.write(str(oligo_num) + ' : ' + oligo_ra[oligo_num] + '\n') # barrel oligos with latches - four oligo design barrel_oligos_file.write('\n' + 'Oligos with Latches - Design 2\n') for oligo_num in barrel_latch: barrel_oligos_file.write(str(oligo_num) + ' Design 2: ' + latch_2_oligo_ra[oligo_num] + '\n')

# barrel oligos replacing all latches barrel_oligos_file.write('\n' + 'Oligos replacing all latches\n') for oligo_num in barrel_latch: barrel_oligos_file.write(str(oligo_num) + ' : ' + original_oligo_ra[oligo_num] + '\n')

barrel_oligos_file.close

if (shape == 2): # lid output

# output lid core oligos # we first open the file for writing because we want to clear # anything that was there before. Then we close it and reopen it # in append mode. lid_oligos_file = file('lid1_oligos_sorted.txt', 'w') lid_oligos_file.write('Lid Core' + '\n') lid_oligos_file.close lid_oligos_file = file('lid1_oligos_sorted.txt', 'a') for oligo_num in lid_core: lid_oligos_file.write(str(oligo_num) + ' : ' + oligo_ra[oligo_num] + '\n')

# output lid oligos with latches -- 2 oligo design lid_oligos_file.write('\nLid Latch Oligos - 2 Oligo Design\n') for oligo_num in lid_latch: lid_oligos_file.write(str(oligo_num) + ' : ' + oligo_ra[oligo_num] + '\n')

# output lid oligos with latches -- 4 oligo design lid_oligos_file.write('\nLid Latch Oligos - 4 Oligo Design\n') for oligo_num in lid_latch: lid_oligos_file.write(str(oligo_num) + ' Design 2 : ' + latch_2_oligo_ra[oligo_num] + '\n')

# output lid oligos that replace those with latches lid_oligos_file.write('\nLid Replacement Oligos - No Latches\n') for oligo_num in lid_latch: lid_oligos_file.write(str(oligo_num) + ' : ' + original_oligo_ra[oligo_num] + '\n') lid_oligos_file.close

sys.stdout.write('\n\n\n\n\n\n')

Honeycomb_pointers_v1.py

 * 1) !/usr/bin/python

import sys

def read_text_format_node_array(filename): # Read in file input_file = file(filename, 'r') lines = input_file.readlines input_file.close row_string_ra = [line[:-1] for line in lines]
 * 1) This function reads in the node array from a text file
 * 2) It adds a border of nodes automatically
 * 1) It adds a border of nodes automatically

# Check to make sure each line is the same length no_length_violation = True length = len(row_string_ra[0]) for row_string in row_string_ra: if len(row_string) != length and len(row_string) != 0: sys.stdout.write('ERROR: Not all lines of inputted node lattice array are the same length.\n') sys.stdout.write('Length is ' + str(len(row_string)) + '\n') no_length_violation = False

# Parse into pre node array pre_node_ra = [] for row_string in row_string_ra: num_row_nodes = len(row_string)/3 sub_pre_node_ra = [] for row_node_num in range(num_row_nodes): sub_pre_node_ra.append(row_string[row_node_num*3:row_node_num*3 + 3]) if sub_pre_node_ra != []: pre_node_ra.append(sub_pre_node_ra)

# Parse pre node array into node array num_rows = len(pre_node_ra)/2 num_row_nodes = len(pre_node_ra[0]) node_ra = '.' for row_node_num in range(num_row_nodes + num_row_nodes%2 + 2) for row_num in range(num_rows): sub_node_ra = ['.'] for row_node_num in range(num_row_nodes): pre_node_string = pre_node_ra[row_num*2 + (row_node_num + row_num)%2][row_node_num] if pre_node_string == '...': node = '.' else: node = int(pre_node_string) sub_node_ra.append(node) sub_node_ra.append('.') if num_row_nodes%2 == 1: sub_node_ra.append('.') node_ra.append(sub_node_ra) node_ra.append(['.' for row_node_num in range(num_row_nodes + num_row_nodes%2 + 2)])

# Check for parity violations num_parity_violations = 0 for row_num in range(num_rows): for row_node_num in range(num_row_nodes): node = node_ra[row_num][row_node_num] if node != '.': if (node + row_num + row_node_num)%2 == 1: sys.stdout.write('ERROR: Parity violation for strand ' + str(node) + '.\n') sys.stdout.write('Parity is the row number plus the row-node number.\n') sys.stdout.write('Make sure even-numbered strands are on even-parity nodes.\n') sys.stdout.write('Make sure odd-numbered strands are on odd-parity nodes.\n') num_parity_violations += 1

if num_parity_violations > 0 and no_length_violations == True: return else: return node_ra

def print_node_lattice_array(node_ra): sys.stdout.write('\nNODE LATTICE ARRAY\n') zeroes = '000' for y in range(len(node_ra)): even_row_string = '' odd_row_string = '  ' for x in range(len(node_ra[0])): string_element = str(node_ra[y][x]) if string_element == '.': string_element += '..' else: string_element = zeroes[:3 - len(string_element)] + string_element if x%2 == 0: even_row_string += string_element + '  ' else: odd_row_string += string_element + '  ' if y%2 == 0: sys.stdout.write(even_row_string + '\n') sys.stdout.write(odd_row_string + '\n') else: sys.stdout.write(odd_row_string + '\n') sys.stdout.write(even_row_string + '\n') sys.stdout.write('\n') sys.stdout.write('\n\n')
 * 1) This function prints the node lattice array in a honeycomb format
 * 1) This function prints the node lattice array in a honeycomb format

return

def token_pointer_pair_array(node_ra, num_zones, periodic_structure_flag): # Initialize the offset array even_offset_ra = 0,  1], [ 0, -1], [-1, 0 odd_offset_ra =  0, -1], [ 0,  1], [ 1, 0 offset_ra = [even_offset_ra, odd_offset_ra]
 * 1) This function inputs the node array and the number of 42bp zones
 * 2) and returns a token pointer pair array
 * 1) and returns a token pointer pair array

#Initialize the token pointer pair array strand_list_ra = [] for sub_node_ra in node_ra: for node in sub_node_ra: if node != '.' and strand_list_ra.count(node) == 0: strand_list_ra.append(node) num_strands = len(strand_list_ra)

TPP_ra = [] num_subzones = num_zones*6 for strand_num in range(num_strands): sub_TPP_ra = [] for token_num in range(num_subzones): if strand_num%2 == 0: previous_TP = [strand_num, (token_num + 1)%num_subzones] if previous_TP[1] == 0 and periodic_structure_flag == False: previous_TP[1] = -1 next_TP = [strand_num, (token_num - 1)%num_subzones] if next_TP[1] == (num_subzones - 1) and periodic_structure_flag == False: next_TP[1] = -1 sub_TPP_ra.append([previous_TP, next_TP]) else: previous_TP = [strand_num, (token_num - 1)%num_subzones] if previous_TP[1] == (num_subzones - 1) and periodic_structure_flag == False: previous_TP[1] = -1 next_TP = [strand_num, (token_num + 1)%num_subzones] if next_TP[1] == 0 and periodic_structure_flag == False: next_TP[1] = -1 sub_TPP_ra.append([previous_TP, next_TP]) TPP_ra.append(sub_TPP_ra)

# Introduce crossovers based on the node array for donor_y in range(len(node_ra)): for donor_x in range(len(node_ra[0])): donor_strand_num = node_ra[donor_y][donor_x] if donor_strand_num != '.': for position in range(3): parity = (donor_y + donor_x)%2 acceptor_y = donor_y + offset_ra[parity][position][0] acceptor_x = donor_x + offset_ra[parity][position][1] acceptor_strand_num = node_ra[acceptor_y][acceptor_x] if acceptor_strand_num != '.': for zone_num in range(num_zones): subzone_num = zone_num*6 + position*2 + 1 - parity TPP_ra[donor_strand_num][subzone_num][1] = [acceptor_strand_num, subzone_num] TPP_ra[acceptor_strand_num][subzone_num][0] = [donor_strand_num, subzone_num] return TPP_ra

def token_pointer_path_array(TPP_ra): sub_visits_ra = [0 for i in range(len(TPP_ra[0]))] visits_ra = [sub_visits_ra[:] for i in range(len(TPP_ra))] path_ra = []
 * 1) This function inputs the token pointer pair array
 * 2) and returns an array of token_pointer_paths
 * 1) and returns an array of token_pointer_paths

for strand_num in range(len(TPP_ra)): for subzone_num in range(len(TPP_ra[0])): if visits_ra[strand_num][subzone_num] == 0: previous_TP = TPP_ra[strand_num][subzone_num][0] next_TP = TPP_ra[strand_num][subzone_num][1] num_visits = 100 * len(path_ra) + 1 visits_ra[strand_num][subzone_num] = num_visits sub_path_ra = strand_num, subzone_num upstream_done = False while not upstream_done: if previous_TP[1] == -1: upstream_done = True elif visits_ra[previous_TP[0]][previous_TP[1]] > 0: upstream_done = True else: sub_path_ra.insert(0, previous_TP) num_visits += 1 visits_ra[previous_TP[0]][previous_TP[1]] = num_visits previous_TP = TPP_ra[previous_TP[0]][previous_TP[1]][0] downstream_done = False while not downstream_done: if next_TP[1] == -1: downstream_done = True elif visits_ra[next_TP[0]][next_TP[1]] > 0: downstream_done = True else: sub_path_ra.append(next_TP) num_visits += 1 visits_ra[next_TP[0]][next_TP[1]] = num_visits next_TP = TPP_ra[next_TP[0]][next_TP[1]][1] # Make sure that the path begins between subzones, not in the middle of a subzone if (sub_path_ra[0][0] + sub_path_ra[0][1])%2 == 0: sub_path_ra = sub_path_ra[1:] + sub_path_ra[:1] path_ra.append(sub_path_ra)

num_tokens_visited = 0 for sub_path_ra in path_ra: num_tokens_visited += len(sub_path_ra)

sys.stdout.write('The number of tokens visited is ' + str(num_tokens_visited) + '.\n')

path_length_ra = [0 for i in range(1100)] for sub_path_ra in path_ra: path_length_ra[len(sub_path_ra)] += 1

for length in range(1100): if path_length_ra[length] != 0: sys.stdout.write('The number of paths with length ' + str(length) + ' is ' + str(path_length_ra[length]) + '.\n') return path_ra

def oligo_token_pointer_array(path_ra): OTP_ra = [] for sub_path_ra in path_ra: for oligo_num in range(len(sub_path_ra)/6): OTP_ra.append(sub_path_ra[oligo_num*6:oligo_num*6 + 6]) # Take care of the remainders if len(sub_path_ra)%6 != 0: OTP_ra.append(sub_path_ra[-(len(sub_path_ra)%6):]) return OTP_ra
 * 1) This function inputs the token pointer path array
 * 2) and returns a list of oligos as lists of six token pointers
 * 1) and returns a list of oligos as lists of six token pointers

def check_token_representation(OTP_ra, TPP_ra): CTP_ra = [] for sub_OTP_ra in OTP_ra: for TP in sub_OTP_ra: CTP_ra.append(TP)
 * 1) This function checks to make sure each token is represented once and only once
 * 2) in the oligo token pointer lists
 * 1) in the oligo token pointer lists

num_strands = len(TPP_ra) num_subzones = len(TPP_ra[0]) problems = False

# Check each token for single presence for strand_num in range(num_strands): for token_num in range(num_subzones): if CTP_ra.count([strand_num, token_num]) == 0: sys.stdout.write(str([strand_num, token_num]) + ' not present.\n') problems = True elif CTP_ra.count([strand_num, token_num]) > 1: sys.stdout.write(str([strand_num, token_num]) + ' present more than once.\n') problems = True if problems == False: sys.stdout.write('Each token is represented once and only once in the oligo token pointer lists.\n')

return

def print_path(sub_path_ra, TPP_ra): sys.stdout.write('\nONE PATH ARRAY\n')
 * 1) This function prints the oligo path on the strand token lattice
 * 2) It is fun to see how the paths twist around the lattice
 * 3) You can use this function to help debug your program
 * 1) You can use this function to help debug your program

num_strands = len(TPP_ra) num_subzones = len(TPP_ra[0]) num_path_tokens = len(sub_path_ra) # Initialize strand token lattice sub_token_visit_ra = ['.' for subzone_num in range(num_subzones)] token_visit_ra = [sub_token_visit_ra[:] for strand_num in range(num_strands)]

# Assign visits for path_token_num in range(num_path_tokens): token = sub_path_ra[path_token_num] strand = token[0] subzone = token[1] token_visit_ra[strand][subzone] = path_token_num

# Print out strand token lattice spacer = '  ' for strand_num in range(num_strands): for subzone_num in range(num_subzones): visitor_string = str(token_visit_ra[strand_num][subzone_num]) sys.stdout.write(visitor_string) sys.stdout.write(spacer[:4 - len(visitor_string)]) sys.stdout.write('\n') sys.stdout.write('\n') return
 * 1) The idea here is to have a function that adds the numbers of one oligo path
 * 2) to the appropriate places in the big grid array. Eventually this will be printed
 * 3) in main. Also it needs to be initialized in main. Oligo_path is the path of
 * 4) one oligo, while grid_ra is the grid that is constantly being updated until
 * 5) it is printed in main. oligo_num is number that will be inputed to the grid_ra.

def generate_oligo_path(oligo_path, oligo_num, grid_ra): num_path_tokens = len(oligo_path) for path_token_num in range(num_path_tokens): token = oligo_path[path_token_num] strand = token[0] subzone = token[1] grid_ra[strand][subzone] = oligo_num return grid_ra
 * 1) Assign visits

def print_all_oligos(grid_ra, num_strands, num_subzones): spacer = '  ' for strand_num in range(num_strands): for subzone_num in range(num_subzones): visitor_string = str(grid_ra[strand_num][subzone_num]) sys.stdout.write(visitor_string) sys.stdout.write(spacer[:4 - len(visitor_string)]) sys.stdout.write('\n')

def split_oligo(new_OTP_ra, oligo_num, num_toks):
 * 1) given an oligo to split, split it and return the new list of oligos
 * 1) given an oligo to split, split it and return the new list of oligos

print new_OTP_ra[oligo_num]

original_oligo = new_OTP_ra[oligo_num]

oligo_1 = original_oligo[:num_toks] oligo_2 = original_oligo[num_toks:] print oligo_1 print'\n' print oligo_2 new_OTP_ra[oligo_num] = oligo_1 new_OTP_ra.insert(oligo_num + 1, oligo_2) return new_OTP_ra

sys.stdout.write('Honeycomb pointers module installed.\n')