Qualtofa.py

Copy and past the following into a text editor and save it as qualtofa.py to use the script.
 * 1) !/usr/bin/env python

import os, string, sys, time, copy from datetime import * from optparse import * from subprocess import * commandLine = copy.deepcopy(sys.argv) cwd = os.getcwd timeStamp = datetime.now.ctime.replace(' ','-').replace(':','-').replace('--','-') #Ex: 'Mon-Jun-14-11-08-55-2010' cntgMatchRanges = None progName, progVersion = ('qualtofa.py','2.52') progUsage = 'python %s [options] <.qual file>' % progName printLog = []
 * 1) Imports / Variable Initilization##########################################################################################

def errorExit(message=None,exitStatus=1): '''Version 1.0 Is called when the script encounters a fatal error. Prints the debug log to standard out (usually the screen)''' if message: print '%s: Error: %s' % (progName, message) else: print progName +': Unknown error, printing debug log...' for line in debugLog: print line  #print debug log sys.exit(exitStatus)

def mkRange(numList): Function designed to make a list of ranges (e.g. 2-5, 8-10) out of a list of integers (e.g. 2,3,4,5,8,9,10) outList = [] if len(numList) > 0: start  = numList[0] current = numList[0] numList.append(numList[-1]) for num in numList[1:]: if (num - 1) != current: outList.append([start,current]) start = num current = num return outList

def readLFile(filePath, numLinesToRead = None): fileHandle = open(filePath, 'r') if numLinesToRead is None: fileData = fileHandle.readlines else: fileData = [fileHandle.readline for count in range(0,numLinesToRead)] fileHandle.close return fileData

def writeLFile(filePath, fileData): fileHandle = open(filePath, 'w') for line in fileData: fileHandle.write(fileData) fileHandle.close

def saveQualStandard(filePath, contigs, qualValues, headers, schema=None): if schema is None: schema = ['Position in the contig(1 based)', 'Called base', "Number of reads suppoting 'A'", "Number of reads supporting 'C'", "Number of reads supporting 'G'", "Number of reads supporting 'T'", "Number of reads supporting '_'", 'Coverage'] if len(contigs) == 0: errorExit('saveQualStandard: empty list supplied') if len(contigs) != len(qualValues) or len(contigs) != len(headers): errorExit('saveQualStandard: Unequal number of entries is input list') outHandle = open(filePath, 'w') outHandle.write('Schema:'+ '\n' + '\t'.join(schema) + '\n') for cIndex in range(0,len(contigs)): outHandle.write(headers[cIndex] + '\n') for bIndex in range(0,len(contigs[cIndex])): outHandle.write('\t'.join([contigs[cIdnex][bIndex]] + qualValues[cIndex]) + '\n') outHandle.close def parseQualStandard(filePath): columbWidth = 8 fileData = [line.strip.split('\t') for line in readLFile(filePath)]  #fileData is a list of lists representing the tab-deliminated structure of the .qual file if fileData[0] == ['Schema:']: del fileData[0]  #removes the unessesary first line of the file, if present schema = fileData[0]  #extract the tab-deliminated schema into a list headers, contigs, qualValues = ([], [], [])  #initilizes multiple lists at a time index = 0 for line in fileData[1:]: if len(line) == 1:  #if it is a header headers.append(line[0]) contigs.append([]) qualValues.append([]) elif len(line) == columbWidth:  #if it is  quality value contigs[-1].append(line[1]) qualValues[-1].append([int(num) for num in line[2:]])  #converts all of the quality values into integer objects and saves them in qualvalues else: errorExit('parseQualStandard: Quality file at %s deviated from the expected format on line %d.' % (filePath, index)) index += 1 return (contigs, qualValues, headers, schema)

def parseQualAlignment(filePath): columbWidth = 9 headColNum = 2 fileData = part.strip for part in line.split('\t')] for line in readLFile(filePath)]     #extracts the reference from the second columb qualData = [line[headColNum:] for line in fileData[2:]]  #removes the first lines and columbs of fileData, which dont contain unextracted data headers, contigs, cntgStarts, qualValues = ([], [], [], [])  #initilizes multiple lists at a time refPos = 0 for line in qualData: if len(line) > 0: for index in range(0,len(line),columbWidth): if headers.count(line[index]) == 0:  #if the contig header has not been encountered before headers.append(line[index]) cntgStarts.append(refPos) contigs.append([]) qualValues.append([]) headIndex = headers.index(line[index]) contigs[headIndex].append(line[index + 2])  #append the called base character to its contig qualValues[headIndex].append([int(num) for num in line[index + 3: index + columbWidth]])  #converts all of the quality values into integer objects and saves them in qualvalues refPos += 1 alignmentIndexs = [[cntgStarts[index], cntgStarts[index] + len(contigs[index]) - 1] for index in range(0,len(cntgStarts))]   return (contigs, qualValues, headers, alignmentIndexs, reference, refHeader, schema)

def parseQual(filePath): columbWidth = 9 headColNum = 2 fileHead = readLFile(filePath, 2) if fileHead[0].strip == 'Schema:': del fileHead[0]  #removes the unessesary first line of the file, if present schema = fileHead[0].strip.split('\t') if schema[1] == 'Reference' and len(schema) % columbWidth == headColNum: return parseQualAlignment(filePath) elif len(schema) == 8: return parseQualStandard(filePath) else: errorExit('parseQual: The quality file at %s has an unreconized format based on the schema on line 2.' % filePath)

def listToStr(aList): out = '' for char in aList: out += char return out

def saveFasta(filePath, headers, sequences): outHandle = open(filePath, 'w') if len(headers) != 0 and len(sequences) != 0: if len(headers) != len(sequences): errorExit('saveFastaFile: different number of headers and sequences') if type(headers) == str and type(sequences) == str: headers = [headers] sequences = [sequences] if type(sequences[0]) == list: sequences = [listToStr(seq) for seq in sequences] for index in range(0,len(headers)): outHandle.write('>%s\n' % headers[index]) if type(sequences[index]) == str: sequences[index] = [sequences[index]] #if there is only one line of sequence for a given header, make it a one-item list for the next loop for seq in sequences[index]: outHandle.write(seq + '\n') outHandle.close

def parseFasta(filePath): headers = [] seqs = [] fileHandle = open(filePath, 'r') fileData = fileHandle.readlines + ['>']  #the additional header line is there make to following for loop process the last sequence fileHandle.close currSeq = '' for line in fileData: if line[0] == '>':  #if the line is a fasta header headers.append(line[1:].strip )  #the fisrt character is the '>' and the last is a newline '\n' seqs.append(currSeq) currSeq = '' else: currSeq += line.strip return (headers[:-1], seqs[1:])

def removeRepeats(aList): for entry in aList: if aList.count(entry) > 1: aList.remove(entry) return removeRepeats(aList) return aList

def maskingCallback(option, opt_str, value, parser): value = [] def floatable(str): try: float(str) return True except ValueError: return False for arg in parser.rargs: if arg[:2] == "--" and len(arg) > 2: break  # stop on --foo like options if arg[:1] == "-" and len(arg) > 1 and not floatable(arg): break  # stop on -a, but not on -3 or -3.0 value.append(arg) if len(value) == 2: break if len(value) == 0: errorExit("option '%s' requires one or two arguments; none supplied." % option.dest) del parser.rargs[:len(value)] if len(value) == 1: value.append(0) value[0] = int(value[0]) value[1] = float(value[1]) if value[0] < 0: errorExit("The first argument of the option '%s' must be a positive integer; %d supplied." % (option.dest,value[0])) if value[1] < 0 or value[1] > 1: errorExit("The second argument of the option '%s' must be a decimal between 0 and 1; %f supplied." % (option.dest,value[1])) setattr(parser.values, option.dest, value)

cmndLineParser = OptionParser(usage=progUsage, version="Version %s" % progVersion) maksingGroup = OptionGroup(cmndLineParser, 'Coverage and Call Proportion Masking','The following options take one integer argument and one decimal argument between 0 and 1, if the second is not supplied it is assumed to be 0.') cmndLineParser.add_option(  "-c",   "--include-contigs",    action="store_true",    default=False,                                                             help="Include each contig on its own line. (Default: only output the consensus)") maksingGroup.add_option(  "-m",   "--mask-contigs",       action="callback",      default=[0,0],      metavar='INT FLOAT',   callback=maskingCallback,    dest='mask_contigs',      help="Set minimum coverage depth and call proportion for contig masking; independent of the consensus. Requires the use of the -c modifier.(Default: 0, 0)") maksingGroup.add_option(  "-s",   "--mask-contig-SNPs",   action="callback",      default=[0,0],      metavar='INT FLOAT',   callback=maskingCallback,    dest='mask_contig_SNPs',  help="Set minimum coverage depth and call proportion for contig SNP masking; independent of the consensus. Requires the use of the -c modifier.(Default: 0, 0)") maksingGroup.add_option(  "-M",   "--mask-consensus",     action="callback",      default=[0,0],      metavar='INT FLOAT',   callback=maskingCallback,    dest='mask_consensus',    help="Set minimum coverage depth and call proportion for the consensus; a new masked sequence will be added to the output file. (Default: 0, 0)") maksingGroup.add_option(  "-S",   "--mask-SNPs",          action="callback",      default=[0,0],      metavar='INT FLOAT',   callback=maskingCallback,    dest='mask_SNPs',         help="Set minimum coverage depth and call proportion for SNPs in the consensus; a new masked sequence will be added to the output file. (Default: 0, 0)") cmndLineParser.add_option_group(maksingGroup) cmndLineParser.add_option(  "-t",   "--end-trim",           action="store",         default=0,          type="int",    metavar="INT",      help="Trim all the bases on either end of all contigs by a specified number of bases. (Default: 0)") cmndLineParser.add_option(  "-q",   "--end-trim-qual",      action="store",         default=0,          type="int",    metavar="INT",      help="Trim all the bases on either end of all contigs that have a quality value less than the specified amount. This option is implemented after standard end trimming (see -t option). (Default: 0)") cmndLineParser.add_option(  "-l",   "--min-match-length",   action="store",         default=0,          type="int",    metavar="INT",      help="Only include contigs that have a least one match longer or as long as the specified amount. This is implemented after any end trimming (see -q and -t options). (Default: 0)") cmndLineParser.add_option(  "-n",   "--no-overlap",         action="store_true",    default=False,                                         help="Add deletions (i.e. -'s) to the reference to accommodate any overlapping sequence, including unmatched sequence. (Default: Condense all overlapping regions of the consensus into IUPAC ambiguity codes.)") cmndLineParser.add_option(  "-a",   "--no-match-overlap",   action="store_true",    default=False,                                          help="Add deletions to the reference to accommodate overlapping matches. (Default: Condense all overlapping regions of the consensus into IUPAC ambiguity codes.)") cmndLineParser.add_option(  "-k",   "--keep-contained",     action="store_true",    default=False,                                         help="Include contained contigs in the alignment and consensus. (Default: save sequences of contained contigs to a separate file)") cmndLineParser.add_option(  "-p",   "--save-SNPs",          action="store_true",    default=False,                                         help="Save SNPs to a .qual file. (Default: Do not save SNP file)") cmndLineParser.add_option(  "-d",   "--dont-align-contigs", action="store_true",    default=False,                                         help="Do not align contigs to the reference. Requires the use of the -c modifier. (Default: align contigs to the reference)") cmndLineParser.add_option(  "-r",   "--external-ref",       action="store",         default=None,       type="string", metavar="PATH",     help="Specify a reference to use instead of the one contained in the quality file.") cmndLineParser.add_option(  "-v",   "--verbose",            action="store_true",    default=False,                                         help="Print progress updates and relevant statistics to the standard output. (Default: run silently)") cmndLineParser.add_option(  "-i",   "--save-run-info",      action="store_true",    default=False,     help="Save a txt file of anything printed to the screen. Requires the use of -v/--verbose. (Default: Do not save)") (options, args) = cmndLineParser.parse_args(commandLine) if options.include_contigs == False: if sum(options.mask_contigs) > 1: errorExit('The option -m/--mask-contigs requires the use of the option -c/--include-contigs.') if sum(options.mask_contig_SNPs) > 1: errorExit('The option -s/--mask-contig-SNPs requires the use of the option -c/--include-contigs.') if options.dont_align_contigs: errorExit('The option -d/--dont-align-contigs requires the use of the option -c/--include-contigs.') if len(args) == 1: cmndLineParser.print_help sys.exit(0) argNum = len(args) - 1  #counts the amount of arguments, negating the 'qualtofa' at the start of the command line if argNum != 1: errorExit('qualtofa takes exactly 1 argument; %d supplied' % argNum, 0) qualPath = args[1]  #The file path to the input file supplied by the user
 * 1) Command Line Parser#######################################################################################################

def verbose(toPrint): if options.verbose: print toPrint, printLog.append(toPrint)

verbose('qualtofa: Loading data from %s...' % qualPath) qualData = parseQual(qualPath) verbose('Complete\n') if len(qualData) == 4:  #if the quality file contains a list of unaligned contigs (e.g. one directly from yasra) isAlignment = False if options.no_match_overlap: errorExit('The option -a/--no-match-overlap requires an aligned quality consensous file, like that given by sumqual.py.') if any(options.mask_consensus): errorExit('The option -M/--mask-consensus requires an aligned quality consensous file, like that given by sumqual.py.') if any(options.mask_SNPs): errorExit('The option -S/--mask-SNPs requires an aligned quality consensous file, like that given by sumqual.py.') if any(options.mask_contig_SNPs): errorExit('The option -s/--mask-contig-SNPs requires an aligned quality consensous file, like that given by sumqual.py.') if options.keep_contained: errorExit('The option -k/--keep-contained requires an aligned quality consensous file, like that given by sumqual.py.') if options.min_match_length: errorExit('The option -l/--min-match-length requires an aligned quality consensous file, like that given by sumqual.py.') if options.save_SNPs: errorExit('The option -p/--save-SNPs requires an aligned quality consensous file, like that given by sumqual.py.') if options.no_overlap: errorExit('The option -n/--no-overlap requires an aligned quality consensous file, like that given by sumqual.py.') if options.external_ref: errorExit('The option -r/--external-ref requires an aligned quality consensous file, like that given by sumqual.py.') contigs, qualities, headers, schema = qualData elif len(qualData) == 7:  #if the quality file contains an alignment (i.e. from sumqual) isAlignment = True contigs, qualities, headers, alignmentIndexs, reference, refHeader, schema = qualData else: errorExit
 * 1) Quality file parsing######################################################################################################

if options.external_ref is not None: verbose('qualtofa: option -r/--external-ref: loading reference at %s...' % options.external_ref) (seqs,headers) = parseFasta(options.external_ref) reference, refHeader = seqs[0], headers[0] verbose('Complete\n')
 * 1) External reference parsing(-r option implimintation)######################################################################

def delContig(index): del contigs[index] del headers[index] del qualities[index] if isAlignment: del alignmentIndexs[index] if cntgMatchRanges is not None: del cntgMatchRanges[index]

def delContigs(indexes): if len(indexes) > 0: indexes.sort indexes.reverse indexes = removeRepeats(indexes) for index in indexes: delContig(index)  #deletes all contigs

def delBase(cntgIndex, baseIndex): del qualities[cntgIndex][baseIndex] del contigs[cntgIndex][baseIndex] if isAlignment: if baseIndex == 0: alignmentIndexs[cntgIndex][0] += 1 else: alignmentIndexs[cntgIndex][1] -= 1

def delBases(cntgIndex, baseIndexes): baseIndexes.sort baseIndexes.reverse for index in baseIndexes: delBase(cntgIndex, index)

if options.end_trim > 0: verbose('qualtofa: option -t/--end-trim: Trimming %d bases from both ends of each contig...' % options.end_trim) rejectIndexes = [index for index in range(0,len(contigs)) if len(contigs[index]) <= options.end_trim * 2]  #makes a list of all the indexs of contigs that are too short to be trimmed. delContigs(rejectIndexes)  #deletes all rejected contigs contigs = [cntg[options.end_trim : -options.end_trim] for cntg in contigs]  #trims all remaining contigs qualities = [qual[options.end_trim : -options.end_trim] for qual in qualities]  #trims all remaining contigs alignmentIndexs = [[indexRange[0] + options.end_trim,indexRange[1] - options.end_trim] for indexRange in alignmentIndexs]    if options.verbose:        verbose('Complete\n')        if len(rejectIndexes) > 0: verbose('option -t/--end-trim: %d contig(s) were removed from the analysis because they were shorter than twice the trim length of %d.\n' % (len(rejectIndexes),options.end_trim))
 * 1) Contig end Trimming (-q and -t option implimentation)#####################################################################

if options.end_trim_qual > 0: verbose('qualtofa: option -q/--end-trim-qual: Trimming contig ends until a base with a coverage of %d or more is reached...' % options.end_trim_qual) for index in range(0, len(contigs)): while len(qualities[index]) > 0 and qualities[index][0][-1] < options.end_trim_qual: delBase(index,0) while len(qualities[index]) > 0 and qualities[index][-1][-1] < options.end_trim_qual: delBase(index,-1) rejectIndexes = [index for index in range(0,len(qualities)) if len(qualities[index]) == 0] #makes a list of all the indexs of contigs that dont contain sequence after trimming delContigs(rejectIndexes)  #deletes all rejected contigs if options.verbose: verbose('Complete\n') if len(rejectIndexes) > 0: verbose('option -q/--end-trim-qual: %d contig(s) removed from the analysis because all of their sequence had quality values less than %d.\n' % (len(rejectIndexes),options.end_trim_qual))

if isAlignment: cntgMatchRanges = [mkRange([index for index in range(0,len(cntg)) if cntg[index].isupper]) for cntg in contigs] if options.min_match_length > 0: verbose('qualtofa: option -l/--min-match-length: filtering contigs without a match of at least %d base pairs...' % options.min_match_length) maxMatchLens = [max([match[1] - match[0] + 1 for match in matchList] + [0]) for matchList in cntgMatchRanges] rejectIndexes = [index for index in range(0,len(contigs)) if maxMatchLens[index] < options.min_match_length] delContigs(rejectIndexes) if options.verbose: verbose('Complete\n') if len(rejectIndexes) > 0: verbose('option -l/--min-match-length: %d contig(s) removed from the analysis due to the lack of a match %d base pairs long.\n' % (len(rejectIndexes),options.min_match_length))
 * 1) Contigs Match Coordinate Extraction (-l option implimentation)############################################################

if isAlignment: verbose('qualtofa: Searching for contained contigs...') matchRanges = cntgMatchRanges[index][0][0] + alignmentIndexs[index][0],cntgMatchRanges[index][-1][1] + alignmentIndexs[index][0 for index in range(0,len(cntgMatchRanges))] containedIndexes = [[bIndex for bIndex in range(0,len(matchRanges)) if matchRanges[aIndex][0] <= matchRanges[bIndex][0] and matchRanges[aIndex][1] >= matchRanges[bIndex][1] and aIndex != bIndex] for aIndex in range(0,len(matchRanges))]   containedIndexes = [index for contained in containedIndexes for index in contained]    containedContigs = [(headers[index], contigs[index]) for index in containedIndexes]    verbose('Complete\n')    if options.keep_contained == False:        delContigs(containedIndexes)        if len(containedIndexes) > 0: verbose('qualtofa: %d contained contig(s) removed from the analysis.\n' % len(containedIndexes))    elif len(containedIndexes) > 0: verbose('qualtofa: option -k/--keep-contained: %d contained contig(s) included in the analysis.\n' % len(containedIndexes))
 * 1) Contained contig extraction (-k option implimentation)####################################################################

def getIUPAC(baseValues): '''Takes a list of base character values (ACTG-~N) and condenses them into a single IUPAC code. Assumes only input characters to be ACTG-~N''' def IUPAC(bases): conversion = {'CT':'Y','AG':'R','AT':'W','CG':'S','GT':'K','AC':'M','AGT':'D','ACG':'V','ACT':'H','CGT':'B','ACGT':'N'} bases.sort #['A', 'C', 'G', 'T'] if bases[0].islower: bases = map(str.upper,bases) return str.lower(conversion[''.join(bases)]) else: return conversion[''.join(bases)] chars = removeRepeats(baseValues) chars.sort #['-', 'A', 'C', 'G', 'N', 'T', 'a', 'c', 'g', 'n', 't', '~'] if len(chars) == 0: return '-' if len(chars) == 1: return chars[0] if chars[-1] == '~':  #converts '~' to '-' del chars[-1] if chars[0] != '-': chars.insert(0,'-') priorityList = ('ACGT','N','-','acgt','n') for group in priorityList: matchs = [base for base in chars if group.find(base) != -1] if len(matchs) == 0: continue elif len(matchs) == 1: return matchs[0] else: return IUPAC(matchs)

if isAlignment: if options.no_match_overlap or options.no_overlap: if options.no_overlap: verbose('qualtofa: option -n/--no-overlap: Adding deletions to the reference to accomodate all overlapping sequence...') else: verbose('qualtofa: option -a/--no-match-overlap: Adding deletions to the reference to accomodate overlapping matches...') matchRanges = cntgMatchRanges[index][0][0] + alignmentIndexs[index][0],cntgMatchRanges[index][-1][1] + alignmentIndexs[index][0 for index in range(0,len(cntgMatchRanges))] if options.no_overlap: interSeqGaps = [alignmentIndexs[index + 1][0] - alignmentIndexs[index][1] -1 for index in range(0,len(contigs) - 1)] else: interSeqGaps = [matchRanges[index + 1][0] - matchRanges[index][1] - 1 for index in range(0,len(contigs) - 1)] verbose('Complete\n') gapSum = sum([abs(gap) for gap in interSeqGaps if gap < 0]) totalLen = sum(map(len,contigs)) if options.no_overlap: verbose('qualtofa: option -n/--no-overlap: %d of %d total bases (%d percent) in the consensus is overlaping sequence.\n' % (gapSum,totalLen,gapSum*100/len(reference))) else: verbose('qualtofa: option -a/--no-match-overlap: %d of %d total bases (%.1f%%) in the consensus are overlapping matchs.\n' % (gapSum,totalLen,float(gapSum)*100/len(reference))) refOffset = 0 for index in range(0,len(interSeqGaps)): if interSeqGaps[index] < 0:  #if matching sequence overlaps... for count in range(0,abs(interSeqGaps[index])): reference.insert(matchRanges[index][1] + refOffset + 1, '-') refOffset += abs(interSeqGaps[index]) alignmentIndexs[index + 1][0] += refOffset alignmentIndexs[index + 1][1] += refOffset
 * 1) Overlap handleing (-i, -n options implimentation)############################################################################

def getProp(call, qual): equivalancies = {'A':0,'C':1,'G':2,'T':3,'N':4} try: callIndex = equivalancies[call.upper] except KeyError: errorExit('getProp: Non-neucleotide base %s encountered...' % call) if callIndex == 4: return float(max(qual[:4]))/float(qual[-1]) return float(qual[callIndex])/float(qual[-1]) if isAlignment: verbose('qualtofa: Creating consensus...') numMasked, numQualMasked, SNPmasked, SNPqualmasked, numPropMasked, SNPpropMasked, SNPcount = (0,0,0,0,0,0,0) if sum(options.mask_SNPs + options.mask_consensus) > 0: includeMaskedCon = True else: includeMaskedCon = False rawConsensous = [[] for index in range(0,len(reference))]   if includeMaskedCon: maskedRawCon = [[] for index in range(0,len(reference))]    for cIndex in range(0,len(contigs)):        for bIndex in range(0,len(contigs[cIndex])):            rIndex = alignmentIndexs[cIndex][0] + bIndex            refChar = reference[rIndex]            cntgChar = contigs[cIndex][bIndex]            rawConsensous[rIndex].append(cntgChar)            if includeMaskedCon:                if cntgChar.isupper and refChar.isalpha and refChar != cntgChar:                    SNPcount += 1                    if qualities[cIndex][bIndex][-1] < options.mask_SNPs[0]:                        cntgChar = 'N'                        SNPqualmasked += 1                    if getProp(contigs[cIndex][bIndex],qualities[cIndex][bIndex]) < options.mask_SNPs[1]:                        cntgChar = 'N'                        SNPpropMasked += 1                    if qualities[cIndex][bIndex][-1] < options.mask_SNPs[0] or getProp(contigs[cIndex][bIndex],qualities[cIndex][bIndex]) < options.mask_SNPs[1]:                        SNPmasked += 1                if contigs[cIndex][bIndex].isalpha:                    if qualities[cIndex][bIndex][-1] < options.mask_consensus[0]:                        numQualMasked += 1                        if contigs[cIndex][bIndex].isupper: cntgChar = 'N'                        else: cntgChar = 'n'                    if getProp(contigs[cIndex][bIndex],qualities[cIndex][bIndex]) < options.mask_consensus[1]:                        numPropMasked += 1                        if contigs[cIndex][bIndex].isupper: cntgChar = 'N'                        else: cntgChar = 'n'                    if qualities[cIndex][bIndex][-1] < options.mask_consensus[0] or getProp(contigs[cIndex][bIndex],qualities[cIndex][bIndex]) < options.mask_consensus[1]:                        numMasked += 1                maskedRawCon[rIndex].append(cntgChar)               conSequence = [getIUPAC(charList) for charList in rawConsensous]    if includeMaskedCon: maskedConSeq = [getIUPAC(charList) for charList in maskedRawCon]    verbose('Complete\n')    totalLen = len(conSequence)    if sum(options.mask_SNPs) > 0: verbose('qualtofa: %d of %d total consensus SNPs (%.1f%%) were masked.\n' % (SNPmasked,SNPcount, float(SNPmasked*100)/SNPcount))    if options.mask_SNPs[0] > 0: verbose('qualtofa: option -S/--mask-SNPs: %d of %d total SNPs (%.1f%%) have less than %d coverage depth.\n' % (SNPqualmasked,SNPcount, float(SNPqualmasked*100)/SNPcount, options.mask_SNPs[0]))    if options.mask_SNPs[1] > 0: verbose('qualtofa: option -S/--mask-SNPs: %d of %d total SNPs (%.1f%%) have less than a %.2f call proportion.\n' % (SNPpropMasked,SNPcount, float(SNPpropMasked*100)/SNPcount, options.mask_SNPs[1]))    if options.mask_consensus[0] > 0: verbose('qualtofa: %d of %d total consensus bases (%.1f%% )were masked.\n' % (numMasked,totalLen, float(numMasked*100)/totalLen))            if options.mask_consensus[0] > 0: verbose('qualtofa: option -M/--mask-consensus: %d of %d total bases (%.1f%%) have less than %d coverage depth.\n' % (numQualMasked,totalLen, float(numQualMasked*100)/totalLen, options.mask_consensus[0]))            if options.mask_consensus[1] > 0: verbose('qualtofa: option -M/--mask-consensus: %d of %d total bases (%.1f%%) have less than a %.2f call proportion.\n' % (numPropMasked,totalLen, float(numPropMasked*100)/totalLen, options.mask_consensus[1]))
 * 1) Consensous Creation (-M, -S options implimentation)############################################################################

if isAlignment: SNPcovNum, SNPpropNum, SNPmaskedNum, SNPcount = (0,0,0,0) SNPs = [] for cIndex in range(0,len(contigs)): for bIndex in range(0,len(contigs[cIndex])): refPos = bIndex + alignmentIndexs[cIndex][0] refChar = reference[refPos] cntgChar = copy.deepcopy(contigs[cIndex][bIndex]) if cntgChar.isupper and refChar.isalpha and cntgChar.isalpha and refChar != cntgChar: SNPcount += 1 SNPs.append([refPos,refChar,headers[cIndex],bIndex,cntgChar] + qualities[cIndex][bIndex]) if qualities[cIndex][bIndex][-1] < options.mask_contig_SNPs[0]: contigs[cIndex][bIndex] = 'N'                   SNPcovNum += 1 if getProp(cntgChar,qualities[cIndex][bIndex]) < options.mask_contig_SNPs[1]: contigs[cIndex][bIndex] = 'N'                   SNPpropNum += 1 if qualities[cIndex][bIndex][-1] < options.mask_contig_SNPs[0] or getProp(cntgChar,qualities[cIndex][bIndex]) < options.mask_contig_SNPs[1]: SNPmaskedNum += 1 totalLen = sum(map(len,contigs)) if sum(options.mask_contig_SNPs) > 0: verbose('qualtofa: %d of %d total SNPs (%.1f%%) were masked\n' % (SNPmaskedNum,SNPcount,float(SNPmaskedNum*100)/SNPcount)) if options.mask_contig_SNPs[0] > 0: verbose('qualtofa: option -s/--mask-contig-SNPs: %d of %d total SNPs (%.1f%%) have less than %d coverage depth.\n' % (SNPcovNum,SNPcount,float(SNPcovNum*100)/SNPcount,options.mask_contig_SNPs[0])) if options.mask_contig_SNPs[1] > 0: verbose('qualtofa: option -s/--mask-contig-SNPs: %d of %d total SNPs (%.1f%%) have less than a %.2f call proportion.\n' % (SNPpropNum,SNPcount,float(SNPpropNum*100)/SNPcount,options.mask_contig_SNPs[1]))
 * 1) Contigs SNP Detection/Masking(-s option implimentation)###################################################################

if sum(options.mask_contigs) > 0: numMasked, numQualMasked, numPropMasked = (0,0,0) for cIndex in range(0,len(contigs)): for bIndex in range(0,len(contigs[cIndex])): cntgChar = copy.deepcopy(contigs[cIndex][bIndex]) if cntgChar.isalpha: if qualities[cIndex][bIndex][-1] < options.mask_contigs[0] or getProp(cntgChar,qualities[cIndex][bIndex]) < options.mask_contigs[1]: numMasked += 1 if qualities[cIndex][bIndex][-1] < options.mask_contigs[0]: if contigs[cIndex][bIndex].isupper: contigs[cIndex][bIndex] = 'N'                   else: contigs[cIndex][bIndex] = 'n'                    numQualMasked += 1 if getProp(cntgChar,qualities[cIndex][bIndex]) < options.mask_contigs[1]: if contigs[cIndex][bIndex].isupper: contigs[cIndex][bIndex] = 'N'                   else: contigs[cIndex][bIndex] = 'n'                    numPropMasked += 1 totalLen = sum(map(len,contigs)) verbose('qualtofa: %d bases of %d total bases (%.1f%%) were masked.\n' % (numMasked,totalLen, float(numMasked*100)/totalLen)) verbose('qualtofa: option -m/--mask-contigs: %d of %d total bases (%.1f%%) have less than %d coverage depth.\n' % (numQualMasked,totalLen, float(numQualMasked*100)/totalLen, options.mask_contigs[0])) verbose('qualtofa: option -m/--mask-contigs: %d of %d total bases (%.1f%%) have less than a %.2f call proportion.\n' % (numPropMasked,totalLen, float(numPropMasked*100)/totalLen, options.mask_contigs[1]))
 * 1) Contigs Masking(-m option implimentation)#################################################################################

if len(containedContigs) > 0: outPath = qualPath + '_contained_contigs.fsa' verbose('qualtofa: Saving contained contigs to %s...' % outPath) saveFasta(outPath, *zip(*containedContigs)) verbose('Complete\n')
 * 1) Contianed Contigs file saving#############################################################################################

if options.save_SNPs: outPath = qualPath + '_SNPs.qual' verbose('qualtofa: Saving %d SNPs to %s...' % (len(SNPs),outPath)) outHandle = open(outPath, 'w') outHandle.write('Schema:\nPosition\tReference\tName\tIndex\tcall\tA\tC\tG\tT\tdash\tsum\n') for SNP in SNPs: outHandle.write('\t'.join(map(str,SNP)) + '\n') outHandle.close verbose('Complete\n')
 * 1) SNP file saving###########################################################################################################

if isAlignment and options.dont_align_contigs == False: contigs = [['-'] * alignmentIndexs[index][0] + contigs[index] for index in range(0,len(contigs))]
 * 1) Contig sequence alignment#################################################################################################

verbose('qualtofa: Saving fasta output to %s...' % (qualPath + '.fsa')) conHeader = 'Consensus' maskedConHeader = 'Masked Consensus' if isAlignment: outputHeads, outputSeqs = ([refHeader,conHeader],[reference,conSequence]) if includeMaskedCon: outputHeads.append(maskedConHeader) outputSeqs.append(maskedConSeq) else: outputHeads, outputSeqs = ([],[]) if options.include_contigs: outputHeads += headers outputSeqs += contigs outPath = os.path.join(os.getcwd, os.path.basename(qualPath) + '.fsa') saveFasta(outPath, outputHeads, outputSeqs) verbose('Complete\n') if options.save_run_info: runtimeOutPath = os.path.join(os.getcwd, 'qualtofa_runtime_output_%s.txt' % timeStamp) verbose('qualtofa: option -i/--save-run-info: saving run-time output to %s...' % runtimeOutPath) runHandle = open(runtimeOutPath, 'w') for line in printLog: runHandle.write(line) runHandle.close verbose('Complete\n')
 * 1) Output file Saving########################################################################################################