Coveragestats.py
From OpenWetWare
Jump to navigationJump to search
Copy and past the following into a text editor and save it as coveragestats.py to use the script.
#!/usr/bin/env python
###Imports / Variable Initilization##########################################################################################
import os, copy, sys
from optparse import *
commandLine = copy.deepcopy(sys.argv)
cwd = os.getcwd()
cntgMatchRanges = None
progName, progVersion, progArgNum = ('coveragestats.py','1.12', 1)
progUsage = 'python %s [options] <.txt file>' % progName
printLog = []
progDescription = '''
Calculates statistics for each contig in a Tablet output file and outputs them to a tab-delimitated
'.txt' file following the name of their respective contig. The statistics calculated are: number of coverage values,
average coverage depth, percent of bases with a coverage greater than zero, and maximum coverage depth. coveragestats.py
requires that contig names have at least one non-numerical character. Created by: Zachary S. L. Foster'''
#############################################################################################################################
def errorExit(message=None,exitStatus=1):
'''Version 1.0
Is called when the script encounters a fatal error. Prints the debug log to standard out (usually the screen)'''
if message:
print '%s: Error: %s' % (progName, message)
else:
print progName +': Unknown error, printing debug log...'
for line in debugLog: print line #print debug log
sys.exit(exitStatus)
###Command Line Parser#######################################################################################################
cmndLineParser = OptionParser(usage=progUsage, version="Version %s" % progVersion, description = progDescription)
#cmndLineParser.add_option("-v", "--verbose", action="store_true", default=False, help="Print progress updates and relevant statistics to the standard output. (Default: run silently)")
#cmndLineParser.add_option("-r", "--save-run-info", action="store_true", default=False, help="Save a txt file of anything printed to the screen. Requires the use of -v/--verbose. (Default: Do not save)")
(options, args) = cmndLineParser.parse_args(commandLine)
if len(args) == 1:
cmndLineParser.print_help()
sys.exit(0)
argNum = len(args) - 1 #counts the amount of arguments, negating the 'qualtofa' at the start of the command line
if argNum != progArgNum: errorExit('%s takes exactly %d argument(s); %d supplied' % (progName, progArgNum, argNum), 0)
inPath = args[-1] #The file path to the input file supplied by the user
#############################################################################################################################
def verbose(toPrint):
if options.verbose:
print toPrint,
printLog.append(toPrint)
def intable(str):
try:
int(str)
return True
except ValueError: return False
def mkRange(numList):
'''Function designed to make a list of ranges (e.g. 2-5, 8-10) out of a list of integers (e.g. 2,3,4,5,8,9,10)'''
outList = []
if len(numList) > 0:
start = numList[0]
current = numList[0]
numList.append(numList[-1])
for num in numList[1:]:
if (num - 1) != current:
outList.append([start,current])
start = num
current = num
return outList
#############################################################################################################################
def average(ints):
return float(sum(ints))/float(len(ints))
def percentCov(ints):
return float(len(ints) - ints.count(0))/float(len(ints)) * 100
funcList = [len, average, percentCov, max]
templateStr = ['%d', '%.2f', '%.2f', '%d']
schema = ['Name','Contig Length','Average Coverage','Percent Coverage','Max Coverage']
inHandle = open(inPath, 'r')
outHandle = open(inPath + '_stats.txt', 'w')
header = inHandle.readline().strip()
coverages = []
outHandle.write('\t'.join(schema) + '\n')
while header != '':
line = inHandle.readline()
lineParts = line.strip().split(' ')
if map(intable,lineParts).count(True) == len(lineParts): #if all text seperated by spaces can be converted to an interger; aka is a list of coverage depths
coverages += map(int,lineParts)
continue
else:
stats = '\t'.join(templateStr) % tuple([func(coverages) for func in funcList])
outHandle.write('%s\t%s\n' % (header,stats))
header = line.strip()
del coverages
coverages = []
del line, lineParts
inHandle.close()
outHandle.close()
###############################################################################################################################