User:Lindenb/Notebook/UMR915/20101206

=Dindel= Running Dindel with the whole BAM/Genome takes too much time. Now, running dindel on the exome only:

DINDEL_DIR=/usr/local/package/dindel-1.01 HG18=/path/to/pubdb/ucsc/hg18/chromosomes/hg18.fa BAM=/the/path/to/my.bam SAMTOOLS=/usr/local/package/samtools-0.1.10/samtools EXOME=/path/to/Agilent_sanger_exome_0809_PCCR.bed rm -f all.VCF header.VCF

awk '{printf("%s,%d,%d\n",$1,int($2)-10000,int($3)+10000);}' ${EXOME} | while read L; do	echo $L; CHR=`echo $L| cut -d "," -f1` START=`echo $L| cut -d "," -f2` END=`echo $L| cut -d "," -f3` ID=`echo $L| tr "," "_"` ${SAMTOOLS} view -b -o jeter.bam ${BAM} "${CHR}:${START}-${END}" ${SAMTOOLS} index jeter.bam ${BAM}

${DINDEL_DIR}/binaries/dindel-1.01-linux-64bit --analysis getCIGARindels --bamFile jeter.bam --outputFile sample.dindel_output --ref ${HG18} python ${DINDEL_DIR}/dindel-1.01-python/makeWindows.py --inputVarFile sample.dindel_output.variants.txt --windowFilePrefix sample.realign_windows --numWindowsPerFile 1000 for I in `ls sample.realign_windows.*.txt | cut -d '.' -f 3` do ${DINDEL_DIR}/binaries/dindel-1.01-linux-64bit --analysis indels --doDiploid --bamFile jeter.bam --outputFile sample.dindel_stage2_output_windows.$I  --ref ${HG18} --varFile sample.realign_windows.$I.txt --libFile sample.dindel_output.libraries.txt done ls *.glf.txt > sample.dindel_stage2_outputfiles.txt python ${DINDEL_DIR}/dindel-1.01-python/mergeOutputDiploid.py --inputFiles sample.dindel_stage2_outputfiles.txt \ --outputFile jeter.vcf --ref ${HG18} grep "#" jeter.vcf > header.VCF grep -v "#" jeter.vcf >> all.VCF rm -f jeter.vcf jeter.bam sample.dindel_output* sample.dindel_stage2_* sample.realign_windows.*

done Dec 7th morning: all.VCF is empty :-( . Bug in dindel ?

+ /usr/local/package/dindel-1.01/binaries/dindel-1.01-linux-64bit --analysis indels --doDiploid --bamFile jeter.bam --outputFile sample.dindel_stage2_output_windows.1 --ref /GENOTYPAGE/data/pubdb/ucsc/hg18/chromosomes/hg18.fa --varFile sample.realign_windows.1.txt --libFile sample.dindel_output.libraries.txt Reading BAM file: jeter.bam max: 0.0005 ninetyfifth_pct_prob: 0.0005 [bam_index_load] fail to load BAM index. Detected library file... max: 0.00466328 ninetyfifth_pct_prob: 0.000274311 Library bwa_rmdup_Brs1.bam loaded with 748 insert sizes. DetInDel parameters: tid: 1 width: 60 maxHap: 8 maxReads: 10000 skipMaxHap: 200 outputFilename: sample.dindel_stage2_output_windows.1 mapQualThreshold: 0.99 inferenceMethod: empirical analyzeLowFreq: 0 analyzeLowFreqDiffThreshold: 0.5 showHapDist: 0 minReadOverlap: 20 maxReadLength: 500 maxHapReadProd: 10000000 showCandHap: 0 showReads: 0 filterHaplotypes: 0 noIndelWindow: -1 mapUnmappedReads: 1 numOutputTopHap: 5 checkAllCIGARs: 1 changeINStoN: 0

quiet: 0 printCallsOnly: 0 faster: 0 doDiploid: 1 doEM: 0 outputPooledLikelihoods: 0 outputRealignedBAM: 0 processRealignedBAM: no   showHapAlignments: 0 EM tol: 0.0001 bayesEM a0: 0.001 bayesType: singlevariant priorIndel: 0.0001 priorSNP: 0.001 filterReadAux: Observation model parameters: modelType: probabilistic maxLengthIndel: 5 pError: 0.0005 baseQualThreshold: 0.995 fixedBaseQual: 0.99 mapQualThreshold: 100 capMapQualFast: 45 minOverlap: 0 scaleError: 0.95 numE: 3 pMut: 1e-05 numIndels: 1 indelDistribution: exponential maxLengthDel: 5 pError: 0.0005 pFirstgLO: 0.01 padCover: 2 maxMismatch: 2 maxTryHash: 5 checkBaseQualThreshold: 0.95 mapUnmappedReads: 1 tid: chr1 pos: 10575 leftPos: 10516 rightPos: 10635 Fetching reads.. dindel20101206c.sh: line 20: 18095 Segmentation fault     ${DINDEL_DIR}/binaries/dindel-1.01-linux-64bit --analysis indels --doDiploid --bamFile jeter.bam --outputFile sample.dindel_stage2_output_windows.${I} --ref ${HG18} --varFile sample.realign_windows.${I}.txt --libFile sample.dindel_output.libraries.txt

I also tried using dindel (...) --tid "${CHR}" --region "${START}-${END}" for '--analysis getCIGARindels'  but then it gave me the feeling that the parameters were just ignored.