User:Lindenb/Notebook/UMR915/2012/02/21

From OpenWetWare
< User:Lindenb‎ | Notebook‎ | UMR915‎ | 2012‎ | 02
Jump to: navigation, search
Owwnotebook icon.png Project name Report.pngMain project page
Next entryResultset next.png

Daily


Casava output

SNP

>>>	2
$1	seq_name           	chr22.fa
$2	pos                	16050821
$3	bcalls_used        	4
$4	bcalls_filt        	0
$5	ref                	C
$6	Q(snp)             	1
$7	max_gt             	CC
$8	Q(max_gt)          	6
$9	max_gt|poly_site   	CT
$10	Q(max_gt|poly_site)	27
$11	A_used             	0
$12	C_used             	3
$13	G_used             	0
$14	T_used             	1
<<<	2

Indel

>>>	2
$1	seq_name          	chr22.fa
$2	pos               	16052168
$3	type              	4I
$4	ref_upstream      	CTATCTCAAA
$5	ref/indel         	----/AAAC
$6	ref_downstream    	AAACAAACAA
$7	Q(indel)          	365
$8	max_gtype         	het
$9	Q(max_gtype)      	203
$10	depth             	26
$11	alt_reads         	5
$12	indel_reads       	7
$13	other_reads       	14
$14	repeat_unit       	AAAC
$15	ref_repeat_count  	8
$16	indel_repeat_count	9
<<<	2


Analyse

VARKIT=${HOME}/src/variationtoolkit/bin

rm -f _tmp1.txt
echo -e "#SAMPLE\tVCF" > _s2v.txt
for F in SAMPLE*
do
	echo -e "$F\t${F}/${F}.vcf" >> _s2v.txt
 	${VARKIT}/casava2vcf ${F}/*/${F}_chr*.txt |\
		sed 's/^\(chr[0-9A-Z]*\).fa/\1/' |\
		sed "s/__SAMPLE__/${F}/" |\
		${VARKIT}/vcfid  --host localhost --user anonymous --port 3316 > ${F}/${F}.vcf
		
		egrep -v "^#" ${F}/${F}.vcf >> _tmp1.txt 

done

echo -e "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tSAMPLE" > _tmp2.txt
sort -u -t '	' -k1,1 -k2,2n -k4,4 -k5,5 _tmp1.txt  >> _tmp2.txt


cat _tmp2.txt |\
java -jar /usr/local/package/snpEff_2_0_5/snpEff.jar eff \
-i vcf -c /usr/local/package/snpEff_2_0_5/snpEff.config  hg19 | gzip --best > predictions.txt.gz

${VARKIT}/scanvcf _s2v.txt |\
sort -t '	' -k1,1 -k2,2n -k4,4 -k5,5 -k11,11 |\
${VARKIT}/samplespersnp --sample 11 2> /dev/null > _tmp2.txt

egrep "^#" _tmp2.txt > all.vcf
egrep -v "^#" _tmp2.txt >> all.vcf
gzip --best -f all.vcf



rm _tmp1.txt _tmp2.txt _s2v.txt