User:Lindenb/Notebook/UMR915/20101121

Belgium
And with the CLC data?

# Build the sorted, de-duplicated list of the first two TAB-separated columns
# taken from the ">" header lines of both gzipped 454 files.
#   - grep -E "^>"   keeps only lines starting with ">"
#   - cut -c 2-      strips that leading ">"
#   - cut -f 1,2     keeps columns 1 and 2 (cut's default delimiter is TAB)
#   - sort -u        sorts and removes duplicates
#   - grep -v Refer  drops lines containing "Refer"
gunzip -c 454AllStructVars.txt.gz 454AllDiffs.txt.gz | grep -E "^>" | cut -c 2- | cut -f 1,2 | sort -u | grep -v Refer > jeter.clc.sampl1.txt

$ wc -l jeter.clc.sampl1.txt
8930 jeter.clc.sampl1.txt

$ comm -1 -2 jeter.clc.sampl1.txt jeter.freebayes.sample1.txt | wc
   2336    4672   34029

NGS course
# Remove a target whose recipe failed, so a half-written file is never
# mistaken for an up-to-date one on the next run.
.DELETE_ON_ERROR:

# Proxy handed to curl (--proxy) by the download rules.
PROXY := cache.u14.univ-nantes.prive:3128

# BWA installation directory and executable.
bwa.dir := ${HOME}/package/bwa/bwa-0.5.7
bwa.bin := ${bwa.dir}/bwa

# samtools installation directory and executable.
sam.dir := ${HOME}/package/sam/samtools-0.1.7a
sam.bin := ${sam.dir}/samtools

# Number of leading lines kept from each downloaded FASTQ file (head -n).
LIMIT := 1000004

# Convenience entry point: build the SQLite database used by the Rails app.
.PHONY: rails
rails: RAILS/rails4pileup/db/vcf.sqlite3
# R setup (run inside an R session):
#  1) source("http://bioconductor.org/biocLite.R")
#  2) biocLite("ShortRead")

# Load pileup.filtered.txt into a SQLite database and wire the Rails
# application to it.
#   1. awk turns the first five TAB-separated columns of each input line into
#      an INSERT statement (preceded by one CREATE TABLE) and pipes the SQL
#      into sqlite3.
#   2. The Rails generator creates the scaffold for the "vcf" resource.
#   3. config/database.yml is rewritten so that the test, development and
#      production databases all point at vcf.sqlite3.
# The Rails directory is an order-only prerequisite: it must exist, but its
# timestamp (which changes whenever its content does) must not force a reload.
# NOTE(review): the scaffold field types are spelled "int" as in the original;
# Rails column types are usually spelled "integer" - confirm before relying on
# the generated migration.
RAILS/rails4pileup/db/vcf.sqlite3: pileup.filtered.txt | RAILS/rails4pileup
	# Start from an empty database so a rebuild does not append to a table left by a previous run.
	$(RM) $@
	awk -F '\t' 'BEGIN {printf(" create table vcfs(id integer primary key,chrom varchar(50), position int, ref varchar(2), alt varchar(50),depth int);\n");} {printf("insert into vcfs(chrom,position,ref,alt,depth) values(\"%s\",%s,\"%s\",\"%s\",%s);\n",$$1,$$2,$$3,$$4,$$5);}' $< | sqlite3 $@
	ruby RAILS/rails4pileup/script/generate scaffold vcf chrom:string position:int ref:string alt:string depth:int
	# Write next to the destination instead of a shared /tmp name, then move into place.
	sed 's/\(test\|development\|production\)\.sqlite3/vcf.sqlite3/' RAILS/rails4pileup/config/database.yml > RAILS/rails4pileup/config/database.yml.tmp
	mv RAILS/rails4pileup/config/database.yml.tmp RAILS/rails4pileup/config/database.yml
	echo "http://localhost:3000/vcfs"

# Create the parent directory, then let the `rails` command generate the
# application skeleton at the target path.
RAILS/rails4pileup:
	mkdir -p $(@D)
	rails $@

# Generate (and keep, -keep) the client sources for the NCBI efetch_snp SOAP
# service into ./ncbi using wsimport.
# "soap" is a command name, not a file, so it is declared phony: a stray file
# called "soap" must not make this target look up to date.
.PHONY: soap
soap:
	mkdir -p ncbi
	wsimport -keep -d ncbi "http://www.ncbi.nlm.nih.gov/entrez/eutils/soap/v2.0/efetch_snp.wsdl"

# Emit three TAB-separated columns per input line: column 1, column 2 minus
# one, and column 2 (presumably a start/end interval for a UCSC table query -
# confirm against the consumer of this file).
# awk field references are written $$1/$$2 because Make expands a single "$"
# itself; with $1/$2 the shell would receive empty strings and awk would fail.
input.table.ucsc.txt: pileup.filtered.txt
	awk -F '\t' '{printf("%s\t%d\t%d\n",$$1,int($$2)-1,int($$2));}' $< > $@

# Run samtools.pl varFilter (with -d 10) over the raw pileup and store the
# result as the target.
pileup.filtered.txt: pileup.txt
	${sam.dir}/misc/samtools.pl varFilter -d 10 $< > $@

# Run `samtools pileup` on the sorted alignments against chr22.fa.
# The recipe reads sorted.bam and chr22.fa directly, so both are declared as
# prerequisites alongside the index; otherwise a change to either would only
# be noticed through the indirect dependency chain.
pileup.txt: sorted.bam sorted.bam.bai chr22.fa
	${sam.bin} pileup -v -c -f chr22.fa $< > $@

# Index the sorted BAM file.
sorted.bam.bai: sorted.bam
	${sam.bin} index $<

# Sort the alignments. The second argument is an output prefix ("sorted"),
# from which this rule expects sorted.bam to result.
sorted.bam: aln.bam
	${sam.bin} sort $< $(basename $@)

# Convert the SAM alignments to BAM (-b), using chr22.fa as the reference (-T).
aln.bam: aln.sam
	${sam.bin} view -b -T chr22.fa $< > $@

# Pair the two single-end alignments with `bwa sampe` against the chr22db
# index. $^ expands to the prerequisites in the order listed, which is the
# argument order the command line expects: both .sai files, then both FASTQs.
aln.sam: aln1.sai aln2.sai reads_1.fastq reads_2.fastq
	${bwa.bin} sampe chr22db $^ > $@

# Align each read file against the chr22db index with `bwa aln`.
# Static pattern rule: for aln1.sai and aln2.sai the stem ($*) is the read
# number, which selects the matching reads_N.fastq.
aln1.sai aln2.sai: aln%.sai: chr22db.bwt reads_%.fastq
	${bwa.bin} aln chr22db reads_$*.fastq > $@

# Download one end of the SRR018111 run through the proxy, decompress it on
# the fly and keep only the first ${LIMIT} lines.
# Static pattern rule: the stem ($*) is the read number (1 or 2), so each
# target downloads only its own file and the recipe writes nothing but $@.
# A single recipe producing both files under a two-target header would be run
# once per target and would race under `make -j`.
# head exits after ${LIMIT} lines, which ends the pipeline before the whole
# archive has been transferred.
reads_1.fastq reads_2.fastq: reads_%.fastq:
	curl --proxy ${PROXY} "ftp://ftp-trace.ncbi.nlm.nih.gov/sra/static/SRX006/SRX006000/SRR018111_$*.fastq.bz2" | \
		bunzip2 -c | head -n ${LIMIT} > $@

# Build the BWA index with prefix chr22db (-p) from chr22.fa using the bwtsw
# algorithm (-a). The rule tracks chr22db.bwt as the product; the prefix is
# the target name without its suffix.
chr22db.bwt: chr22.fa
	${bwa.bin} index -p $(basename $@) -a bwtsw $<

# Run `samtools faidx` on the reference FASTA; this rule expects chr22.fa.fai
# to appear next to it.
chr22.fa.fai: chr22.fa
	${sam.bin} faidx $<

# Fetch the gzipped hg18 chromosome 22 FASTA from UCSC through the proxy and
# decompress it straight into the target.
chr22.fa:
	curl --proxy ${PROXY} "http://hgdownload.cse.ucsc.edu/goldenPath/hg18/chromosomes/chr22.fa.gz" | gunzip -c > $@