User:Lindenb/Notebook/UMR915/20100622

=myql=

variation tables
create table variation(id int unsigned primary key auto_increment, chrom varchar(30) not null, position int unsigned not null, ref varchar(10) not null, alt varchar(255), creation datetime, modification timestamp) engine=InnoDB; create unique index uniq_var on variation(chrom asc,position asc,ref,alt); alter table variation add index(chrom); alter table variation add index(chrom,position);

vcf input
create table vcf_input(id int unsigned primary key auto_increment,fileformat varchar(50) null,filename varchar(255) null,description TEXT,creation datetime,modified timestamp) engine=InnoDB; create table vcf_input_meta(id int unsigned primary key auto_increment,input_id int unsigned not null, propKey varchar(255) ,propValue varchar(255),creation datetime, modified timestamp,index(input_id), foreign key(input_id) references vcf_input(id) on delete cascade) engine=InnoDB;

variant
create table vcf_call( id int unsigned primary key auto_increment,input_id int unsigned  not null,  variation_id int unsigned not null,  sample_id int unsigned not null,  consensus_quality float,  snp_quality float,  rms_quality float,  depth int,meta text,  creation datetime, modified timestamp,  index(input_id),  index(variation_id),  index(sample_id),  foreign key(input_id) references vcf_input(id) on delete cascade,  foreign key(variation_id) references variation(id) ,  foreign key(sample_id) references sample(id)  ) engine=InnoDB;

=Note about sam2vcf= in sam2vcf.pl my ($chr,$pos,$ref,$cons,$cons_qual,$snp_qual,$rms_qual,$depth,@items) = split(/\t/,$line);

SAM/PILEUP:
 * 0 chr
 * 1 pos
 * 2 ref
 * 3 cons
 * 4 cons_qual
 * 5 snp_qual
 * 6 rms_qual
 * 7 depth

and print $fh_out "$chr\t$pos\t.\t$ref\t$alt\t$snp_qual\t0\t\tGT:GQ:DP\t$gt:$cons_qual:$depth\n"; VCF: =inserting SLS2 dindel data in mysql= wrote tool to insert data http://code.google.com/p/code915/source/browse/trunk/tools/src/java/fr/inserm/umr915/tools/InsertVariations.java
 * 0 chr
 * 1 pos
 * 2 ID="."
 * 3 ref
 * 4 alt
 * 5 snp_qual = "QUAL" (was $5 in pileup)
 * 6 empty (filter)
 * 7 empty (INFO)
 * 8 FORMAT
 * 9 CALL
 * GT= genotype 0|1, 1|2
 * GQ= genotype qual (was $4 in pileup)
 * DP= depth was ($7 in pileup)

grep "#" ./XXXX1_variantCalls.VCF > header.txt grep -v "#" ./XXXX1_variantCalls.VCF | java -jarposfilter.jar -db XXXX_0809_PCCR.bed > jeter.vcf cat header.txt jeter.vcf | java -cp insertvariants.jar:mysql-connector-java-5.1.12-bin.jar fr.inserm.umr915.tools.InsertVariations -s XXX1 -d "XXX" -t vcf

in SLS2: numerous duplicated rows: select vcf_call.* from vcf_call,vcf_input where vcf_call.input_id=vcf_input.id group by vcf_call.variation_id having count(*)>3; =3515