User:Lindenb/Notebook/UMR915/20100927

From OpenWetWare
Jump to navigation · Jump to search

20100924        Top        20100928       


Moving SQL to mongo

VCFs

   mysql -N -u anonymous \
    -e 'select * from vcf_input' -D umr915 |\
     awk -F '\t' '{printf("v={sqlid:%d,fileformat:\"%s\",filename:\"%s\",description:\"%s\",creation:\"%s\",projects:[\"XXX\"],individuals:[],headers:[]};db.vcfinputs.save(v);\n",$1,$2,$3,$4,$6);}' \
      > jeter.js

indexes

  db.vcfinputs.ensureIndex({projects:1})
  db.vcfinputs.ensureIndex({individuals:1})

add headers:

  mysql -u anonymous -N -e 'select distinct input_id,propValue from vcf_input_meta' -D umr915 |\
  awk -F '\t' "{printf(\"v= db.vcfinputs.findOne({sqlid:%s}); if(v!=null) {v.headers.push(\'%s\');db.vcfinputs.save(v);}\n\",\$1,\$2);}"


> v=db.vcfinputs.findOne({sqlid:6})
{
 	"_id" : ObjectId("4ca07bb0302820c9d33ff054"), 
	"sqlid" : 6,
	"fileformat" : "PILEUP",
	"filename" : "XXXXX",
	"description" : "XXXXXX",
	"creation" : "2010-06-23 10:40:08", 
	"projects" : [
 		"XX"
 	],
	"individuals" : [ ],
	"headers" : [
		"##fileformat=VCFv3.3",
		"##filedate=2010-05-26 18:01:43",
		"##reference=NCBI36",
		"##dbSNP=dbSNP129",
		"##phasing=none",
		"##annotation=ensembl.54",
		"##INFO=DP,1,Integer,\"Total Depth\"",
		"##INFO=AC,1,String,\"Allele count\"",
		"##INFO=AN,1,Integer,\"Total number of alleles\"",
		"##INFO=MQ,1,Integer,\"MQ RMS mapping quality\"",
		"##INFO=CQ,1,String,Highest ensembl protein coding gene consequence\"",
		"##INFO=GN,1,String,\"Gene name\"",
		"##INFO=DB,1,Integer,\"dbSNP entry\"",
		"##INFO=HM3,0,Flag,\"Hapmap3 membership\"",
		"##INFO=PA,1,String,\"Population data\"",
		"##INFO=PS,1,String,\"Population source\"",
		"##INFO=NC,1,String,\"Nucleotide conservation (GERP)\"",
		"##INFO=CNV,0,Flag,\"In known CNV\"",
		"##INFO=MZ,1,Integer,\"Number of mapping quality zero reads\""
	]
 }

Families

Adding the 10 families to mongodb.

stopping mongodb

Stop mongodb from the mongo shell:

 mongo
 > use admin
 switched to db admin
 > db.shutdownServer()