User:Lindenb/Notebook/UMR915/20100915

From OpenWetWare
Jump to navigationJump to search

20100914        Top        20100916       


MongoDB evaluation

Inserting PubMed records into MongoDB: I wrote an XSLT stylesheet (File:Pubmed2mongo.xsl), saved the results of the PubMed query "Darwin C"[PS] as XML, and transformed them:

  xsltproc pubmed2mongo.xsl ~/pubmed_result.txt > jeter.js

content of jeter.js

  // Empty the collection, then save one document per PubMed record (the PMID is reused as _id).
  db.articles.drop();
   
   article={_id:20665232,pmid:20665232,created:{year:2010,month:8,day:10},title:"Charles Darwin's beagle voyage, fossil vertebrate succession, and \"the gradual birth & death of species\".",issue:"2",volume:"43",pgn:"363-99",journal:{title:"Journal of the history of biology",abbr:"J Hist Biol",issn:"0022-5010"},doi:"10.1007/s10739-009-9189-9",lang:"eng",authors:[{firstName:"Paul D",lastName:"Brinkman"}],mesh:["Animals","Fossils","History, 19th Century","Natural History","Phylogeny","Vertebrates"]};
   db.articles.save(article);
   
   article={_id:20626121,pmid:20626121,created:{year:2010,month:7,day:14},title:"[The biomedical legacy of Charles Darwin]",issue:"2",volume:"146",pgn:"87-9",journal:{title:"Gaceta médica de México",abbr:"Gac Med Mex",issn:"0016-3813"},lang:"spa",authors:[{firstName:"Emilio",lastName:"García-Procel"}],mesh:["Biology","Evolution","History, 19th Century","History, 20th Century","Medicine"]};
   db.articles.save(article);
   
   article={_id:20503821,pmid:20503821,created:{year:2010,month:5,day:27},title:"Darwin and the popularization of evolution.",issue:"1",volume:"64",pgn:"5-24",journal:{title:"Notes and records of the Royal Society of London",abbr:"Notes Rec R Soc Lond",issn:"0035-9149"},lang:"eng",authors:[{firstName:"Bernard",lastName:"Lightman"}],mesh:["Biology","Evolution","Genetic Fitness","History, 19th Century","History, 20th Century","Humans","Male","Philosophy","Religion","Science","Selection, Genetic","United States"]};
   db.articles.save(article);
   (...)

insert into mongo:

  ~/package/mongodb-linux-i686-1.6.1/bin/mongo pubmed jeter.js
    MongoDB shell version: 1.6.1
    connecting to: pubmed

killer queries

 // Count, per journal title, the articles carrying the MeSH term "Evolution",
 // then drop from the result every journal with fewer than 3 such articles.
 db.articles.group(
 	{
 	// empty key: every matching document is folded into one single result
 	key:{},
 	// only keep the articles whose mesh array contains "Evolution"
 	cond:{mesh:"Evolution"},
 	// accumulator: journal title => number of articles, plus a grand total
 	initial:{journal:{},total:0},
 	// called once per matching article; assumes each article has a
 	// journal sub-document with a title
 	reduce: function(object, aggregate)
 		{
 		var title=object.journal.title;
 		// a title seen for the first time has no entry yet: start from 0
 		aggregate.journal[title]=(aggregate.journal[title] || 0)+1;
 		aggregate.total++;
 		},
 	// called once on the final accumulator, which is modified in place;
 	// 'total' still counts all the matching articles
 	finalize:function(aggregate)
 		{
 		// 'var' keeps the loop variable local instead of creating a global
 		for(var j in aggregate.journal)
 			{
 			if( aggregate.journal[j]<3)
 				{
 				delete aggregate.journal[j];
 				}
 			}
 		}
 	})

other queries

posted on my blog: http://plindenbaum.blogspot.com/2010/09/mongodb-and-ncbi-pubmed-inserting.html