User:Lindenb/Notebook/UMR915/20100915

=MongoDB evaluation=
Inserting PubMed records into MongoDB: I wrote an XSLT stylesheet, saved the results of the PubMed query "Darwin C"[PS] as XML, and transformed them:

xsltproc pubmed2mongo.xsl ~/pubmed_result.txt > jeter.js

// content of jeter.js
//
// MongoDB shell script produced by pubmed2mongo.xsl: one document per PubMed
// article, stored in the "articles" collection with the PMID as _id.

// Start from an empty collection. drop is a method and must be invoked;
// a bare `db.articles.drop;` only evaluates the function reference.
db.articles.drop();

var article;

article = {
	_id: 20665232,
	pmid: 20665232,
	created: {year: 2010, month: 8, day: 10},
	title: "Charles Darwin's beagle voyage, fossil vertebrate succession, and \"the gradual birth & death of species\".",
	issue: "2",
	volume: "43",
	pgn: "363-99",
	// Key is "journal" (not "joural") so that queries reading
	// object.journal.title find the field.
	journal: {title: "Journal of the history of biology", abbr: "J Hist Biol", issn: "0022-5010"},
	doi: "10.1007/s10739-009-9189-9",
	lang: "eng",
	authors: [{firstName: "Paul D", lastName: "Brinkman"}],
	mesh: ["Animals", "Fossils", "History, 19th Century", "Natural History", "Phylogeny", "Vertebrates"]
};
db.articles.save(article);

article = {
	_id: 20626121,
	pmid: 20626121,
	created: {year: 2010, month: 7, day: 14},
	title: "[The biomedical legacy of Charles Darwin]",
	issue: "2",
	volume: "146",
	pgn: "87-9",
	journal: {title: "Gaceta médica de México", abbr: "Gac Med Mex", issn: "0016-3813"},
	lang: "spa",
	authors: [{firstName: "Emilio", lastName: "García-Procel"}],
	mesh: ["Biology", "Evolution", "History, 19th Century", "History, 20th Century", "Medicine"]
};
db.articles.save(article);

article = {
	_id: 20503821,
	pmid: 20503821,
	created: {year: 2010, month: 5, day: 27},
	title: "Darwin and the popularization of evolution.",
	issue: "1",
	volume: "64",
	pgn: "5-24",
	journal: {title: "Notes and records of the Royal Society of London", abbr: "Notes Rec R Soc Lond", issn: "0035-9149"},
	lang: "eng",
	authors: [{firstName: "Bernard", lastName: "Lightman"}],
	mesh: ["Biology", "Evolution", "Genetic Fitness", "History, 19th Century", "History, 20th Century", "Humans", "Male", "Philosophy", "Religion", "Science", "Selection, Genetic", "United States"]
};
db.articles.save(article);

// (...)

insert into mongo:

~/package/mongodb-linux-i686-1.6.1/bin/mongo pubmed jeter.js    MongoDB shell version: 1.6.1 connecting to: pubmed

killer queries
// Among the articles indexed with the MeSH term "Evolution", count how many
// were published in each journal, then keep only the journals that have at
// least 3 such articles.
//
// Result shape (a single group, because the key is empty):
//   { journal: { "<journal title>": <count>, ... }, total: <count> }
// "total" counts every matching article, including those whose journal is
// removed from the map by finalize.
//
// Each document is expected to carry a journal.title field.
db.articles.group({
	// Empty key: all matching documents are folded into one aggregate.
	key: {},
	// Matches documents whose "mesh" array contains "Evolution".
	cond: {mesh: "Evolution"},
	initial: {journal: {}, total: 0},
	// Called once per matching document; mutates the aggregate in place.
	reduce: function(object, aggregate) {
		var title = object.journal.title;
		var count = aggregate.journal[title];
		// First article seen for this journal: no counter yet.
		if (!count) {
			count = 0;
		}
		count++;
		aggregate.journal[title] = count;
		aggregate.total++;
	},
	// Called once on the finished aggregate; drops journals with fewer
	// than 3 articles.
	finalize: function(aggregate) {
		// Declared locally so the loop variable does not become an
		// implicit global.
		var j;
		for (j in aggregate.journal) {
			if (aggregate.journal[j] < 3) {
				delete aggregate.journal[j];
			}
		}
	}
})

other queries
posted on my blog: http://plindenbaum.blogspot.com/2010/09/mongodb-and-ncbi-pubmed-inserting.html