User:Lindenb/Notebook/UMR915/20100915
From OpenWetWare

MongoDB evaluation
Inserting PubMed records into MongoDB: I wrote an XSLT stylesheet (File:Pubmed2mongo.xsl), saved the results of the PubMed query "Darwin C"[PS] as XML, and transformed them:
xsltproc pubmed2mongo.xsl ~/pubmed_result.txt > jeter.js
content of jeter.js
// Generated load script: empties the "articles" collection, then saves one
// document per PubMed record, using the PMID as the document _id.
// The journal sub-document is stored under the key "journal" (previously
// misspelled "joural"), which is the field name the group() query reads.
db.articles.drop();
article={_id:20665232,pmid:20665232,created:{year:2010,month:8,day:10},title:"Charles Darwin's beagle voyage, fossil vertebrate succession, and \"the gradual birth & death of species\".",issue:"2",volume:"43",pgn:"363-99",journal:{title:"Journal of the history of biology",abbr:"J Hist Biol",issn:"0022-5010"},doi:"10.1007/s10739-009-9189-9",lang:"eng",authors:[{firstName:"Paul D",lastName:"Brinkman"}],mesh:["Animals","Fossils","History, 19th Century","Natural History","Phylogeny","Vertebrates"]};
db.articles.save(article);
article={_id:20626121,pmid:20626121,created:{year:2010,month:7,day:14},title:"[The biomedical legacy of Charles Darwin]",issue:"2",volume:"146",pgn:"87-9",journal:{title:"Gaceta médica de México",abbr:"Gac Med Mex",issn:"0016-3813"},lang:"spa",authors:[{firstName:"Emilio",lastName:"García-Procel"}],mesh:["Biology","Evolution","History, 19th Century","History, 20th Century","Medicine"]};
db.articles.save(article);
article={_id:20503821,pmid:20503821,created:{year:2010,month:5,day:27},title:"Darwin and the popularization of evolution.",issue:"1",volume:"64",pgn:"5-24",journal:{title:"Notes and records of the Royal Society of London",abbr:"Notes Rec R Soc Lond",issn:"0035-9149"},lang:"eng",authors:[{firstName:"Bernard",lastName:"Lightman"}],mesh:["Biology","Evolution","Genetic Fitness","History, 19th Century","History, 20th Century","Humans","Male","Philosophy","Religion","Science","Selection, Genetic","United States"]};
db.articles.save(article);
(...)
insert into mongo:
~/package/mongodb-linux-i686-1.6.1/bin/mongo pubmed jeter.js
MongoDB shell version: 1.6.1
connecting to: pubmed
killer queries
// Count, per journal title, the articles whose mesh list contains "Evolution",
// then drop from the result every journal with fewer than 3 such articles.
// "total" keeps the count of all matching articles, including those whose
// journal is removed by finalize.
db.articles.group(
{
    // Empty key: all matching documents are folded into a single aggregate.
    key: {},
    // Only documents matching this condition are passed to reduce.
    cond: {mesh: "Evolution"},
    // journal: map of journal title -> article count; total: matching articles seen.
    initial: {journal: {}, total: 0},
    // Called once per matching document; updates the aggregate in place.
    reduce: function(object, aggregate)
    {
        var title = object.journal.title;
        // A title not seen yet reads as undefined, so start from 0.
        aggregate.journal[title] = (aggregate.journal[title] || 0) + 1;
        aggregate.total++;
    },
    // Called on the finished aggregate; prunes rarely-seen journals in place.
    finalize: function(aggregate)
    {
        // Declared locally: the loop variable was previously an implicit global.
        var j;
        for (j in aggregate.journal)
        {
            if (aggregate.journal[j] < 3)
            {
                delete aggregate.journal[j];
            }
        }
    }
})
other queries
posted on my blog: http://plindenbaum.blogspot.com/2010/09/mongodb-and-ncbi-pubmed-inserting.html