User:Lindenb/Notebook/UMR915/20101011

Charpak
Re-Implementing LINKAGE. http://code.google.com/p/code915/source/detail?r=274

Belgium
TODO: for chr1XXX
 * list SNP with mutation
 * SIFT
 * polyphen
 * remove individual 4
 * add annotations, etc...

Generating SIFT input for indi2
Used jrunscript to extract the data:

// jrunscript (Rhino) script: reads a gzipped, ';'-separated report and prints
// one SIFT input line per candidate base for every row flagged as a SNP.
importPackage(java.io);
importPackage(java.util.zip);

var f = new File("*.csv.gz"); // placeholder path; set to the real file before running
var input = new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(f))));
try {
    var line;
    // readLine() must be *called*: a bare `input.readLine` is a method
    // reference, which is never null and is not a line of text.
    while ((line = input.readLine()) != null) {
        // Only rows belonging to the mapping of interest are processed.
        if (!line.startsWith("\"chXXXXX mapping")) continue;
        // Strip quotes, the " mapping" suffix and every literal '.'
        // (Java String.replace is literal, replaceAll is regex-based).
        line = line.replaceAll("\"", "").replace(" mapping", "").replace(".", "");
        var tokens = line.split("[;]");
        // Column 3 (0-based) must mention "SNP".
        if (tokens[3].indexOf("SNP") == -1) continue;
        // Column 7 holds '/'-separated bases; one output line is written per base.
        // NOTE(review): columns 1 and 5 are presumably the position and the
        // reference base - confirm against the CSV header.
        var bases = tokens[7].split("/");
        for (var i = 0; i < bases.length; ++i) {
            println("XXXXXX," + tokens[1] + ",1," + tokens[5] + "/" + bases[i]);
        }
    }
} finally {
    // close() must be called too; `input.close;` alone is a no-op expression.
    input.close();
}

run

/usr/local/package/glassfishv3/jdk/bin/jrunscript -f ~/jeter20101011a.js > ~/result.txt

split input

split -C 900k result.txt sift_

and the same extraction for PolyPhen:
// jrunscript (Rhino) script: reads a gzipped, ';'-separated report and prints
// one PolyPhen input line ("<chrom>:<col1>\t<col7>[/<col5>]") per SNP row.
importPackage(java.io);
importPackage(java.util.zip);

var f = new File("XXXXXX.csv.gz"); // placeholder path; set to the real file before running
var input = new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(f))));
try {
    var line;
    // readLine() must be *called*: a bare `input.readLine` is a method
    // reference, which is never null and is not a line of text.
    while ((line = input.readLine()) != null) {
        // Only rows belonging to the mapping of interest are processed.
        if (!line.startsWith("\"chrXXXXXXXXXXXXXXXXXXXXXXXXXX mapping")) continue;
        // Strip quotes, the " mapping" suffix and every literal '.'
        // (Java String.replace is literal, replaceAll is regex-based).
        line = line.replaceAll("\"", "").replace(" mapping", "").replace(".", "");
        var tokens = line.split("[;]");
        // Column 3 (0-based) must mention "SNP".
        if (tokens[3].indexOf("SNP") == -1) continue;
        // NOTE(review): this array is not used below; the split is kept only so
        // that a row without column 7 fails here exactly as it did originally.
        var bases = tokens[7].split("/");
        print("chrXXXXXXXXXXXXXX:" + tokens[1] + "\t" + tokens[7]);
        // Append column 5 when column 7 does not already contain it.
        // NOTE(review): column 5 is presumably the reference base - confirm.
        if (tokens[7].indexOf(tokens[5]) == -1) print("/" + tokens[5]);
        println("");
    }
} finally {
    // close() must be called too; `input.close;` alone is a no-op expression.
    input.close();
}