User:Lindenb/Notebook/UMR915/20101011
From OpenWetWare

Charpak
Re-Implementing LINKAGE. http://code.google.com/p/code915/source/detail?r=274
Belgium
TODO: for chr1XXX
- list SNP with mutation
- SIFT
- polyphen
- remove individual 4
- add annotations, etc...
Generate SIFT input for indi2
Used jrunscript for extracting data
// Build SIFT input from a gzipped, semicolon-separated export (run with jrunscript).
// For each selected row whose column 3 mentions "SNP", prints one line per
// "/"-separated entry of column 7, in the form:
//   XXXXXX,<column 1>,1,<column 5>/<entry>
// NOTE(review): column 1 looks like the position and column 5 like the
// reference base -- confirm against the export's header.
importPackage(java.io);
importPackage(java.util.zip);
var f=new File("*.csv.gz");
var input=new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(f))));
try
{
    var line;
    while((line=input.readLine())!=null)
    {
        // only rows of the mapping of interest are kept
        if(!line.startsWith("\"chXXXXX mapping")) continue;
        // java.lang.String methods: drop quotes, the " mapping" suffix and all dots
        line=line.replaceAll("\"","").replace(" mapping","").replace(".","");
        var tokens=line.split("[;]");
        // skip anything that is not flagged as a SNP
        if(tokens[3].indexOf("SNP")==-1) continue;
        var bases=tokens[7].split("/");
        for(var i=0;i< bases.length;++i)
        {
            println("XXXXXX,"+tokens[1]+",1,"+tokens[5]+"/"+bases[i]);
        }
    }
}
finally
{
    // close the stream even when a malformed row aborts the loop
    input.close();
}
run
/usr/local/package/glassfishv3/jdk/bin/jrunscript -f ~/jeter20101011a.js > ~/result.txt
split input
split -C 900k result.txt sift_
Generate the PolyPhen input in the same way
// Build PolyPhen input from a gzipped, semicolon-separated export (run with jrunscript).
// For each selected row whose column 3 mentions "SNP", prints:
//   chrXXXXXXXXXXXXXX:<column 1><TAB><column 7>
// followed by "/<column 5>" when column 5 does not already occur in column 7.
// NOTE(review): column 1 looks like the position, column 5 the reference base
// and column 7 the observed alleles -- confirm against the export's header.
importPackage(java.io);
importPackage(java.util.zip);
var f=new File("XXXXXX.csv.gz");
var input=new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(f))));
try
{
    var line;
    while((line=input.readLine())!=null)
    {
        // only rows of the mapping of interest are kept
        if(!line.startsWith("\"chrXXXXXXXXXXXXXXXXXXXXXXXXXX mapping")) continue;
        // java.lang.String methods: drop quotes, the " mapping" suffix and all dots
        line=line.replaceAll("\"","").replace(" mapping","").replace(".","");
        var tokens=line.split("[;]");
        // skip anything that is not flagged as a SNP
        if(tokens[3].indexOf("SNP")==-1) continue;
        print("chrXXXXXXXXXXXXXX:"+tokens[1]+"\t"+tokens[7]);
        // append column 5 only when it is missing from the column 7 list
        if(tokens[7].indexOf(tokens[5])==-1) print("/"+tokens[5]);
        println("");
    }
}
finally
{
    // close the stream even when a malformed row aborts the loop
    input.close();
}