User:Timothee Flutre/Notebook/Postdoc/2012/05/25
From OpenWetWare
Project name | Main project page Previous entry Next entry |
About one-liners in data wrangling
$ for i in {1..10}; do echo $i; done | sed 3,6d
$ for i in {1..20}; do echo $i; done | sed -n 3,5p
$ for i in {-5..5}; do echo $i; done | awk 'function abs(x){return (((x < 0.0) ? -x : x) + 0.0)} {print abs($1)}'
$ for i in {1..10}; do echo $i; done | Rscript -e 'summary(read.table("stdin"))'
$ echo -e "gene\tsnp\tpvalue\ng1\ts1\t0.3\ng1\ts2\t0.002\ng2\ts2\t0.7\ng2\ts3\t0.05" > dat.txt
$ cat dat.txt
gene snp pvalue
g1 s1 0.3
g1 s2 0.002
g2 s2 0.7
g2 s3 0.05
$ cat dat.txt | sed 1d | sort -k1,1 -k3,3 | awk '{print $3"\t"$2"\t"$1}' | uniq -f2
0.002 s2 g1
0.05 s3 g2
$ subgroups=("s1" "s2" "s3" "s4"); for i in {0..2}; do let a=$i+1; for j in $(seq $a 3); do s1=${subgroups[$i]}; s2=${subgroups[$j]}; echo $s1 $s2; done; done
$ awk 'BEGIN{RS=">"} {if(NF==0)next; split($0,a,"\n"); printf "@"a[1]"\n"a[2]"\n+\n"; \
  for(i=1;i<=length(a[2]);i++)printf "}"; printf"\n"}' probes.fa > probes.fq
$ echo -e ">chr1\nAAA\n>chr2\nTTT\n>chr3\nGGG\n" | awk 'BEGIN{RS=">"} /chr2/ {print $0}'
$ (echo -e "x\ty"; for i in {1..10}; do echo -e $i"\t"$RANDOM; done) | (read -r; printf "%s\n" "$REPLY"; sort -k2,2n)
$ echo -e "gene\tsnp\tpvalue\ngene1\tsnp1\t0.002\ngene2\tsnp2\t0.8\ngene2\tsnp3\t0.1" > file_all.txt
$ echo -e "gene1\tsnp1" > file_subset.txt
$ awk 'NR==FNR{a[$1$2]++;next;}{x=$1$2;if(x in a)print $0}' file_subset.txt <(sed 1d file_all.txt)
$ awk 'BEGIN{RS=">"} {split($0,a,"\n"); if(length(a)==0) next; seqlen=0; for(i=2;i<=length(a);++i){seqlen += length(a[i])}; printf a[1]"\t"seqlen"\n"}' sequences.fa
$ zcat reads.fq.gz | awk '(NR % 4 == 2)' | cut -c 6-9
$ echo "AAATGAGCC" | rev | tr ATGC TACG
$ cat 12870_2016_754_MOESM4_ESM.csv | sed -n 1123p | cut -f2 | od -An -c -b
   L   i   s   z   t   e   s 302 240   f   e   h   e   r  \n
 114 151 163 172 164 145 163 302 240 146 145 150 145 162 012
$ cat 12870_2016_754_MOESM4_ESM.csv | sed -n 1123p | cut -f2 | sed 's/\xC2\xA0/ /g' | od -An -c -b
   L   i   s   z   t   e   s       f   e   h   e   r  \n
 114 151 163 172 164 145 163 040 146 145 150 145 162 012
$ echo "project_all-lanes/H3NHKBBXX_7/demultiplex/H3NHKBBXX_7_A3-30-10-10_R1.fastq.gz" | awk '{match($0, /([a-zA-Z0-9-]*)_(R[12])/, a); print a[1]}'