User:Timothee Flutre/Notebook/Postdoc/2012/05/25

From OpenWetWare

(Difference between revisions)
Jump to: navigation, search
(One-liners with GNU tools: add tutorial + list of other tools)
(One-liners with GNU tools: add "sort file with header")
(3 intermediate revisions not shown.)
Line 18: Line 18:
** tr
** tr
** wc
** wc
 +
 +
 +
* '''Skip a subset of successive lines''':
 +
$ for i in {1..10}; do echo $i; done | sed 3,6d
 +
 +
 +
* '''Extract a subset of successive lines''':
 +
$ for i in {1..20}; do echo $i; done | sed -n 3,5p
* '''Use absolute values:'''
* '''Use absolute values:'''
 +
$ for i in {-5..5}; do echo $i; done | awk 'function abs(x){return (((x < 0.0) ? -x : x) + 0.0)} {print abs($1)}'
-
awk 'function abs(x){return (((x < 0.0) ? -x : x) + 0.0)} {print abs($1)}' input.txt
 
 +
* '''Extract the best snp per gene''':
 +
 +
$ echo -e "gene\tsnp\tpvalue\ng1\ts1\t0.3\ng1\ts2\t0.002\ng2\ts2\t0.7\ng2\ts3\t0.05" > dat.txt; cat dat.txt
 +
gene    snp    pvalue
 +
g1      s1      0.3
 +
g1      s2      0.002
 +
g2      s2      0.7
 +
g2      s3      0.05
 +
 +
$ cat dat.txt | sed 1d | sort -k1,1 -k3,3g | awk '{print $3"\t"$2"\t"$1}' | uniq -f2
 +
0.002   s2      g1
 +
0.05    s3      g2
 +
 +
 +
* '''Loop over pairs''':
 +
 +
$ subgroups=("s1" "s2" "s3" "s4"); for i in {0..2}; do let a=$i+1; for j in $(seq $a 3); do s1=${subgroups[$i]}; s2=${subgroups[$j]}; echo $s1 $s2; done; done
Line 29: Line 54:
  <nowiki>
  <nowiki>
-
awk 'BEGIN{RS=">"} {if(NF==0)next; split($0,a,"\n"); printf "@"a[1]"\n"a[2]"\n+\n"; \
+
$ awk 'BEGIN{RS=">"} {if(NF==0)next; split($0,a,"\n"); printf "@%s\n%s\n+\n", a[1], a[2]; \
for(i=1;i<=length(a[2]);i++)printf "}"; printf"\n"}' probes.fa > probes.fq
for(i=1;i<=length(a[2]);i++)printf "}"; printf"\n"}' probes.fa > probes.fq
</nowiki>
</nowiki>
 +
 +
 +
* '''Sort a file with header line''': that is, we don't want the first line to be sorted
 +
 +
$ (echo -e "x\ty"; for i in {1..10}; do echo -e $i"\t"$RANDOM; done) | (read -r; printf "%s\n" "$REPLY"; sort -k2,2n)
 +
<!-- ##### DO NOT edit below this line unless you know what you are doing. ##### -->
<!-- ##### DO NOT edit below this line unless you know what you are doing. ##### -->

Revision as of 14:19, 26 June 2013

Project name Main project page
Previous entry      Next entry

One-liners with GNU tools


  • Toolbox:
    • AWK
    • grep
    • sed
    • cut
    • tr
    • wc


  • Skip a subset of successive lines:
$ for i in {1..10}; do echo $i; done | sed 3,6d


  • Extract a subset of successive lines:
$ for i in {1..20}; do echo $i; done | sed -n 3,5p


  • Use absolute values:
$ for i in {-5..5}; do echo $i; done | awk 'function abs(x){return (((x < 0.0) ? -x : x) + 0.0)} {print abs($1)}'


  • Extract the best snp per gene:
$ echo -e "gene\tsnp\tpvalue\ng1\ts1\t0.3\ng1\ts2\t0.002\ng2\ts2\t0.7\ng2\ts3\t0.05" > dat.txt; cat dat.txt
gene    snp     pvalue
g1      s1      0.3
g1      s2      0.002
g2      s2      0.7
g2      s3      0.05
$ cat dat.txt | sed 1d | sort -k1,1 -k3,3g | awk '{print $3"\t"$2"\t"$1}' | uniq -f2
0.002   s2      g1
0.05    s3      g2


  • Loop over pairs:
$ subgroups=("s1" "s2" "s3" "s4"); for i in {0..2}; do let a=$i+1; for j in $(seq $a 3); do s1=${subgroups[$i]}; s2=${subgroups[$j]}; echo $s1 $s2; done; done


$ awk 'BEGIN{RS=">"} {if(NF==0)next; split($0,a,"\n"); printf "@%s\n%s\n+\n", a[1], a[2]; \
for(i=1;i<=length(a[2]);i++)printf "}"; printf"\n"}' probes.fa > probes.fq


  • Sort a file with header line: that is, we don't want the first line to be sorted
$ (echo -e "x\ty"; for i in {1..10}; do echo -e $i"\t"$RANDOM; done) | (read -r; printf "%s\n" "$REPLY"; sort -k2,2n)



Personal tools