-
Notifications
You must be signed in to change notification settings - Fork 1
/
interleave.sh
executable file
·29 lines (25 loc) · 1.17 KB
/
interleave.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
#
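# Deinterleaves a FASTQ file of paired reads, read from standard input, into
# two FASTQ files specified on the command line. Optionally gzip-compresses the
# output FASTQ files using pigz if the 3rd command line argument is the word
# "compress".
#
# Usage: <this script> <mate1.fastq> <mate2.fastq> [compress] < interleaved.fastq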
#
# Can deinterleave 100 million paired reads (200 million total
# reads; a 43Gbyte file), in memory (/dev/shm), in 4m15s (255s)
#
# Latest code: https://gist.github.com/3521724
# Also see my interleaving script: https://gist.github.com/4544979
#
# Inspired by Torsten Seemann's blog post:
# http://thegenomefactory.blogspot.com.au/2012/05/cool-use-of-unix-paste-with-ngs.html
# Report a failure in any pipeline stage, not only in the last one.
set -o pipefail

# Set up some defaults. The pigz thread count may be overridden from the
# environment; it falls back to 10 when unset or empty.
PIGZ_COMPRESSION_THREADS=${PIGZ_COMPRESSION_THREADS:-10}

#######################################
# Keeps one mate's fields from "~"-joined read-pair records arriving on stdin
# and writes them back out one field per line.
# Globals:   PIGZ_COMPRESSION_THREADS (read)
# Arguments: $1 - cut field range: "1-4" for the first mate, "5-8" for the second
#            $2 - output file path
#            $3 - 1 to compress the output with pigz, anything else for plain text
# Outputs:   writes (and truncates) the file named by $2
#######################################
extract_mate() {
  local fields=$1 out_path=$2 gzip_output=$3

  # The output path is quoted so that paths containing whitespace work instead
  # of failing with "ambiguous redirect".
  if [[ ${gzip_output} == 1 ]]; then
    cut -d "~" -f "${fields}" | tr "~" "\n" \
      | pigz --best --processes "${PIGZ_COMPRESSION_THREADS}" > "${out_path}"
  else
    cut -d "~" -f "${fields}" | tr "~" "\n" > "${out_path}"
  fi
}

#######################################
# Splits the interleaved FASTQ stream on stdin into two files.
# paste joins every 8 input lines (one read pair) into a single "~"-separated
# record; tee duplicates that stream so each mate can be cut out independently.
# Because "~" is the field separator, an input line that itself contains "~"
# shifts the cut fields and corrupts the split.
# Arguments: $1 - output path for the first read of each pair
#            $2 - output path for the second read of each pair
#            $3 - optional; 1 to compress both outputs with pigz (default 0)
# Returns:   status of the main pipeline (with pipefail); a failure inside the
#            process substitution is not reflected in it.
#######################################
deinterleave_fastq() {
  local mate1_path=$1 mate2_path=$2 gzip_output=${3:-0}

  paste -d "~" - - - - - - - - \
    | tee >(extract_mate 1-4 "${mate1_path}" "${gzip_output}") \
    | extract_mate 5-8 "${mate2_path}" "${gzip_output}"
}

#######################################
# Command line entry point: <mate1.fastq> <mate2.fastq> [compress]
# Returns:   2 on a usage error, 1 if compression is requested but pigz is
#            not installed, otherwise the status of deinterleave_fastq.
#######################################
main() {
  local gzip_output=0

  if (( $# < 2 )) || [[ -z $1 || -z $2 ]]; then
    printf 'Usage: %s <mate1.fastq> <mate2.fastq> [compress] < interleaved.fastq\n' \
      "${0##*/}" >&2
    return 2
  fi

  # If the third argument is the word "compress" then we'll compress the
  # output using pigz
  if [[ ${3:-} == "compress" ]]; then
    if ! command -v pigz > /dev/null; then
      printf '%s: pigz is required for compressed output but was not found\n' \
        "${0##*/}" >&2
      return 1
    fi
    gzip_output=1
  fi

  deinterleave_fastq "$1" "$2" "${gzip_output}"
}

# Last command of the script, so its return status becomes the exit status.
main "$@"