-
Notifications
You must be signed in to change notification settings - Fork 6
/
covid19_call_variants.ont.sh
executable file
·79 lines (62 loc) · 2.36 KB
/
covid19_call_variants.ont.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
#!/bin/bash
# To run this script:
# ./covid19_call_variants.artic.sh <samplename>
# This pipeline needs the fastq file of "pass" reads
set -e
sample_filename="${1}"
artic_primer_version="${2}"
artic_primer_scheme="V${artic_primer_version}"
#### Default parameters
: "${medaka_model:=r941_min_high_g360}" # Default Medaka model is for MinION (or GridION) R9.4.1 flowcells using the fast Guppy basecaller version 3.6.0, high accuracy base calling
: "${min_read_length:=400}"
: "${max_read_length:=600}" # Filters out all reads above 600 bp
: "${min_read_quality:=7}" # Filters out all reads below a quality of 7
: "${normalized_coverage:=200}" # Normalize to coverage of 200x to reduce runtime
: "${threads:=6}" # Number of threads for running the minimap2/medaka pipeline
# shellcheck disable=SC2154
: "${prefix=report}"
#### 1. Length and quality filtering
echo "[1] trimming/filtering reads with seqkit"
# shellcheck disable=SC1091
source activate jobscript-env
# shellcheck disable=SC2002
cat "${sample_filename}" \
| seqkit \
seq \
--min-len ${min_read_length} \
--max-len ${max_read_length} \
--min-qual ${min_read_quality} \
--out-file "${prefix}.filtered.fastq"
# shellcheck disable=SC2086
echo "total reads after filtering: $(wc -l ${prefix}.filtered.fastq)"
# shellcheck disable=SC1091
source deactivate
#### 2. Alignment, variant calling, and consensus creation
# minimap2 alignment, Medaka for consensus creation and variant calling (experimental)
echo "[2] running ARTIC pipeline with ${artic_primer_scheme} primers"
conda run -n artic \
artic minion \
--medaka \
--medaka-model "${medaka_model}" \
--normalise "${normalized_coverage}" \
--scheme-directory /primer_schemes \
--threads "${threads}" \
--read-file "${prefix}.filtered.fastq" \
--no-longshot \
--strict \
nCoV-2019/V4.1 \
"${prefix}"
#### 3. Variant annotation
gunzip "${prefix}.pass.vcf.gz"
# Replace the vcf chromosome name to the verison that snpEff will recognize
# (GenBank sequence NC_045512.2, which is identical to RefSeq MN908947.3)
echo "[3] cleaning up"
### Move files around and clean up
mv "${prefix}.pass.vcf" variants.vcf
mv "${prefix}.consensus.fasta" consensus.fa
mv "${prefix}.sorted.bam" covid19.bam
mv "${prefix}.sorted.bam.bai" covid19.bam.bai
mv "${prefix}.minion.log.txt" artic.log
rm -rf "${prefix}.*"
##### 6. Run generate_tsv.py
echo "[ ] finished!"