-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathSnakefile
104 lines (93 loc) · 3.27 KB
/
Snakefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
import re
## Every task's shell command is prefixed with the following:
#   set -o pipefail                                # trace ERR through pipes
#   umask 002                                      # group write permissions
#   export TMPDIR={config['tmpdir']}               # configure temp directory
#   export SINGULARITY_TMPDIR={config['tmpdir']}   # configure temp directory
# The pieces are adjacent string literals, so they are joined into a single
# prefix string; each piece ends in "; " so the task command can follow directly.
shell.prefix(
    "set -o pipefail; "
    "umask 002; "
    f"export TMPDIR={config['tmpdir']}; "
    f"export SINGULARITY_TMPDIR={config['tmpdir']}; "
)
batch = config["batch"]
movie = config["ccsReads"]

# Get expected barcodes and sample map from the standard barcode_biosample_csv used in SL.
# The map is built in both directions.
# Assumes the csv has a header row (actual column names are ignored) followed by
# rows of exactly two columns: barcode,sampleId
with open( config["biosamples"] ) as biosamples_csv:
    # Read everything inside the `with` so the file handle is closed as soon as
    # the rows are in memory.
    biosample_rows = biosamples_csv.read().strip().split('\n')

# [1:] drops the header row; dict() requires every remaining row to split into
# exactly two fields and raises ValueError otherwise.
barcode2sample = dict( row.split(',') for row in biosample_rows[1:] )
sample2barcode = { v:k for k,v in barcode2sample.items() }

ref = config["ref"]["shortname"]
print(f"Processing batch {batch} with reference {ref}.")
# Samples are resolved through the demux checkpoint because some samples may
# fail to yield data in demux.
def _get_demuxed_samples( wildcards ):
    '''Return the sample ids that have a demultiplexed bam after demuxing.

    Some samples may not have yield (ie failed), so the sample list is
    derived from the bam files found in the output directory of the
    `demux_ubam` checkpoint instead of from the full biosample map.

    The `unbarcoded` bam is skipped. Every other barcode found on disk is
    translated with `barcode2sample`; a barcode missing from that map
    raises KeyError.
    '''
    checkpoint_output = checkpoints.demux_ubam.get( **wildcards ).output
    bam_pattern = f'{checkpoint_output.odir}/demultiplex.{{barcode}}.bam'
    samples = []
    for bc in glob_wildcards( bam_pattern ).barcode:
        if bc == 'unbarcoded':
            continue
        samples.append( barcode2sample[ bc ] )
    return samples
# Collects what `rule all` requests as input; the sections further down
# append per-sample input functions to this list.
targets = []
# Rule files that are included unconditionally.
include: "rules/common.smk"
include: "rules/demux.smk"
include: "rules/preprocess.smk"
include: "rules/pbmm2.smk"
include: "rules/deepvariant.smk"
include: "rules/whatshap.smk"
include: "rules/pbsv.smk"
# Rule files below are included only when the matching config entry asks for them.
# glnexus rules: gated by config["run_cohort"].
if config["run_cohort"]:
include: "rules/glnexus.smk"
# hsmetrics rules: skipped when config["probes"] is the literal string "None".
if config[ "probes" ] != "None":
include: "rules/hsmetrics.smk"
# pharmcat rules: gated by config["pharmcat"]["run_analysis"].
# NOTE(review): pangu_cyp2d6.smk directly follows pharmcat.smk; presumably it
# is gated by the same switch -- confirm.
if config[ "pharmcat" ][ "run_analysis" ]:
include: "rules/pharmcat.smk"
include: "rules/pangu_cyp2d6.smk"
# annotate rules: gated by config["annotate"]["gVCF"].
if config[ "annotate" ][ "gVCF" ]:
include: "rules/annotate.smk"
# DV targets
def _deepvariant_targets( wildcards ):
    '''Input function listing the DeepVariant outputs for every demuxed sample.

    The demuxed sample list is resolved once per call, so the checkpoint
    lookup and file glob inside `_get_demuxed_samples` are not repeated for
    every suffix.

    Returns a list of paths ordered suffix-first: all samples for the first
    suffix, then all samples for the next, and so on.
    '''
    samples = _get_demuxed_samples( wildcards )
    suffixes = [
        "vcf.gz",
        "vcf.gz.tbi",
        "g.vcf.gz",
        "g.vcf.gz.tbi",
        "visual_report.html",
        "vcf.stats.txt",
    ]
    return [
        f"batches/{batch}/{sample}/deepvariant/{sample}.{ref}.deepvariant.{suffix}"
        for suffix in suffixes
        for sample in samples
    ]

targets.append( _deepvariant_targets )
# WH targets
def _whatshap_targets( wildcards ):
    '''Input function listing the WhatsHap outputs for every demuxed sample.

    The demuxed sample list is resolved once per call, so the checkpoint
    lookup and file glob inside `_get_demuxed_samples` are not repeated for
    every suffix.

    Returns a list of paths ordered suffix-first: all samples for the first
    suffix, then all samples for the next, and so on.
    '''
    samples = _get_demuxed_samples( wildcards )
    suffixes = [
        "phased.vcf.gz",
        "phased.vcf.gz.tbi",
        "phased.gtf",
        "phased.tsv",
        "phased.blocklist",
        "haplotagged.bam",
        "haplotagged.bam.bai",
    ]
    return [
        f"batches/{batch}/{sample}/whatshap/{sample}.{ref}.deepvariant.{suffix}"
        for suffix in suffixes
        for sample in samples
    ]

targets.append( _whatshap_targets )
# pbsv targets
def _pbsv_targets( wildcards ):
    '''Input function listing the pbsv vcf of every demuxed sample.

    Returns one path per sample, in the order the samples are reported by
    `_get_demuxed_samples`.
    '''
    vcfs = []
    for sample in _get_demuxed_samples( wildcards ):
        vcfs.append( f"batches/{batch}/{sample}/pbsv/{sample}.{ref}.pbsv.vcf" )
    return vcfs

targets.append( _pbsv_targets )
# QC extras
# The qc_cov.smk and qc_ext.smk rule files are gated by config['QC']['runQC'].
# NOTE(review): presumably both includes belong to this `if` -- confirm.
if config['QC']['runQC']:
include: "rules/qc_cov.smk"
include: "rules/qc_ext.smk"
# Where both rules could produce the same output file, prefer
# deepvariant_postprocess_variants over tabix_vcf.
ruleorder: deepvariant_postprocess_variants > tabix_vcf
# Aggregate rule: its input is everything collected in `targets`.
rule all:
input:
targets