Skip to content

Commit 57b666c

Browse files
committed
Breaking up deepsomatic into its constituent steps
1 parent d145b39 commit 57b666c

File tree

4 files changed

+326
-49
lines changed

4 files changed

+326
-49
lines changed

config/cluster/slurm.json

+21-2
Original file line numberDiff line numberDiff line change
@@ -73,11 +73,30 @@
7373
"gres": "lscratch:512"
7474
},
7575
"deepsomatic": {
76-
"threads": "24",
77-
"mem": "64G",
76+
"threads": "36",
77+
"mem": "192G",
7878
"time": "1-18:00:00",
7979
"gres": "lscratch:750"
8080
},
81+
"deepsomatic_make_examples": {
82+
"threads": "36",
83+
"mem": "96G",
84+
"time": "1-00:00:00",
85+
"gres": "lscratch:750"
86+
},
87+
"deepsomatic_call_variants": {
88+
"threads": "16",
89+
"mem": "60G",
90+
"partition": "gpu",
91+
"gres": "gpu:a100:1,lscratch:450",
92+
"time": "1-00:00:00"
93+
},
94+
"deepsomatic_postprocess_variants": {
95+
"threads": "4",
96+
"mem": "64G",
97+
"time": "1-00:00:00",
98+
"gres": "lscratch:256"
99+
},
81100
"deepvariant": {
82101
"threads": "18",
83102
"mem": "48G",

config/cluster/uge.json

+15
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,21 @@
7575
"mem": "4G",
7676
"partition": ""
7777
},
78+
"deepsomatic_call_variants": {
79+
"mem": "4G",
80+
"partition": "",
81+
"threads": "8"
82+
},
83+
"deepsomatic_make_examples": {
84+
"mem": "4G",
85+
"partition": "",
86+
"threads": "8"
87+
},
88+
"deepsomatic_postprocess_variants": {
89+
"mem": "8G",
90+
"partition": "",
91+
"threads": "4"
92+
},
7893
"deepvariant": {
7994
"mem": "3G",
8095
"partition": "",

workflow/rules/depreciated.smk

+93-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,17 @@
11
# Deprecated rules that may still be useful for some projects
2+
def get_normal_sorted_bam(wildcards):
3+
"""
4+
Returns a tumor sample's paired normal
5+
See config['pairs'] for tumor, normal pairs.
6+
"""
7+
normal = tumor2normal[wildcards.name]
8+
if normal:
9+
# Runs in a tumor, normal mode
10+
return join(workpath, "BAM", "{0}.sorted.bam".format(normal))
11+
else:
12+
# Runs in tumor-only mode
13+
return []
14+
215

316
# Deprecated germline variant calling rule(s)
417
rule deepvariant:
@@ -57,4 +70,83 @@ rule deepvariant:
5770
--output_vcf={output.vcf} \\
5871
--num_shards={threads} \\
5972
--intermediate_results_dir=${{tmp}}
60-
"""
73+
"""
74+
75+
# Deprecated somatic variant calling rule(s)
76+
rule deepsomatic:
77+
"""
78+
Data processing step to call somatic variants using a deep neural
79+
network in tumor-normal pairs. DeepSomatic is an extension of the
80+
deep learning-based variant caller DeepVariant that takes aligned
81+
reads (in BAM or CRAM format) from tumor and normal data, produces
82+
pileup image tensors from them, classifies each tensor using a CNN,
83+
and finally reports somatic variants in a standard VCF or gVCF file.
84+
This rule runs all three steps in the deepsomatic pipeline as one
85+
step: i.e. make_examples, call_variants, and postprocess_variants.
86+
This is not optimal for large-scale projects as it will consume a lot
87+
of resources inefficiently (only the 2nd step in the dv pipeline can
88+
make use of GPU-computing). As such, it is better to run the 1st/3rd
89+
step on a normal compute node and run the 2nd step on a GPU node.
90+
@Input:
91+
Duplicate marked, sorted Tumor-Normal BAM file (scatter)
92+
@Output:
93+
Single-sample VCF file with called somatic variants
94+
"""
95+
input:
96+
tumor = join(workpath, "BAM", "{name}.sorted.bam"),
97+
normal = get_normal_sorted_bam
98+
output:
99+
vcf = join(workpath, "deepsomatic", "somatic", "{name}.deepsomatic.vcf"),
100+
params:
101+
rname = "deepsom",
102+
genome = config['references']['GENOME'],
103+
tmpdir = tmpdir,
104+
# Building option for deepsomatic config, where:
105+
# @WGS = --model_type=WGS
106+
# @WES = --model_type=WES (may be added in future)
107+
dv_model_type = "WGS",
108+
# Get tumor and normal sample names
109+
tumor = '{name}',
110+
# Building option for the paired normal sorted bam
111+
normal_bam_option = lambda w: "--reads_normal={0}.sorted.bam".format(
112+
join(workpath, "BAM", tumor2normal[w.name])
113+
) if tumor2normal[w.name] else "",
114+
# Building option for the normal sample name
115+
normal_name_option = lambda w: "--sample_name_normal={0}".format(
116+
tumor2normal[w.name]
117+
) if tumor2normal[w.name] else "",
118+
threads: int(allocated("threads", "deepsomatic", cluster))
119+
container: config['images']['deepsomatic']
120+
envmodules: config['tools']['deepsomatic']
121+
shell: """
122+
# Sets up a temporary directory for
123+
# intermediate files with built-in
124+
# mechanism for deletion on exit
125+
if [ ! -d "{params.tmpdir}" ]; then mkdir -p "{params.tmpdir}"; fi
126+
tmp=$(mktemp -d -p "{params.tmpdir}")
127+
trap 'du -sh "${{tmp}}"; rm -rf "${{tmp}}"' EXIT
128+
129+
# Export OpenBLAS variable to
130+
# control the number of threads
131+
# in a thread pool. By setting
132+
# this variable to 1, work is
133+
# done in the thread that ran
134+
# the operation, rather than
135+
# dispatching the work to a
136+
# thread pool. If this option
137+
# is not provided, it can lead
138+
# to nested parallelism.
139+
# See this issue for more info:
140+
# https://github.com/google/deepsomatic/issues/28
141+
export OPENBLAS_NUM_THREADS=1
142+
143+
# Run deepsomatic
144+
run_deepsomatic \\
145+
--model_type={params.dv_model_type} \\
146+
--ref={params.genome} \\
147+
--reads_tumor={input.tumor} {params.normal_bam_option} \\
148+
--sample_name_tumor={params.tumor} {params.normal_name_option} \\
149+
--output_vcf={output.vcf} \\
150+
--num_shards={threads} \\
151+
--intermediate_results_dir=${{tmp}}
152+
"""

0 commit comments

Comments
 (0)