Skip to content

Commit ea59355

Browse files
committed
Adding WES regions BED file to the DeepVariant and DeepSomatic make_examples commands to speed up run time
1 parent ebca284 commit ea59355

File tree

2 files changed

+18
-8
lines changed

2 files changed

+18
-8
lines changed

workflow/rules/germline.smk

+7-4
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ rule deepvariant_make_examples:
3131
input:
3232
bam = join(workpath, "BAM", "{name}.sorted.bam"),
3333
bai = join(workpath, "BAM", "{name}.sorted.bam.bai"),
34+
bed = provided(join(workpath, "references", "wes_regions_50bp_padded.bed"), run_wes),
3435
output:
3536
success = join(workpath, "deepvariant", "mk_examples", "{name}.make_examples.success"),
3637
params:
@@ -46,7 +47,11 @@ rule deepvariant_make_examples:
4647
w.name,
4748
int(allocated("threads", "deepvariant_make_examples", cluster))
4849
)),
49-
message: "Running DeepVariant make_examples on '{input.bam}' input file"
50+
# Call variants within regions BED
51+
# file created from WES capture kit
52+
wes_region_option = lambda _: "--regions {0}".format(
53+
join(workpath, "references", "wes_regions_50bp_padded.bed"),
54+
) if run_wes else '',
5055
threads: int(allocated("threads", "deepvariant_make_examples", cluster))
5156
container: config['images']['deepvariant']
5257
envmodules: config['tools']['deepvariant']
@@ -74,7 +79,7 @@ rule deepvariant_make_examples:
7479
--halt 2 \\
7580
--line-buffer \\
7681
make_examples \\
77-
--mode calling \\
82+
--mode calling {params.wes_region_option} \\
7883
--ref {params.genome} \\
7984
--reads {input.bam} \\
8085
--examples {params.example} \\
@@ -131,7 +136,6 @@ rule deepvariant_call_variants:
131136
# @WES = "/opt/models/wes/model.ckpt"
132137
# @WGS = "/opt/models/wgs/model.ckpt"
133138
ckpt = lambda _: "/opt/models/wes/model.ckpt" if run_wes else "/opt/models/wgs/model.ckpt",
134-
message: "Running DeepVariant call_variants on '{wildcards.name}' sample"
135139
threads: int(allocated("threads", "deepvariant_call_variants", cluster))
136140
container: config['images']['deepvariant']
137141
envmodules: config['tools']['deepvariant']
@@ -197,7 +201,6 @@ rule deepvariant_postprocess_variants:
197201
w.name,
198202
int(allocated("threads", "deepvariant_make_examples", cluster))
199203
)),
200-
message: "Running DeepVariant postprocess_variants on '{input.callvar}' input file"
201204
threads: int(allocated("threads", "deepvariant_postprocess_variants", cluster))
202205
container: config['images']['deepvariant']
203206
envmodules: config['tools']['deepvariant']

workflow/rules/somatic.smk

+11-4
Original file line numberDiff line numberDiff line change
@@ -630,8 +630,9 @@ rule hmftools_sage:
630630
data using the same set of options for WGS. At the current moment, sage
631631
does not have an option to restrict variant calling to specific regions.
632632
It does have an -high_depth_mode option; however, the authors state it
633-
should only be used for small targeted panels. For more information
634-
about hmftools visit github:
633+
should only be used for small targeted panels. In the 'somatic_selectvar'
634+
rule, any variants outside the padded regions/capture-kit BED file are
635+
removed from WES data. For more information about hmftools, visit GitHub:
635636
https://github.com/hartwigmedical/hmftools
636637
@Input:
637638
Sorted BAM file (scatter-per-tumor-sample)
@@ -796,6 +797,11 @@ rule deepsomatic_make_examples:
796797
# @WES = "/opt/models/deepsomatic/wes"
797798
# @WGS = "/opt/models/deepsomatic/wgs"
798799
ckpt = lambda _: "/opt/models/deepsomatic/wes" if run_wes else "/opt/models/deepsomatic/wgs",
800+
# Call variants within regions BED
801+
# file created from WES capture kit
802+
wes_region_option = lambda _: "--regions {0}".format(
803+
join(workpath, "references", "wes_regions_50bp_padded.bed"),
804+
) if run_wes else '',
799805
# Get tumor and normal sample names
800806
tumor = '{name}',
801807
# Building option for the paired normal sorted bam
@@ -848,7 +854,7 @@ rule deepsomatic_make_examples:
848854
--reads_tumor {input.tumor} {params.normal_bam_option} \\
849855
--sample_name_tumor {params.tumor} {params.normal_name_option} \\
850856
--examples {params.example} \\
851-
--checkpoint "{params.ckpt}" \\
857+
--checkpoint "{params.ckpt}" {params.wes_region_option} \\
852858
--vsc_max_fraction_indels_for_non_target_sample "0.5" \\
853859
--vsc_max_fraction_snps_for_non_target_sample "0.5" \\
854860
--vsc_min_fraction_indels "0.05" \\
@@ -1337,7 +1343,8 @@ rule somatic_selectvar:
13371343
somatic callers. This step takes the somatic calls from all the callers
13381344
(assumes already re-headered if needed, i.e. strelka and muse), and then
13391345
runs bcftools norm to split multi-allelic sites AND gatk SelectVariants
1340-
to filter sites.
1346+
to filter sites. For WES data, this step will also remove any variants
1347+
that are outside the padded regions/capture-kit BED file.
13411348
@Input:
13421349
Per sample, per caller, VCF somatic variants
13431350
@Output:

0 commit comments

Comments
 (0)