@@ -53,9 +53,9 @@ def get_normal_pileup_table(wildcards):
53
53
def get_somatic_tn_callers (wildcards ):
54
54
"""Returns somatic variants found with tumor-normal variant
55
55
callers. For tumor-normal samples, extra somatic callers
56
- (i.e. MuSE and Strelka) are run. Tumor-only return empty
57
- list (rule already has reference in input section).
58
- See config['pairs'] for tumor, normal pairs.
56
+ (i.e. MuSE and Strelka and DeepSomatic ) are run. Tumor-only
57
+ samples return an empty list (rule already has reference
58
+ in input section). See config['pairs'] for tumor, normal pairs.
59
59
"""
60
60
tumor = wildcards .name
61
61
normal = tumor2normal [tumor ]
@@ -746,6 +746,70 @@ rule clairs_tumor_only:
746
746
"""
747
747
748
748
749
+ rule deepsomatic :
750
+ """
751
+ Data processing step to call somatic variants using deep neural
752
+ network in tumor-normal pairs. DeepSomatic is an extension of the
753
+ deep learning-based variant caller DeepVariant that takes aligned
754
+ reads (in BAM or CRAM format) from tumor and normal data, produces
755
+ pileup image tensors from them, classifies each tensor using a CNN,
756
+ and finally reports somatic variants in a standard VCF or gVCF file.
757
+ This rule runs all three steps in the deepsomatic pipeline as a one
758
+ step: i.e. make_examples, call_variants, and postprocess_variants.
759
+ This is not optimal for large-scale projects as it will consume a lot
760
+ of resources inefficently (only the 2nd step in the dv pipeline can
761
+ make use of GPU-computing). As so, it is better to run the 1st/3rd
762
+ step on a normal compute node and run the 2nd step on a GPU node.
763
+ @Input:
764
+ Duplicate marked, sorted Tumor-Normal BAM file (scatter)
765
+ @Output:
766
+ Single-sample VCF file with called somatic variants
767
+ """
768
+ input :
769
+ tumor = join (workpath , "BAM" , "{name}.sorted.bam" ),
770
+ normal = get_normal_sorted_bam
771
+ output :
772
+ vcf = join (workpath , "deepsomatic" , "somatic" , "{name}.deepsomatic.vcf" ),
773
+ params :
774
+ rname = "deepsom" ,
775
+ genome = config ['references' ]['GENOME' ],
776
+ tmpdir = tmpdir ,
777
+ # Building option for deepsomatic config, where:
778
+ # @WGS = --model_type=WGS
779
+ # @WES = --model_type=WES (may be added in future)
780
+ dv_model_type = "WGS" ,
781
+ # Get tumor and normal sample names
782
+ tumor = '{name}' ,
783
+ # Building option for the paired normal sorted bam
784
+ normal_bam_option = lambda w : "--reads_normal={0}.sorted.bam" .format (
785
+ join (workpath , "BAM" , tumor2normal [w .name ])
786
+ ) if tumor2normal [w .name ] else "" ,
787
+ # Building option for the normal sample name
788
+ normal_name_option = lambda w : "--sample_name_normal={0}" .format (
789
+ tumor2normal [w .name ]
790
+ ) if tumor2normal [w .name ] else "" ,
791
+ threads : int (allocated ("threads" , "deepsomatic" , cluster ))
792
+ container : config ['images' ]['deepsomatic' ]
793
+ envmodules : config ['tools' ]['deepsomatic' ]
794
+ shell : """
795
+ # Setups temporary directory for
796
+ # intermediate files with built-in
797
+ # mechanism for deletion on exit
798
+ if [ ! -d "{params.tmpdir}" ]; then mkdir -p "{params.tmpdir}"; fi
799
+ tmp=$(mktemp -d -p "{params.tmpdir}")
800
+ trap 'rm -rf "${{tmp}}"' EXIT
801
+
802
+ run_deepsomatic \\
803
+ --model_type={params.dv_model_type} \\
804
+ --ref={params.genome} \\
805
+ --reads_tumor={input.tumor} {params.normal_bam_option} \\
806
+ --sample_name_tumor={params.tumor} {params.normal_name_option} \\
807
+ --output_vcf={output.vcf} \\
808
+ --num_shards={threads} \\
809
+ --intermediate_results_dir=${{tmp}}
810
+ """
811
+
812
+
749
813
rule muse :
750
814
"""Data-processing step to call somatic mutations with MuSE. This tool is
751
815
unique in accounting for tumor heterogeneity using a sample-specific error
@@ -1077,11 +1141,9 @@ rule somatic_merge_tumor:
1077
1141
Variants found in at least 2 callers
1078
1142
"""
1079
1143
input :
1080
- tn_callers = get_somatic_tn_callers ,
1144
+ tn_callers = get_somatic_tn_callers , # i.e muse, strelka, deepsomatic
1081
1145
octopus = join (workpath , "octopus" , "somatic" , "{name}.octopus.filtered.norm.vcf" ),
1082
1146
mutect2 = join (workpath , "mutect2" , "somatic" , "{name}.mutect2.filtered.norm.vcf" ),
1083
- # muse = join(workpath, "muse", "somatic", "{name}.muse.filtered.norm.vcf"),
1084
- # strelka = join(workpath, "strelka", "somatic", "{name}.strelka.filtered.norm.vcf"),
1085
1147
output :
1086
1148
merged = join (workpath , "merged" , "somatic" , "{name}.merged.filtered.norm.vcf.gz" ),
1087
1149
params :
0 commit comments