1
1
# Deprecated rules that may still be useful for some projects
2
def get_normal_sorted_bam(wildcards):
    """
    Input function that resolves the sorted BAM file of the normal
    sample paired with a given tumor sample.
    The tumor sample is taken from wildcards.name and looked up in
    tumor2normal; see config['pairs'] for tumor, normal pairs.
    @Returns:
        Path "<workpath>/BAM/<normal>.sorted.bam" when the lookup gives
        a non-empty normal sample (tumor, normal mode); otherwise an
        empty list (tumor-only mode).
    """
    paired_normal = tumor2normal[wildcards.name]
    if not paired_normal:
        # Tumor-only mode: there is no
        # paired normal BAM file to add
        return []
    # Tumor, normal mode: point at the
    # sorted BAM file of the paired normal
    bam_basename = "{0}.sorted.bam".format(paired_normal)
    return join(workpath, "BAM", bam_basename)
14
+
2
15
3
16
# Deprecated germline variant calling rule(s)
4
17
rule deepvariant :
@@ -57,4 +70,83 @@ rule deepvariant:
57
70
--output_vcf={output.vcf} \\
58
71
--num_shards={threads} \\
59
72
--intermediate_results_dir=${{tmp}}
60
- """
73
+ """
74
+
75
+ # Deprecated somatic variant calling rule(s)
76
rule deepsomatic:
    """
    Data processing step that calls somatic variants with a deep neural
    network, using a tumor sample and, when one is paired, its normal.
    DeepSomatic extends the deep learning-based variant caller DeepVariant:
    it takes aligned reads (BAM or CRAM) from tumor and normal data, builds
    pileup image tensors from them, classifies each tensor with a CNN, and
    reports somatic variants in a standard VCF or gVCF file.
    All three steps of the deepsomatic pipeline (make_examples,
    call_variants, and postprocess_variants) are run here as one step.
    That is not optimal for large-scale projects, because resources are
    used inefficiently: only the 2nd step can make use of GPU-computing.
    For such projects it is better to run the 1st/3rd step on a normal
    compute node and the 2nd step on a GPU node.
    @Input:
        Duplicate marked, sorted Tumor-Normal BAM file (scatter)
    @Output:
        Single-sample VCF file with called somatic variants
    """
    input:
        tumor  = join(workpath, "BAM", "{name}.sorted.bam"),
        # Resolves to the paired normal's sorted BAM,
        # or to an empty list in tumor-only mode
        normal = get_normal_sorted_bam
    output:
        vcf = join(workpath, "deepsomatic", "somatic", "{name}.deepsomatic.vcf"),
    params:
        rname  = "deepsom",
        genome = config["references"]["GENOME"],
        tmpdir = tmpdir,
        # Model passed to deepsomatic, where:
        #  @WGS = --model_type=WGS
        #  @WES = --model_type=WES (may be added in future)
        # NOTE(review): the same value is used when no normal is
        # paired (both normal options below are then empty) --
        # confirm this model type is valid for tumor-only runs.
        dv_model_type = "WGS",
        # Tumor sample name, taken from the wildcard
        tumor = "{name}",
        # Option for the paired normal's sorted BAM file,
        # an empty string when the tumor has no paired normal
        normal_bam_option = lambda w: "" if not tumor2normal[w.name] else (
            "--reads_normal={0}.sorted.bam".format(
                join(workpath, "BAM", tumor2normal[w.name])
            )
        ),
        # Option for the paired normal's sample name,
        # an empty string when the tumor has no paired normal
        normal_name_option = lambda w: "" if not tumor2normal[w.name] else (
            "--sample_name_normal={0}".format(tumor2normal[w.name])
        ),
    threads: int(allocated("threads", "deepsomatic", cluster))
    container: config["images"]["deepsomatic"]
    envmodules: config["tools"]["deepsomatic"]
    shell: """
    # Setups temporary directory for
    # intermediate files with built-in
    # mechanism for deletion on exit
    if [ ! -d "{params.tmpdir}" ]; then mkdir -p "{params.tmpdir}"; fi
    tmp=$(mktemp -d -p "{params.tmpdir}")
    trap 'du -sh "${{tmp}}"; rm -rf "${{tmp}}"' EXIT

    # Export OpenBLAS variable to
    # control the number of threads
    # in a thread pool. By setting
    # this variable to 1, work is
    # done in the thread that ran
    # the operation, rather than
    # disbatching the work to a
    # thread pool. If this option
    # is not provided, it can lead
    # to nested parallelism.
    # See this issue for more info:
    # https://github.com/google/deepsomatic/issues/28
    export OPENBLAS_NUM_THREADS=1

    # Run deepsomatic
    run_deepsomatic \\
        --model_type={params.dv_model_type} \\
        --ref={params.genome} \\
        --reads_tumor={input.tumor} {params.normal_bam_option} \\
        --sample_name_tumor={params.tumor} {params.normal_name_option} \\
        --output_vcf={output.vcf} \\
        --num_shards={threads} \\
        --intermediate_results_dir=${{tmp}}
    """
0 commit comments