Skip to content

Commit 21118eb

Browse files
committed
Adding refs and rule for hmftools sage
1 parent b5aa555 commit 21118eb

File tree

2 files changed

+84
-0
lines changed

2 files changed

+84
-0
lines changed

config/genome.json

+6
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,12 @@
6868
"HMFTOOLS_AMBER_JAR": "/data/OpenOmics/references/genome-seek/hmftools/amber-3.5.jar",
6969
"HMFTOOLS_COBALT_JAR": "/data/OpenOmics/references/genome-seek/hmftools/cobalt-1.11.jar",
7070
"HMFTOOLS_PURPLE_JAR": "/data/OpenOmics/references/genome-seek/hmftools/purple_v3.2.jar",
71+
"HMFTOOLS_SAGE_JAR": "/data/OpenOmics/references/genome-seek/hmftools/sage_v3.4.1.jar",
72+
"HMFTOOLS_SAGE_REF_VERSION": "38",
73+
"HMFTOOLS_SAGE_HOTSPOTS": "/data/OpenOmics/references/genome-seek/hmftools/v5_34/ref/38/variants/KnownHotspots.somatic.38.vcf.gz",
74+
"HMFTOOLS_SAGE_PANEL": "/data/OpenOmics/references/genome-seek/hmftools/v5_34/ref/38/variants/ActionableCodingPanel.38.bed.gz",
75+
"HMFTOOLS_SAGE_HIGH_CONF": "/data/OpenOmics/references/genome-seek/hmftools/v5_34/ref/38/variants/HG001_GRCh38_GIAB_highconf_CG-IllFB-IllGATKHC-Ion-10X-SOLID_CHROM1-X_v.3.3.2_highconf_nosomaticdel_noCENorHET7.bed.gz",
76+
"HMFTOOLS_SAGE_ENSEMBL_DATA": "/data/OpenOmics/references/genome-seek/hmftools/v5_34/ref/38/common/ensembl_data/",
7177
"HMFTOOLS_AMBER_LOCI": "/data/OpenOmics/references/genome-seek/hmftools/GermlineHetPon.hg38.vcf.gz",
7278
"HMFTOOLS_GC_PROFILE": "/data/OpenOmics/references/genome-seek/hmftools/GC_profile.hg38.1000bp.cnp",
7379
"HMFTOOLS_DIPLOID": "/data/OpenOmics/references/genome-seek/hmftools/DiploidRegions.hg38.bed.gz",

workflow/rules/somatic.smk

+78
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,21 @@ def get_normal_recal_bam(wildcards):
1919
# Runs in tumor-only mode
2020
return []
2121

22+
23+
def get_normal_sorted_bam(wildcards):
    """
    Returns the sorted BAM file of a tumor sample's paired normal.
    See config['pairs'] for tumor, normal pairs. An empty list is
    returned when the tumor sample has no paired normal.
    """
    paired_normal = tumor2normal[wildcards.name]
    if not paired_normal:
        # Tumor-only mode, there is
        # no normal BAM file to add
        return []
    # Tumor, normal mode, resolve the
    # path to the normal's sorted BAM
    bam_name = "{0}.sorted.bam".format(paired_normal)
    return join(workpath, "BAM", bam_name)
35+
36+
2237
def get_normal_pileup_table(wildcards):
2338
"""
2439
Returns a tumor samples paired normal pileup
@@ -612,6 +627,69 @@ rule gatk_filter_mutect2:
612627
"""
613628

614629

630+
rule hmftools_sage:
    """Data-processing step to call somatic variants in tumor-only (TO)
    and tumor-normal (TN) samples using hmftools sage. HMF Tools is a
    suite of tools the Hartwig Medical Foundation developed to analyze
    genomic data. SAGE (Somatic Alterations in Genome) is run on the
    tumor's sorted BAM file. When the tumor has a paired normal, the
    normal sample name and its sorted BAM file are passed to SAGE as
    the reference; otherwise both reference options are left out. For
    more information about hmftools visit:
    https://github.com/hartwigmedical/hmftools
    @Input:
        Sorted BAM file (scatter-per-tumor-sample)
    @Output:
        Per sample somatic variants in VCF format
    """
    input:
        tumor  = join(workpath, "BAM", "{name}.sorted.bam"),
        # Resolves to the paired normal's sorted BAM,
        # or to an empty list in tumor-only mode
        normal = get_normal_sorted_bam
    output:
        vcf = join(workpath, "sage", "somatic", "{name}.sage.vcf"),
    params:
        rname = 'hmfsage',
        tumor = '{name}',
        genome = config['references']['GENOME'],
        # Path to the SAGE jar file, it is added
        # to the java classpath in the command below
        sage_jar = config['references']['HMFTOOLS_SAGE_JAR'],
        ref_version = config['references']['HMFTOOLS_SAGE_REF_VERSION'],
        hotspots = config['references']['HMFTOOLS_SAGE_HOTSPOTS'],
        panel = config['references']['HMFTOOLS_SAGE_PANEL'],
        high_conf = config['references']['HMFTOOLS_SAGE_HIGH_CONF'],
        ensembl_data = config['references']['HMFTOOLS_SAGE_ENSEMBL_DATA'],
        # For UGE/SGE clusters memory is allocated
        # per cpu, so we must calculate total mem
        # as the product of threads and memory.
        # NOTE(review): 1 is subtracted from the
        # total in uge mode, presumably to leave
        # headroom for the JVM -- confirm
        memory = lambda _: int(
            int(allocated("mem", "hmftools_sage", cluster).lower().rstrip('g')) * \
            int(allocated("threads", "hmftools_sage", cluster))
        )-1 if run_mode == "uge" \
        else allocated("mem", "hmftools_sage", cluster).lower().rstrip('g'),
        # Building optional arguments for paired normal,
        # each resolves to an empty string when the
        # tumor sample does not have a paired normal
        normal_name = lambda w: "-reference {0}".format(
            tumor2normal[w.name]
        ) if tumor2normal[w.name] else "",
        normal_bam = lambda w: "-reference_bam {0}.sorted.bam".format(
            join(workpath, "BAM", tumor2normal[w.name])
        ) if tumor2normal[w.name] else "",
    threads:
        int(allocated("threads", "hmftools_sage", cluster)),
    container: config['images']['genome-seek_cnv']
    envmodules: config['tools']['rlang']
    shell: """
    # Call somatic variants with hmftools
    # Somatic Alterations in Genome (SAGE)
    java -Xmx{params.memory}g -cp {params.sage_jar} \\
        com.hartwig.hmftools.sage.SageApplication \\
            -threads {threads} \\
            -tumor {params.tumor} {params.normal_name} \\
            -tumor_bam {input.tumor} {params.normal_bam} \\
            -ref_genome_version {params.ref_version} \\
            -ref_genome {params.genome} \\
            -hotspots {params.hotspots} \\
            -panel_bed {params.panel} \\
            -high_confidence_bed {params.high_conf} \\
            -ensembl_data_dir {params.ensembl_data} \\
            -output_vcf {output.vcf}
    """
691+
692+
615693
rule muse:
616694
"""Data-processing step to call somatic mutations with MuSE. This tool is
617695
unique in accounting for tumor heterogeneity using a sample-specific error

0 commit comments

Comments
 (0)