Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add option to analyse only mitochondria #608

Merged
merged 9 commits into from
Sep 24, 2024
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### `Added`

- A new analysis option `mito` to call and annotate only mitochondrial variants [#608](https://github.com/nf-core/raredisease/pull/608)

### `Changed`

### `Fixed`
Expand Down
20 changes: 10 additions & 10 deletions conf/modules/prepare_references.config
Original file line number Diff line number Diff line change
Expand Up @@ -42,16 +42,16 @@ process {
ext.when = {!params.bwa && (params.aligner == "sentieon" || params.mt_aligner == "sentieon")}
}

withName: '.*PREPARE_REFERENCES:BWAMEM2_INDEX_MT_SHIFT' {
ext.when = { (params.analysis_type.equals("wgs") || params.run_mt_for_wes) && params.mt_aligner == "bwamem2"}
withName: '.*PREPARE_REFERENCES:BWAMEM2_INDEX_MT.*' {
ext.when = { (params.analysis_type.matches("wgs|mito") || params.run_mt_for_wes) && params.mt_aligner == "bwamem2"}
}

withName: '.*PREPARE_REFERENCES:SENTIEON_BWAINDEX_MT_SHIFT' {
ext.when = { (params.analysis_type.equals("wgs") || params.run_mt_for_wes) && params.mt_aligner == "sentieon"}
withName: '.*PREPARE_REFERENCES:SENTIEON_BWAINDEX_MT.*' {
ext.when = { (params.analysis_type.matches("wgs|mito") || params.run_mt_for_wes) && params.mt_aligner == "sentieon"}
}

withName: '.*PREPARE_REFERENCES:BWA_INDEX_MT_SHIFT' {
ext.when = { (params.analysis_type.equals("wgs") || params.run_mt_for_wes) && params.mt_aligner == "bwa"}
withName: '.*PREPARE_REFERENCES:BWA_INDEX_MT.*' {
ext.when = { (params.analysis_type.matches("wgs|mito") || params.run_mt_for_wes) && params.mt_aligner == "bwa"}
}

withName: '.*PREPARE_REFERENCES:SAMTOOLS_FAIDX_GENOME' {
Expand All @@ -67,8 +67,8 @@ process {
ext.when = {!params.mt_fasta}
}

withName: '.*PREPARE_REFERENCES:SAMTOOLS_FAIDX_MT_SHIFT' {
ext.when = { (params.analysis_type.equals("wgs") || params.run_mt_for_wes) }
withName: '.*PREPARE_REFERENCES:SAMTOOLS_FAIDX_MT' {
ext.when = { (params.analysis_type.matches("wgs|mito") || params.run_mt_for_wes) }
}

withName: '.*PREPARE_REFERENCES:GATK_SD' {
Expand All @@ -79,8 +79,8 @@ process {
ext.args = { "--interval-file-name ${meta.id}_mt" }
}

withName: '.*PREPARE_REFERENCES:GATK_SD_MT_SHIFT' {
ext.when = { (params.analysis_type.equals("wgs") || params.run_mt_for_wes)}
withName: '.*PREPARE_REFERENCES:GATK_SD_MT' {
ext.when = { (params.analysis_type.matches("wgs|mito") || params.run_mt_for_wes)}
}

withName: '.*PREPARE_REFERENCES:TABIX_DBSNP' {
Expand Down
4 changes: 2 additions & 2 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -485,9 +485,9 @@
"analysis_type": {
"type": "string",
"default": "wgs",
"description": "Specifies which analysis type for the pipeline- either 'wgs' or 'wes'. This changes resources consumed and tools used.",
"description": "Specifies which analysis type for the pipeline- either 'wgs', 'wes' or 'mito'. This changes resources consumed and tools used.",
"fa_icon": "fas fa-align-center",
"enum": ["wgs", "wes"]
"enum": ["wgs", "wes", "mito"]
},
"bwa_as_fallback": {
"type": "boolean",
Expand Down
19 changes: 12 additions & 7 deletions subworkflows/local/align.nf
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,16 @@ workflow ALIGN {
ch_genome_bwamem2index // channel: [mandatory] [ val(meta), path(index) ]
ch_genome_bwamemeindex // channel: [mandatory] [ val(meta), path(index) ]
ch_genome_dictionary // channel: [mandatory] [ val(meta), path(dict) ]
ch_mt_bwaindex // channel: [mandatory] [ val(meta), path(index) ]
ch_mt_bwamem2index // channel: [mandatory] [ val(meta), path(index) ]
ch_mt_dictionary // channel: [mandatory] [ val(meta), path(dict) ]
ch_mt_fai // channel: [mandatory] [ val(meta), path(fai) ]
ch_mt_fasta // channel: [mandatory] [ val(meta), path(fasta) ]
ch_mtshift_bwaindex // channel: [mandatory] [ val(meta), path(index) ]
ch_mtshift_bwamem2index // channel: [mandatory] [ val(meta), path(index) ]
ch_mtshift_fasta // channel: [mandatory] [ val(meta), path(fasta) ]
ch_mtshift_dictionary // channel: [mandatory] [ val(meta), path(dict) ]
ch_mtshift_fai // channel: [mandatory] [ val(meta), path(fai) ]
ch_mtshift_fasta // channel: [mandatory] [ val(meta), path(fasta) ]
val_mbuffer_mem // integer: [mandatory] memory in megabytes
val_platform // string: [mandatory] illumina or a different technology
val_sort_threads // integer: [mandatory] number of sorting threads
Expand Down Expand Up @@ -83,7 +88,7 @@ workflow ALIGN {

// PREPARING READS FOR MT ALIGNMENT

if (params.analysis_type.equals("wgs") || params.run_mt_for_wes) {
if (params.analysis_type.matches("wgs|mito") || params.run_mt_for_wes) {
CONVERT_MT_BAM_TO_FASTQ (
ch_genome_bam_bai,
ch_genome_fasta,
Expand All @@ -94,11 +99,11 @@ workflow ALIGN {
ALIGN_MT (
CONVERT_MT_BAM_TO_FASTQ.out.fastq,
CONVERT_MT_BAM_TO_FASTQ.out.bam,
ch_genome_bwaindex,
ch_genome_bwamem2index,
ch_genome_fasta,
ch_genome_dictionary,
ch_genome_fai
ch_mt_bwaindex,
ch_mt_bwamem2index,
ch_mt_fasta,
ch_mt_dictionary,
ch_mt_fai
)

ALIGN_MT_SHIFT (
Expand Down
23 changes: 13 additions & 10 deletions subworkflows/local/call_snv.nf
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,12 @@ workflow CALL_SNV {
ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ]
ch_genome_dictionary // channel: [mandatory] [ val(meta), path(dict) ]
ch_mt_intervals // channel: [optional] [ path(interval_list) ]
ch_mtshift_fasta // channel: [optional] [ val(meta), path(fasta) ]
ch_mtshift_fai // channel: [optional] [ val(meta), path(fai) ]
ch_mt_dictionary // channel: [optional] [ val(meta), path(dict) ]
ch_mt_fai // channel: [optional] [ val(meta), path(fai) ]
ch_mt_fasta // channel: [optional] [ val(meta), path(fasta) ]
ch_mtshift_dictionary // channel: [optional] [ val(meta), path(dict) ]
ch_mtshift_fai // channel: [optional] [ val(meta), path(fai) ]
ch_mtshift_fasta // channel: [optional] [ val(meta), path(fasta) ]
ch_mtshift_intervals // channel: [optional] [ path(interval_list) ]
ch_mtshift_backchain // channel: [mandatory] [ val(meta), path(back_chain) ]
ch_dbsnp // channel: [optional] [ val(meta), path(vcf) ]
Expand All @@ -46,7 +49,7 @@ workflow CALL_SNV {
ch_sentieon_gvcf = Channel.empty()
ch_sentieon_gtbi = Channel.empty()

if (params.variant_caller.equals("deepvariant")) {
if (params.variant_caller.equals("deepvariant") && !params.analysis_type.equals("mito")) {
CALL_SNV_DEEPVARIANT ( // triggered only when params.variant_caller is set as deepvariant
ch_genome_bam_bai,
ch_genome_fasta,
Expand Down Expand Up @@ -97,12 +100,12 @@ workflow CALL_SNV {
ch_genome_tabix = GATK4_SELECTVARIANTS.out.tbi
ch_genome_vcf_tabix = ch_genome_vcf.join(ch_genome_tabix, failOnMismatch:true, failOnDuplicate:true)

if (params.analysis_type.equals("wgs") || params.run_mt_for_wes) {
if (params.analysis_type.matches("wgs|mito") || params.run_mt_for_wes) {
CALL_SNV_MT(
ch_mt_bam_bai,
ch_genome_fasta,
ch_genome_fai,
ch_genome_dictionary,
ch_mt_fasta,
ch_mt_fai,
ch_mt_dictionary,
ch_mt_intervals
)

Expand All @@ -117,9 +120,9 @@ workflow CALL_SNV {
POSTPROCESS_MT_CALLS(
CALL_SNV_MT.out.vcf,
CALL_SNV_MT_SHIFT.out.vcf,
ch_genome_fasta,
ch_genome_dictionary,
ch_genome_fai,
ch_mt_fasta,
ch_mt_dictionary,
ch_mt_fai,
ch_mtshift_backchain,
ch_case_info,
ch_foundin_header,
Expand Down
45 changes: 26 additions & 19 deletions subworkflows/local/call_structural_variants.nf
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,16 @@ workflow CALL_STRUCTURAL_VARIANTS {

main:
ch_versions = Channel.empty()
ch_merged_svs = Channel.empty()
ch_merged_tbi = Channel.empty()

CALL_SV_MANTA (ch_genome_bam, ch_genome_bai, ch_genome_fasta, ch_genome_fai, ch_case_info, ch_target_bed)
.diploid_sv_vcf
.collect{it[1]}
.set{ manta_vcf }
if (!params.analysis_type.equals("mito")) {
CALL_SV_MANTA (ch_genome_bam, ch_genome_bai, ch_genome_fasta, ch_genome_fai, ch_case_info, ch_target_bed)
.diploid_sv_vcf
.collect{it[1]}
.set{ manta_vcf }
ch_versions = ch_versions.mix(CALL_SV_MANTA.out.versions)
}

if (params.analysis_type.equals("wgs")) {
CALL_SV_TIDDIT (ch_genome_bam_bai, ch_genome_fasta, ch_bwa_index, ch_case_info)
Expand All @@ -61,7 +66,7 @@ workflow CALL_STRUCTURAL_VARIANTS {
ch_versions = ch_versions.mix(CALL_SV_GERMLINECNVCALLER.out.versions)
}

if (params.analysis_type.equals("wgs") || params.run_mt_for_wes) {
if (params.analysis_type.matches("wgs|mito") || params.run_mt_for_wes) {
CALL_SV_MT (ch_mt_bam_bai, ch_genome_fasta)
ch_versions = ch_versions.mix(CALL_SV_MT.out.versions)
}
Expand All @@ -74,39 +79,41 @@ workflow CALL_STRUCTURAL_VARIANTS {
.combine(cnvnator_vcf)
.toList()
.set { vcf_list }
} else {
} else if (!params.analysis_type.equals("mito")) {
manta_vcf
.toList()
.set { vcf_list }
}
} else if (params.analysis_type.equals("wgs")){
} else if (params.analysis_type.equals("wgs")) {
tiddit_vcf
.combine(manta_vcf)
.combine(gcnvcaller_vcf)
.combine(cnvnator_vcf)
.toList()
.set { vcf_list }
} else {
} else if (!params.analysis_type.equals("mito")) {
manta_vcf
.combine(gcnvcaller_vcf)
.toList()
.set { vcf_list }
}

ch_case_info
.combine(vcf_list)
.set { merge_input_vcfs }
if (!params.analysis_type.equals("mito")) {
ch_case_info
.combine(vcf_list)
.set { merge_input_vcfs }

SVDB_MERGE (merge_input_vcfs, ch_svcaller_priority)
SVDB_MERGE (merge_input_vcfs, ch_svcaller_priority)

TABIX_TABIX (SVDB_MERGE.out.vcf)

ch_versions = ch_versions.mix(CALL_SV_MANTA.out.versions)
ch_versions = ch_versions.mix(TABIX_TABIX.out.versions)
ch_versions = ch_versions.mix(SVDB_MERGE.out.versions)
TABIX_TABIX (SVDB_MERGE.out.vcf)
ch_merged_svs = SVDB_MERGE.out.vcf
ch_merged_tbi = TABIX_TABIX.out.tbi
ch_versions = ch_versions.mix(TABIX_TABIX.out.versions)
ch_versions = ch_versions.mix(SVDB_MERGE.out.versions)
}

emit:
vcf = SVDB_MERGE.out.vcf // channel: [ val(meta), path(vcf)]
tbi = TABIX_TABIX.out.tbi // channel: [ val(meta), path(tbi)]
vcf = ch_merged_svs // channel: [ val(meta), path(vcf)]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If only mito analysis is run, are no SV calls emitted?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

SV in mitochondria will be emitted, but not the nuclear genome if that's what you are asking? 😅

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay, so the mitochondria SVs are not input into SVDB_MERGE and emitted as ch_merged_svs, that is only the nuclear genome?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah I see what you mean. That does include nuclear genome and mt genome, but the difference is those calls are in vcf format. mito analysis at the moment does not generate SV calls in vcf format. I can update the documentation to make this clear.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think that would be good, to clarify if the SV calls in VCF format contains MT calls or not, just as you have done for the SNV VCFs 😊

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So if you run with --analysis_type mito no VCFs are procured, only the output files from CALL_SV_MT? Or are any of MANTA, TIDDIT, GERMLINECNVCALLER or CNVNATOR be run at the same time as --analysis_type mito?

And if you run with wgs, both the output files from CALL_SV_MT, as well as mitochondrial variants in VCF format from the callers in CALL_STRUCTURAL_VARIANTS are output?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yupe. That's right!

tbi = ch_merged_tbi // channel: [ val(meta), path(tbi)]
versions = ch_versions // channel: [ path(versions.yml) ]
}
34 changes: 25 additions & 9 deletions subworkflows/local/prepare_references.nf
Original file line number Diff line number Diff line change
Expand Up @@ -3,22 +3,25 @@
//

include { BWA_INDEX as BWA_INDEX_GENOME } from '../../modules/nf-core/bwa/index/main'
include { BWA_INDEX as BWA_INDEX_MT } from '../../modules/nf-core/bwa/index/main'
include { BWA_INDEX as BWA_INDEX_MT_SHIFT } from '../../modules/nf-core/bwa/index/main'
include { BWAMEM2_INDEX as BWAMEM2_INDEX_GENOME } from '../../modules/nf-core/bwamem2/index/main'
include { BWAMEM2_INDEX as BWAMEM2_INDEX_MT } from '../../modules/nf-core/bwamem2/index/main'
include { BWAMEM2_INDEX as BWAMEM2_INDEX_MT_SHIFT } from '../../modules/nf-core/bwamem2/index/main'
include { BWAMEME_INDEX as BWAMEME_INDEX_GENOME } from '../../modules/nf-core/bwameme/index/main'
include { CAT_CAT as CAT_CAT_BAIT } from '../../modules/nf-core/cat/cat/main'
include { GATK4_BEDTOINTERVALLIST as GATK_BILT } from '../../modules/nf-core/gatk4/bedtointervallist/main'
include { GATK4_CREATESEQUENCEDICTIONARY as GATK_SD } from '../../modules/nf-core/gatk4/createsequencedictionary/main'
include { GATK4_CREATESEQUENCEDICTIONARY as GATK_SD_MT_SHIFT } from '../../modules/nf-core/gatk4/createsequencedictionary/main'
include { GATK4_CREATESEQUENCEDICTIONARY as GATK_SD_MT } from '../../modules/nf-core/gatk4/createsequencedictionary/main'
include { GATK4_INTERVALLISTTOOLS as GATK_ILT } from '../../modules/nf-core/gatk4/intervallisttools/main'
include { GATK4_SHIFTFASTA as GATK_SHIFTFASTA } from '../../modules/nf-core/gatk4/shiftfasta/main'
include { GET_CHROM_SIZES } from '../../modules/local/get_chrom_sizes'
include { RTGTOOLS_FORMAT } from '../../modules/nf-core/rtgtools/format/main'
include { SAMTOOLS_FAIDX as SAMTOOLS_EXTRACT_MT } from '../../modules/nf-core/samtools/faidx/main'
include { SAMTOOLS_FAIDX as SAMTOOLS_FAIDX_GENOME } from '../../modules/nf-core/samtools/faidx/main'
include { SAMTOOLS_FAIDX as SAMTOOLS_FAIDX_MT_SHIFT } from '../../modules/nf-core/samtools/faidx/main'
include { SAMTOOLS_FAIDX as SAMTOOLS_FAIDX_MT } from '../../modules/nf-core/samtools/faidx/main'
include { SENTIEON_BWAINDEX as SENTIEON_BWAINDEX_GENOME } from '../../modules/nf-core/sentieon/bwaindex/main'
include { SENTIEON_BWAINDEX as SENTIEON_BWAINDEX_MT } from '../../modules/nf-core/sentieon/bwaindex/main'
include { SENTIEON_BWAINDEX as SENTIEON_BWAINDEX_MT_SHIFT } from '../../modules/nf-core/sentieon/bwaindex/main'
include { TABIX_BGZIPTABIX as TABIX_PBT } from '../../modules/nf-core/tabix/bgziptabix/main'
include { TABIX_BGZIPTABIX as TABIX_BGZIPINDEX_VCFANNOEXTRA } from '../../modules/nf-core/tabix/bgziptabix/main'
Expand Down Expand Up @@ -66,11 +69,16 @@ workflow PREPARE_REFERENCES {
// MT genome indices
SAMTOOLS_EXTRACT_MT(ch_genome_fasta, ch_fai)
ch_mt_fasta_in = Channel.empty().mix(ch_mt_fasta, SAMTOOLS_EXTRACT_MT.out.fa).collect()
SAMTOOLS_FAIDX_MT_SHIFT(ch_mt_fasta_in, [[],[]])
GATK_SD_MT_SHIFT(ch_mt_fasta_in)
GATK_SHIFTFASTA(ch_mt_fasta_in, SAMTOOLS_FAIDX_MT_SHIFT.out.fai, GATK_SD_MT_SHIFT.out.dict)
SAMTOOLS_FAIDX_MT(ch_mt_fasta_in, [[],[]])
GATK_SD_MT(ch_mt_fasta_in)
GATK_SHIFTFASTA(ch_mt_fasta_in, SAMTOOLS_FAIDX_MT.out.fai, GATK_SD_MT.out.dict)

// MT alignment indices
BWAMEM2_INDEX_MT(ch_mt_fasta_in)
BWA_INDEX_MT(ch_mt_fasta_in)
SENTIEON_BWAINDEX_MT(ch_mt_fasta_in)
ch_bwa_mt = Channel.empty().mix(SENTIEON_BWAINDEX_MT.out.index, BWA_INDEX_MT.out.index).collect()

BWAMEM2_INDEX_MT_SHIFT(GATK_SHIFTFASTA.out.shift_fa)
BWA_INDEX_MT_SHIFT(GATK_SHIFTFASTA.out.shift_fa)
SENTIEON_BWAINDEX_MT_SHIFT(GATK_SHIFTFASTA.out.shift_fa)
Expand Down Expand Up @@ -140,9 +148,12 @@ workflow PREPARE_REFERENCES {
ch_versions = ch_versions.mix(GATK_SD.out.versions)
ch_versions = ch_versions.mix(GET_CHROM_SIZES.out.versions)
ch_versions = ch_versions.mix(SAMTOOLS_EXTRACT_MT.out.versions)
ch_versions = ch_versions.mix(SAMTOOLS_FAIDX_MT_SHIFT.out.versions)
ch_versions = ch_versions.mix(GATK_SD_MT_SHIFT.out.versions)
ch_versions = ch_versions.mix(SAMTOOLS_FAIDX_MT.out.versions)
ch_versions = ch_versions.mix(GATK_SD_MT.out.versions)
ch_versions = ch_versions.mix(GATK_SHIFTFASTA.out.versions)
ch_versions = ch_versions.mix(BWAMEM2_INDEX_MT.out.versions)
ch_versions = ch_versions.mix(BWA_INDEX_MT.out.versions)
ch_versions = ch_versions.mix(SENTIEON_BWAINDEX_MT.out.versions)
ch_versions = ch_versions.mix(BWAMEM2_INDEX_MT_SHIFT.out.versions)
ch_versions = ch_versions.mix(BWA_INDEX_MT_SHIFT.out.versions)
ch_versions = ch_versions.mix(SENTIEON_BWAINDEX_MT_SHIFT.out.versions)
Expand All @@ -167,11 +178,16 @@ workflow PREPARE_REFERENCES {
genome_dict = ch_dict // channel: [ val(meta), path(dict) ]
sdf = RTGTOOLS_FORMAT.out.sdf // channel: [ val (meta), path(intervals) ]
mt_intervals = ch_shiftfasta_mtintervals.intervals.collect() // channel: [ path(intervals) ]
mt_bwa_index = ch_bwa_mt // channel: [ val(meta), path(index) ]
mt_bwamem2_index = BWAMEM2_INDEX_MT.out.index.collect() // channel: [ val(meta), path(index) ]
mt_dict = GATK_SD_MT.out.dict.collect() // channel: [ val(meta), path(dict) ]
mt_fasta = ch_mt_fasta_in.collect() // channel: [ val(meta), path(fasta) ]
mt_fai = SAMTOOLS_FAIDX_MT.out.fai.collect() // channel: [ val(meta), path(fai) ]
mtshift_intervals = ch_shiftfasta_mtintervals.shift_intervals.collect() // channel: [ path(intervals) ]
mtshift_backchain = GATK_SHIFTFASTA.out.shift_back_chain.collect() // channel: [ val(meta), path(backchain) ]
mtshift_dict = GATK_SHIFTFASTA.out.dict // channel: [ val(meta), path(dict) ]
mtshift_fai = GATK_SHIFTFASTA.out.shift_fai.collect() // channel: [ val(meta), path(fai) ]
mtshift_fasta = GATK_SHIFTFASTA.out.shift_fa.collect() // channel: [ val(meta), path(fai) ]
mtshift_dict = GATK_SHIFTFASTA.out.dict.collect() // channel: [ path(dict) ]
mtshift_fasta = GATK_SHIFTFASTA.out.shift_fa.collect() // channel: [ val(meta), path(fasta) ]
mtshift_bwa_index = ch_bwa_mtshift // channel: [ val(meta), path(index) ]
mtshift_bwamem2_index = BWAMEM2_INDEX_MT_SHIFT.out.index.collect() // channel: [ val(meta), path(index) ]

Expand Down
Loading