Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dysgu==1.6.4 #6630

Closed
wants to merge 14 commits into from
7 changes: 7 additions & 0 deletions modules/nf-core/dysgu/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Conda environment for the DYSGU module.
name: dysgu
channels:
  - conda-forge
  - bioconda
  - defaults
dependencies:
  # Pin the exact release and its source channel so every solve of this
  # environment yields the same dysgu build.
  - bioconda::dysgu=1.6.4
56 changes: 56 additions & 0 deletions modules/nf-core/dysgu/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
// Runs `dysgu run` on one alignment file and emits a bgzip-compressed,
// tabix-indexed VCF together with a versions.yml record.
//
// Inputs:
//   [ meta,  input, index ] - alignment file and its index
//   [ meta2, fasta, fai   ] - reference FASTA and its index
// Outputs:
//   vcf      - [ meta, <prefix>.vcf.gz ]
//   tbi      - [ meta, <prefix>.vcf.gz.tbi ]
//   versions - versions.yml
process DYSGU {
    tag "$meta.id"
    label 'process_medium'

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'oras://community.wave.seqera.io/library/dysgu:48830f55112c399e':
        'community.wave.seqera.io/library/dysgu:faf71ac972284412' }"

    input:
    tuple val(meta), path(input), path(index)
    // The reference and its .fai are staged through a single channel so the
    // index always sits next to the FASTA it belongs to.
    tuple val(meta2), path(fasta), path(fai)

    output:
    tuple val(meta), path('*.vcf.gz')    , emit: vcf
    tuple val(meta), path('*.vcf.gz.tbi'), emit: tbi
    path 'versions.yml'                  , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args   = task.ext.args   ?: ''   // extra options for `dysgu run`
    def args2  = task.ext.args2  ?: ''   // extra options for `bgzip`
    def args3  = task.ext.args3  ?: ''   // extra options for `tabix`
    def prefix = task.ext.prefix ?: "${meta.id}"
    """
    dysgu run \\
        $args \\
        -p ${task.cpus} \\
        -x \\
        $fasta \\
        . \\
        $input \\
        | bgzip ${args2} --threads ${task.cpus} --stdout > ${prefix}.vcf.gz
    tabix ${args3} ${prefix}.vcf.gz

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        dysgu: \$(dysgu --version 2>&1)
    END_VERSIONS
    """

    stub:
    def prefix = task.ext.prefix ?: "${meta.id}"
    """
    echo "" | gzip > ${prefix}.vcf.gz
    touch ${prefix}.vcf.gz.tbi

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        dysgu: \$(dysgu --version 2>&1)
    END_VERSIONS
    """
}
68 changes: 68 additions & 0 deletions modules/nf-core/dysgu/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# Module metadata: describes the tool and the module's input/output elements.
name: dysgu

description: Dysgu calls structural variants (SVs) from mapped sequencing reads. It is designed for accurate and efficient detection of structural variations.
keywords:
  - structural variants
  - sv
  - vcf
tools:
  - dysgu:
      description: Structural variant caller for mapped sequencing data
      homepage: https://github.com/kcleal/dysgu
      documentation: https://github.com/kcleal/dysgu/blob/master/README.rst
      tool_dev_url: https://github.com/kcleal/dysgu
      doi: "10.1093/nar/gkac039"
      licence: ["GPL-3.0-or-later"]

input:
  # Alignment file and its index
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. `[ id:'sample1' ]`
  - input:
      type: file
      description: Input BAM/CRAM file
      pattern: "*.{bam,cram}"
  - index:
      type: file
      description: BAM/CRAM index file
      pattern: "*.{bai,crai}"
  # Reference genome and its index
  - meta2:
      type: map
      description: |
        Groovy Map containing reference information
        e.g. `[ id:'genome' ]`
  - fasta:
      type: file
      description: Genome reference FASTA file
      pattern: "*.{fa,fasta}"
  - fai:
      type: file
      description: Genome reference FASTA index file
      pattern: "*.{fa.fai,fasta.fai}"

output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. `[ id:'sample1' ]`
  - vcf:
      type: file
      description: VCF file with identified structural variants
      pattern: "*.vcf.gz"
  - tbi:
      type: file
      description: Tabix index of the VCF file
      pattern: "*.vcf.gz.tbi"
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"

authors:
  - "@famosab"
  - "@poddarharsh15"
maintainers:
  - "@poddarharsh15"
96 changes: 96 additions & 0 deletions modules/nf-core/dysgu/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
nextflow_process {

name "Test Process DYSGU"
script "../main.nf"
process "DYSGU"
config "./nextflow.config"
tag "modules"
tag "modules_nfcore"
tag "dysgu"


test("human - bam") {

when {
    // Channel 0: BAM with its index; channel 1: reference FASTA with its .fai
    process {
        """
        input[0] = [
            [ id:'test'], // meta map
            file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam', checkIfExists: true),
            file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam.bai', checkIfExists: true)
        ]
        input[1] = [
            [ id:'reference'], // meta map
            file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', checkIfExists: true),
            file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', checkIfExists: true)
        ]
        """
    }
}

then {
assertAll(
{ assert process.success },
{ assert snapshot(process.out.versions).match() },
{ assert path(process.out.vcf.get(0).get(1)).linesGzip.contains("##fileformat=VCFv4.2") }
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are the variants also unstable? You could test them using the nft-vcf plugin which is already enabled in this repository

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi @nvnieuwk,
Thank you for your comments. I apologize, but I didn't fully understand what you meant by "unstable variants."

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, I wasn't clear. I meant the variant lines in the VCF: are they exactly the same if you rerun the tool, or do they differ too? (Unstable files are usually caused by timestamps in the header.)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The output generated from the nf-core/sarek pipeline is identical to the output produced by running the Dysgu pipeline independently. For reference, I have attached some example screenshots below to illustrate the similarities.
PS: I still need to make some changes to the dysgu module that I haven't made yet, but in principle it is working fine for now.

Screenshot from 2024-09-26 10-06-30

Screenshot from 2024-09-26 10-08-20

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Aha that command INFO line is probably the reason for the unstable md5sum. Can you test the variants MD5 instead? See here on how to do this: https://github.com/seppinho/nft-vcf?tab=readme-ov-file#variantsmd5

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

seems like I need to use

Aha that command INFO line is probably the reason for the unstable md5sum. Can you test the variants MD5 instead? See here on how to do this: https://github.com/seppinho/nft-vcf?tab=readme-ov-file#variantsmd5

Hi @nvnieuwk, sorry for the silly question, but will this work if I add the block below to nf-test.config? Please let me know.

config {
  plugins {
    load "nft-vcf@1.0.7"
  }
}

something like this??

config {
    testsDir "."
    workDir ".nf-test"
    configFile "conf/test.config"
    profile "test"
    plugins {
      load "nft-vcf@1.0.7"
  }
}

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nft-vcf is already enabled in this repository so you shouldn't have to do this

poddarharsh15 marked this conversation as resolved.
Show resolved Hide resolved
)
}

}


// CRAM input: checks the run succeeds, the versions file matches the
// snapshot, and the gzipped VCF contains the VCFv4.2 fileformat line.
test("human - cram") {

    when {
        process {
            """
            input[0] = [
                [ id:'test'], // meta map
                file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram', checkIfExists: true),
                file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram.crai', checkIfExists: true)
            ]
            input[1] = [
                [ id:'reference'], // meta map
                file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', checkIfExists: true),
                file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', checkIfExists: true)
            ]
            """
        }
    }

    then {
        assertAll(
            { assert process.success },
            { assert snapshot(process.out.versions).match() },
            // Only the header line is checked here; the full file is not snapshotted.
            { assert path(process.out.vcf[0][1]).linesGzip.contains("##fileformat=VCFv4.2") }
        )
    }

}


// Stub run with BAM input: the whole output channel set is snapshotted.
test("human - bam - stub") {

    options "-stub"

    when {
        process {
            """
            input[0] = [
                [ id:'test'], // meta map
                file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam', checkIfExists: true),
                file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam.bai', checkIfExists: true)
            ]
            input[1] = [
                [ id:'reference'], // meta map
                file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', checkIfExists: true),
                file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', checkIfExists: true)
            ]
            """
        }
    }

    then {
        assertAll(
            { assert process.success },
            { assert snapshot(process.out).match() }
        )
    }

}

}
75 changes: 75 additions & 0 deletions modules/nf-core/dysgu/tests/main.nf.test.snap
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
{
"human - bam - stub": {
"content": [
{
"0": [
[
{
"id": "test"
},
"test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
]
],
"1": [
[
{
"id": "test"
},
"test.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"2": [
"versions.yml:md5,cf1e0487502108690603dd16f034bf5e"
],
"tbi": [
[
{
"id": "test"
},
"test.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"vcf": [
[
{
"id": "test"
},
"test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
]
],
"versions": [
"versions.yml:md5,cf1e0487502108690603dd16f034bf5e"
]
}
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "24.04.2"
},
"timestamp": "2024-07-08T13:18:45.660262"
},
"human - bam": {
"content": [
[
"versions.yml:md5,cf1e0487502108690603dd16f034bf5e"
]
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "24.04.2"
},
"timestamp": "2024-07-08T13:19:56.62312"
},
"human - cram": {
"content": [
[
"versions.yml:md5,cf1e0487502108690603dd16f034bf5e"
]
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "24.04.2"
},
"timestamp": "2024-07-08T13:20:04.494134"
}
}
5 changes: 5 additions & 0 deletions modules/nf-core/dysgu/tests/nextflow.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
// Test-only configuration for the DYSGU process.
process {
    withName: DYSGU {
        // NOTE(review): the previous value, `--exome`, does not appear among
        // the documented `dysgu run` options - confirm against `dysgu run --help`.
        // `--mode pe` selects the paired-end preset, matching the Illumina
        // paired-end test data.
        ext.args = '--mode pe'
    }
}
2 changes: 2 additions & 0 deletions modules/nf-core/dysgu/tests/tags.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Path globs associated with the `dysgu` tag.
dysgu:
  - "modules/nf-core/dysgu/**"