Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 58 additions & 0 deletions subworkflows/nf-core/fastq_sanitise_seqkit/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
include { SEQKIT_SANA } from '../../../modules/nf-core/seqkit/sana/main'
include { SEQKIT_PAIR } from '../../../modules/nf-core/seqkit/pair/main'

//
// Sanitise FASTQ files with seqkit/sana and re-pair paired-end mates with seqkit/pair.
//
// take:
//   ch_reads  - [ val(meta), [ fastq ] ]; meta.single_end selects the single-end or paired-end path.
// emit:
//   reads     - sanitised single-end reads mixed with SEQKIT_PAIR's `reads` and `unpaired_reads` outputs.
//   versions  - versions.yml emitted by SEQKIT_SANA and SEQKIT_PAIR (first emission of each).
//
// Fails with an error when the number of files does not agree with meta.single_end.
//
workflow FASTQ_SANITISE_SEQKIT {

    take:
    ch_reads // channel: [ val(meta), [ fastq ] ]

    main:
    ch_versions = Channel.empty()

    // seqkit/sana can only receive one file at a time, so every sample is split
    // into one channel item per FASTQ file. A temporary `strandness` key
    // ('single', 'R1' or 'R2') is added to meta to tell the files apart.
    ch_reads_with_strandness = ch_reads
        .flatMap { meta, reads ->
            if (meta.single_end) {
                // A bare (non-List) file is accepted as-is for single-end input
                if (reads instanceof List && reads.size() != 1) {
                    error("Error: Check your meta.single_end value. Single-end reads should contain one file only.")
                }
                return [[ meta + [strandness: 'single'], reads ]]
            } else {
                if (!(reads instanceof List) || reads.size() != 2) {
                    error("Error: Check your meta.single_end value. Paired-end reads should contain two files; a forward and a reverse.")
                }
                return [
                    [ meta + [strandness: 'R1'], reads[0] ],
                    [ meta + [strandness: 'R2'], reads[1] ]
                ]
            }
        }

    SEQKIT_SANA( ch_reads_with_strandness )
    ch_versions = ch_versions.mix(SEQKIT_SANA.out.versions.first())

    ch_sanitized_reads = SEQKIT_SANA.out.reads
        .map { meta, fastq ->
            // Remove the strandness field from meta so both mates share one grouping key,
            // but keep its value next to the file so the mate order can be restored below
            def clean_meta = meta.findAll { key, value -> key != 'strandness' }
            return [ clean_meta, [ meta.strandness, fastq ] ]
        }
        .groupTuple(by: 0)
        .map { meta, tagged_reads ->
            // groupTuple collects items in arrival order, which depends on which
            // SEQKIT_SANA task finishes first. Sort on the strandness tag
            // ('R1' < 'R2') so the forward read is always first.
            def ordered_reads = tagged_reads
                .toSorted { tagged -> tagged[0] }
                .collect { tagged -> tagged[1] }
            return [ meta, ordered_reads ]
        }
        .branch {
            meta, fastq ->
                single_end: meta.single_end
                    return [ meta, fastq ]
                paired_end: !meta.single_end
                    return [ meta, fastq ]
        }

    SEQKIT_PAIR ( ch_sanitized_reads.paired_end )
    ch_versions = ch_versions.mix(SEQKIT_PAIR.out.versions.first())

    // Use a dedicated output variable instead of reassigning the `take:` input
    ch_reads_out = ch_sanitized_reads.single_end.mix(SEQKIT_PAIR.out.reads, SEQKIT_PAIR.out.unpaired_reads)

    emit:
    reads    = ch_reads_out // channel: [ val(meta), [ fastq ] ]
    versions = ch_versions  // channel: [ versions.yml ]
}
45 changes: 45 additions & 0 deletions subworkflows/nf-core/fastq_sanitise_seqkit/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json
name: "fastq_sanitise_seqkit"
description: |
  Filters and reports malformed FASTQ sequences with seqkit/sana,
  and then pairs any paired-end files using seqkit/pair.
  The nextflow.config shipped with this subworkflow (it sets ext.prefix
  for SEQKIT_SANA) should be included for the subworkflow to work properly.
keywords:
  - fastq
  - quality control
  - filtering
  - malformed
  - pairing
  - seqkit
  - preprocessing
components:
  - seqkit/sana
  - seqkit/pair
input:
  - ch_reads:
      type: channel
      description: |
        Channel containing sample metadata and FASTQ files.
        Structure: [ val(meta), [ fastq ] ]
        Where meta is a map containing at least:
          - id: sample identifier
          - single_end: boolean indicating if data is single-end (true) or paired-end (false)
        Single-end samples must provide one FASTQ file and paired-end samples
        exactly two (forward, then reverse); otherwise the subworkflow stops with an error.
      pattern: "*.{fastq,fastq.gz,fq,fq.gz}"
output:
  - reads:
      type: channel
      description: |
        Channel containing filtered (i.e., non-malformed) and paired FASTQ files.
        For single-end data: returns filtered reads
        For paired-end data: returns properly paired reads and any unpaired reads,
        emitted as separate channel items
        Structure: [ val(meta), [ fastq ] ]
      pattern: "*.{fastq,fastq.gz,fq,fq.gz}"
  - versions:
      type: file
      description: |
        File containing software versions
        Structure: [ path(versions.yml) ]
      pattern: "versions.yml"
authors:
  - "@vagkaratzas"
maintainers:
  - "@vagkaratzas"
8 changes: 8 additions & 0 deletions subworkflows/nf-core/fastq_sanitise_seqkit/nextflow.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
// IMPORTANT: This config file should be included to ensure that the subworkflow works properly.
process {

    // The subworkflow runs SEQKIT_SANA once per FASTQ file and tags each file with
    // meta.strandness ('single', 'R1' or 'R2'); the tag is appended to the output prefix.
    // When meta.strandness is absent (e.g. SEQKIT_SANA is also called outside this
    // subworkflow), fall back to meta.id instead of producing a "<id>_null" prefix.
    withName: SEQKIT_SANA {
        ext.prefix = { meta.strandness ? "${meta.id}_${meta.strandness}" : "${meta.id}" }
    }

}
123 changes: 123 additions & 0 deletions subworkflows/nf-core/fastq_sanitise_seqkit/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
// nf-test suite for the FASTQ_SANITISE_SEQKIT subworkflow.
// Covers single-end, paired-end, a mixed channel with a broken single-end file,
// a stub run, and the input-validation failure path.
nextflow_workflow {

    name "Test Subworkflow FASTQ_SANITISE_SEQKIT"
    script "../main.nf"
    workflow "FASTQ_SANITISE_SEQKIT"
    config './nextflow.config'

    tag "subworkflows"
    tag "subworkflows_nfcore"
    tag "subworkflows/fastq_sanitise_seqkit"
    tag "seqkit"
    tag "seqkit/sana"
    tag "seqkit/pair"

    // One well-formed single-end FASTQ file
    test("sarscov2 - fastq - single_end") {

        when {
            workflow {
                """
                input[0] = Channel.of([
                    [ id:'test_single', single_end:true ], // meta map
                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)
                ])
                """
            }
        }

        then {
            assertAll(
                { assert workflow.success },
                { assert snapshot(
                    workflow.out,
                    workflow.out.versions.collect{ path(it).yaml }.unique()
                    ).match() }
            )
        }
    }

    // One well-formed paired-end sample (forward + reverse)
    test("sarscov2 - fastq - paired_end") {

        when {
            workflow {
                """
                input[0] = Channel.of([
                    [ id:'test_paired', single_end:false ], // meta map
                    [
                        file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
                        file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)
                    ]
                ])
                """
            }
        }

        then {
            assertAll(
                { assert workflow.success },
                { assert snapshot(
                    workflow.out,
                    workflow.out.versions.collect{ path(it).yaml }.unique()
                    ).match() }
            )
        }
    }

    // Single-end (broken file) and paired-end samples in the same channel
    test("sarscov2 - fastq - both with single broken") {

        when {
            workflow {
                """
                input[0] = Channel.of([
                    [ id:'test_both', single_end:true ], // meta map
                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1_broken.fastq.gz', checkIfExists: true)
                ],
                [
                    [ id:'test_both', single_end:false ], // meta map
                    [
                        file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
                        file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)
                    ]
                ])
                """
            }
        }

        then {
            assertAll(
                { assert workflow.success },
                { assert snapshot(
                    workflow.out,
                    workflow.out.versions.collect{ path(it).yaml }.unique()
                    ).match() }
            )
        }
    }

    // meta.single_end is true but two files are supplied: the subworkflow's
    // input validation is expected to abort the run
    test("sarscov2 - fastq - single_end with two files - fails") {

        when {
            workflow {
                """
                input[0] = Channel.of([
                    [ id:'test_mismatch', single_end:true ], // meta map
                    [
                        file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
                        file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)
                    ]
                ])
                """
            }
        }

        then {
            assertAll(
                { assert workflow.failed }
            )
        }
    }

    // Stub run with a single-end sample
    test("sarscov2 - fastq - stub") {

        options "-stub"

        when {
            workflow {
                """
                input[0] = Channel.of([
                    [ id: 'test_stub', single_end:true ],
                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1_broken.fastq.gz', checkIfExists: true)
                ])
                """
            }
        }

        then {
            assertAll(
                { assert workflow.success },
                { assert snapshot(
                    workflow.out,
                    workflow.out.versions.collect{ path(it).yaml }.unique()
                    ).match() }
            )
        }
    }
}
Loading
Loading