Skip to content

Commit bb08c3c

Browse files
vagkaratzasjfy133
andauthored
Add fastq seqkit sana pair (#9185)
* FASTQ_SEQKIT_SANA_PAIR subworkflow init * more tests and meta added * unnecessary operations removed * in one line * Update subworkflows/nf-core/fastq_seqkit_sana_pair/main.nf Co-authored-by: James A. Fellows Yates <[email protected]> * meta descr and keywords updated * descr updated * renamed and removed the copying mechanism, replaced with adding/removing strandness on the meta * mandatory config * meta single_end checks --------- Co-authored-by: James A. Fellows Yates <[email protected]>
1 parent fb9e715 commit bb08c3c

File tree

6 files changed

+467
-0
lines changed

6 files changed

+467
-0
lines changed
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
include { SEQKIT_SANA } from '../../../modules/nf-core/seqkit/sana/main'
2+
include { SEQKIT_PAIR } from '../../../modules/nf-core/seqkit/pair/main'
3+
4+
/*
 * FASTQ_SANITISE_SEQKIT
 *
 * Runs SEQKIT_SANA on every FASTQ file individually, then re-assembles the
 * files per sample and runs SEQKIT_PAIR on the paired-end samples.
 *
 * take:
 *   ch_reads - channel: [ val(meta), [ fastq ] ]
 *              meta.single_end decides how the files are interpreted:
 *              true  -> exactly one file expected
 *              false -> exactly two files expected (forward, reverse)
 *
 * emit:
 *   reads    - channel: [ val(meta), [ fastq ] ]
 *              single-end: the SEQKIT_SANA output
 *              paired-end: SEQKIT_PAIR `reads` and `unpaired_reads` outputs
 *   versions - channel: [ versions.yml ]
 *
 * Fails via error() when the number of files does not match meta.single_end.
 *
 * NOTE: a temporary `strandness` key ('single', 'R1' or 'R2') is added to meta
 * while SEQKIT_SANA runs and is removed again before anything is emitted.
 */
workflow FASTQ_SANITISE_SEQKIT {

    take:
    ch_reads // channel: [ val(meta), [ fastq ] ]

    main:
    ch_versions = Channel.empty()

    // Add strandness information to meta
    ch_reads_with_strandness = ch_reads
        // seqkit/sana can only receive one file at a time
        .flatMap { meta, reads ->
            if (meta.single_end) {
                if (reads instanceof List && reads.size() != 1) {
                    error("Error: Check your meta.single_end value. Single-end reads should contain one file only.")
                }
                return [[ meta + [strandness: 'single'], reads ]]
            } else {
                if (!(reads instanceof List) || reads.size() != 2) {
                    error("Error: Check your meta.single_end value. Paired-end reads should contain two files; a forward and a reverse.")
                }
                return [
                    [ meta + [strandness: 'R1'], reads[0] ],
                    [ meta + [strandness: 'R2'], reads[1] ]
                ]
            }
        }

    SEQKIT_SANA( ch_reads_with_strandness )
    ch_versions = ch_versions.mix(SEQKIT_SANA.out.versions.first())

    ch_sanitized_reads = SEQKIT_SANA.out.reads
        .map { meta, fastq ->
            // Remove strandness field from meta to merge back together,
            // but keep the label next to the file so the pair can be re-ordered
            def clean_meta = meta.findAll { key, value -> key != 'strandness' }
            return [ clean_meta, [ meta.strandness, fastq ] ]
        }
        .groupTuple(by: 0)
        .map { meta, labelled_reads ->
            // groupTuple collects items in arrival order and the R1/R2 tasks
            // finish independently, so sort on the label ('R1' < 'R2') to
            // guarantee forward-then-reverse ordering on every run
            def ordered_reads = labelled_reads
                .toSorted { a, b -> a[0] <=> b[0] }
                .collect { labelled -> labelled[1] }
            return [ meta, ordered_reads ]
        }
        .branch {
            meta, fastq ->
                single_end: meta.single_end
                    return [ meta, fastq ]
                paired_end: !meta.single_end
                    return [ meta, fastq ]
        }

    SEQKIT_PAIR ( ch_sanitized_reads.paired_end )
    ch_versions = ch_versions.mix(SEQKIT_PAIR.out.versions.first())

    // Separate name so the `take:` input channel is not reassigned
    ch_out_reads = ch_sanitized_reads.single_end.mix(SEQKIT_PAIR.out.reads, SEQKIT_PAIR.out.unpaired_reads)

    emit:
    reads    = ch_out_reads // channel: [ val(meta), [ fastq ] ]
    versions = ch_versions  // channel: [ versions.yml ]
}
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json
2+
name: "fastq_sanitise_seqkit"
3+
description: |
4+
Filters and reports malformed FASTQ sequences with seqkit/sana,
5+
and then pairs any paired-end files using seqkit/pair
6+
keywords:
7+
- fastq
8+
- quality control
9+
- filtering
10+
- malformed
11+
- pairing
12+
- seqkit
13+
- preprocessing
14+
components:
15+
- seqkit/sana
16+
- seqkit/pair
17+
input:
18+
- ch_reads:
19+
type: channel
20+
description: |
21+
Channel containing sample metadata and FASTQ files.
22+
Structure: [ val(meta), [ fastq ] ]
23+
Where meta is a map containing at least:
24+
- id: sample identifier
25+
- single_end: boolean indicating if data is single-end (true) or paired-end (false)
26+
pattern: "*.{fastq,fastq.gz,fq,fq.gz}"
27+
output:
28+
- reads:
29+
type: channel
30+
description: |
31+
Channel containing filtered (i.e., non-malformed) and paired FASTQ files.
32+
For single-end data: returns filtered reads
33+
For paired-end data: returns properly paired reads and any unpaired reads
34+
Structure: [ val(meta), [ fastq ] ]
35+
pattern: "*.{fastq,fastq.gz,fq,fq.gz}"
36+
- versions:
37+
type: file
38+
description: |
39+
File containing software versions
40+
Structure: [ path(versions.yml) ]
41+
pattern: "versions.yml"
42+
authors:
43+
- "@vagkaratzas"
44+
maintainers:
45+
- "@vagkaratzas"
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
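// IMPORTANT: This config file must be included for the subworkflow to work properly.
// The subworkflow adds a `strandness` value ('single', 'R1' or 'R2') to meta before calling
// SEQKIT_SANA; the prefix below uses it so that each file of a sample gets its own output prefix.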
2+
process {
3+
4+
withName: SEQKIT_SANA {
5+
ext.prefix = { "${meta.id}_${meta.strandness}" }
6+
}
7+
8+
}
Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
// nf-test suite for the FASTQ_SANITISE_SEQKIT subworkflow.
// Covers single-end input, paired-end input, a mixed channel containing a
// broken single-end file, a stub run, and the two input-validation failures
// raised when the number of files disagrees with meta.single_end.
nextflow_workflow {

    name "Test Subworkflow FASTQ_SANITISE_SEQKIT"
    script "../main.nf"
    workflow "FASTQ_SANITISE_SEQKIT"
    config './nextflow.config'

    tag "subworkflows"
    tag "subworkflows_nfcore"
    tag "subworkflows/fastq_sanitise_seqkit"
    tag "seqkit"
    tag "seqkit/sana"
    tag "seqkit/pair"


    test("sarscov2 - fastq - single_end") {

        when {
            workflow {
                """
                input[0] = Channel.of([
                    [ id:'test_single', single_end:true ], // meta map
                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)
                ])
                """
            }
        }

        then {
            assertAll(
                { assert workflow.success},
                { assert snapshot(
                    workflow.out,
                    workflow.out.versions.collect{ path(it).yaml }.unique()
                    ).match() }
            )
        }
    }

    test("sarscov2 - fastq - paired_end") {

        when {
            workflow {
                """
                input[0] = Channel.of([
                    [ id:'test_paired', single_end:false ], // meta map
                    [
                        file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
                        file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)
                    ]
                ])
                """
            }
        }

        then {
            assertAll(
                { assert workflow.success},
                { assert snapshot(
                    workflow.out,
                    workflow.out.versions.collect{ path(it).yaml }.unique()
                    ).match() }
            )
        }
    }

    test("sarscov2 - fastq - both with single broken") {

        when {
            workflow {
                """
                input[0] = Channel.of([
                    [ id:'test_both', single_end:true ], // meta map
                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1_broken.fastq.gz', checkIfExists: true)
                ],
                [
                    [ id:'test_both', single_end:false ], // meta map
                    [
                        file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
                        file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)
                    ]
                ])
                """
            }
        }

        then {
            assertAll(
                { assert workflow.success},
                { assert snapshot(
                    workflow.out,
                    workflow.out.versions.collect{ path(it).yaml }.unique()
                    ).match() }
            )
        }
    }

    // Validation branch: meta says single-end but two files are supplied
    test("sarscov2 - fastq - single_end meta with two files - fails") {

        when {
            workflow {
                """
                input[0] = Channel.of([
                    [ id:'test_single_mismatch', single_end:true ], // meta map
                    [
                        file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
                        file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)
                    ]
                ])
                """
            }
        }

        then {
            assert workflow.failed
        }
    }

    // Validation branch: meta says paired-end but only one file is supplied
    test("sarscov2 - fastq - paired_end meta with one file - fails") {

        when {
            workflow {
                """
                input[0] = Channel.of([
                    [ id:'test_paired_mismatch', single_end:false ], // meta map
                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)
                ])
                """
            }
        }

        then {
            assert workflow.failed
        }
    }

    test("sarscov2 - fastq - stub") {

        options "-stub"

        when {
            workflow {
                """
                input[0] = Channel.of([
                    [ id: 'test_stub', single_end:true ],
                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1_broken.fastq.gz', checkIfExists: true)
                ])
                """
            }
        }

        then {
            assertAll(
                { assert workflow.success },
                { assert snapshot(
                    workflow.out,
                    workflow.out.versions.collect{ path(it).yaml }.unique()
                    ).match() }
            )
        }
    }
}

0 commit comments

Comments
 (0)