Skip to content

Commit 12716c7

Browse files
authored
Merge pull request #100 from nf-cmgg/dev
Preprocessing v2.0.1
2 parents 7fcaf43 + 315a52d commit 12716c7

File tree

41 files changed

+375
-500
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

41 files changed

+375
-500
lines changed

.github/workflows/linting.yml

-68
This file was deleted.

CHANGELOG.md

+9
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,15 @@
33
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
44
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
55

6+
## v2.0.1
7+
8+
- Fix syntax according to new rules
9+
- Drop usage of `check_max` function in favour of `resourceLimits`
10+
- Bump nf-schema to v2.1.0
11+
- Add configuration profiles for several datatypes
12+
- Fix usage of `--run_coverage` parameter
13+
- Update modules
14+
615
## v2.0.0
716

817
- Move repo to nf-cmgg organisation

assets/multiqc_config.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
report_comment: >
2-
This report has been generated by the <a href="https://github.com/nf-cmgg/preprocessing/releases/tag/2.0.0" target="_blank">nf-cmgg/preprocessing</a>
2+
This report has been generated by the <a href="https://github.com/nf-cmgg/preprocessing/releases/tag/2.0.1" target="_blank">nf-cmgg/preprocessing</a>
33
analysis pipeline.
44
report_section_order:
55
"nf-cmgg-preprocessing-methods-description":

assets/schema_input.json

+3-3
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,9 @@
3030
"meta": ["genome"],
3131
"type": "string",
3232
"description": "Genome build. Currently supported genomes include GRCh38, GRCm39, GRCz11, hg38 and hg38-noalt",
33-
"pattern": "^[a-zA-Z0-9_]+$",
33+
"pattern": "^[a-zA-Z0-9_-]+$",
3434
"default": null,
35-
"enum": ["GRCh38", "GRCm39", "GRCz11"]
35+
"enum": ["GRCh38", "GRCm39", "GRCz11", "hg38", "hg38-noalt"]
3636
},
3737
"aligner": {
3838
"meta": ["aligner"],
@@ -56,7 +56,7 @@
5656
"meta": ["sample_type"],
5757
"type": "string",
5858
"description": "Data type of the sample",
59-
"enum": ["DNA", "RNA"],
59+
"enum": ["DNA", "RNA", "Tissue"],
6060
"default": "DNA"
6161
},
6262
"library": {

assets/schema_sampleinfo.json

+11-3
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
"meta": ["sample_type"],
1818
"type": "string",
1919
"description": "Data type of the sample",
20-
"enum": ["DNA", "RNA"],
20+
"enum": ["DNA", "RNA", "Tissue"],
2121
"default": "DNA"
2222
},
2323
"library": {
@@ -30,7 +30,7 @@
3030
"meta": ["tag"],
3131
"type": "string",
3232
"description": "Sample tag",
33-
"pattern": "^[a-zA-Z0-9_]+$"
33+
"pattern": "^[a-zA-Z0-9_-]+$"
3434
},
3535
"purpose": {
3636
"meta": ["purpose"],
@@ -53,11 +53,19 @@
5353
"Danio Rerio"
5454
]
5555
},
56+
"genome": {
57+
"meta": ["genome"],
58+
"type": "string",
59+
"description": "Genome build. Currently supported genomes include GRCh38, GRCm39, GRCz11, hg38 and hg38-noalt",
60+
"pattern": "^[a-zA-Z0-9_-]+$",
61+
"default": null,
62+
"enum": ["GRCh38", "GRCm39", "GRCz11", "hg38", "hg38-noalt"]
63+
},
5664
"vivar_project": {
5765
"meta": ["vivar_project"],
5866
"type": "string",
5967
"description": "Vivar project name",
60-
"pattern": "^[a-zA-Z0-9_]+$"
68+
"pattern": "^[a-zA-Z0-9_\\s-]+$"
6169
},
6270
"binsize": {
6371
"meta": ["binsize"],

conf/base.config

+17-17
Original file line numberDiff line numberDiff line change
@@ -10,40 +10,40 @@
1010

1111
process {
1212

13-
cpus = { check_max( 1 * task.attempt, 'cpus' ) }
14-
memory = { check_max( 8.GB * task.attempt, 'memory' ) }
15-
time = { check_max( 4.h * task.attempt, 'time' ) }
13+
cpus = { 1 * task.attempt }
14+
memory = { 8.GB * task.attempt }
15+
time = { 4.h * task.attempt }
1616

1717
errorStrategy = { task.exitStatus in ((130..145) + 104) ? 'retry' : 'finish' }
1818
maxRetries = 1
1919
maxErrors = '-1'
2020

2121
// Process-specific resource requirements
2222
withLabel:process_single {
23-
cpus = { check_max( 1 , 'cpus' ) }
24-
memory = { check_max( 8.GB * task.attempt, 'memory' ) }
25-
time = { check_max( 4.h * task.attempt, 'time' ) }
23+
cpus = { 1 }
24+
memory = { 8.GB * task.attempt }
25+
time = { 4.h * task.attempt }
2626
}
2727
withLabel:process_low {
28-
cpus = { check_max( 2 * task.attempt, 'cpus' ) }
29-
memory = { check_max( 16.GB * task.attempt, 'memory' ) }
30-
time = { check_max( 4.h * task.attempt, 'time' ) }
28+
cpus = { 2 * task.attempt }
29+
memory = { 16.GB * task.attempt }
30+
time = { 4.h * task.attempt }
3131
}
3232
withLabel:process_medium {
33-
cpus = { check_max( 9 * task.attempt, 'cpus' ) }
34-
memory = { check_max( 72.GB * task.attempt, 'memory' ) }
35-
time = { check_max( 8.h * task.attempt, 'time' ) }
33+
cpus = { 9 * task.attempt }
34+
memory = { 72.GB * task.attempt }
35+
time = { 8.h * task.attempt }
3636
}
3737
withLabel:process_high {
38-
cpus = { check_max( 18 * task.attempt, 'cpus' ) }
39-
memory = { check_max( 144.GB * task.attempt, 'memory' ) }
40-
time = { check_max( 16.h * task.attempt, 'time' ) }
38+
cpus = { 18 * task.attempt }
39+
memory = { 144.GB * task.attempt }
40+
time = { 16.h * task.attempt }
4141
}
4242
withLabel:process_long {
43-
time = { check_max( 20.h * task.attempt, 'time' ) }
43+
time = { 20.h * task.attempt }
4444
}
4545
withLabel:process_high_memory {
46-
memory = { check_max( 200.GB * task.attempt, 'memory' ) }
46+
memory = { 200.GB * task.attempt }
4747
}
4848
withLabel:error_ignore {
4949
errorStrategy = 'ignore'

conf/igenomes.config

+8-8
Original file line numberDiff line numberDiff line change
@@ -30,10 +30,10 @@ params {
3030
}
3131
"GRCm39" {
3232
// Genome reference
33-
fai = "${params.igenomes_base}/Mmusculus/GRCm39/seq/GRCm39.fa.fai"
34-
fasta = "${params.igenomes_base}/Mmusculus/GRCm39/seq/GRCm39.fa"
35-
dict = "${params.igenomes_base}/Mmusculus/GRCm39/seq/GRCm39.dict"
36-
gtf = "${params.igenomes_base}/Mmusculus/GRCm39/seq/GRCm39.gtf"
33+
fai = "${params.igenomes_base}/Mmusculus/GRCm39/seq/GCF_000001635.27_GRCm39_genomic.fna.fai"
34+
fasta = "${params.igenomes_base}/Mmusculus/GRCm39/seq/GCF_000001635.27_GRCm39_genomic.fna"
35+
dict = "${params.igenomes_base}/Mmusculus/GRCm39/seq/GCF_000001635.27_GRCm39_genomic.dict"
36+
gtf = "${params.igenomes_base}/Mmusculus/GRCm39/seq/GCF_000001635.27_GRCm39_genomic.gtf"
3737

3838
// Aligner reference
3939
bowtie2 = "${params.igenomes_base}/Mmusculus/GRCm39/bowtie2"
@@ -46,10 +46,10 @@ params {
4646
}
4747
"GRCz11" {
4848
// Genome reference
49-
fai = "${params.igenomes_base}/Drerio/GRCz11/seq/GRCz11.fa.fai"
50-
fasta = "${params.igenomes_base}/Drerio/GRCz11/seq/GRCz11.fa"
51-
dict = "${params.igenomes_base}/Drerio/GRCz11/seq/GRCz11.dict"
52-
gtf = "${params.igenomes_base}/Drerio/GRCz11/seq/GRCz11.gtf"
49+
fai = "${params.igenomes_base}/Drerio/GRCz11/seq/GCF_000002035.6_GRCz11_genomic.fna.fai"
50+
fasta = "${params.igenomes_base}/Drerio/GRCz11/seq/GCF_000002035.6_GRCz11_genomic.fna"
51+
dict = "${params.igenomes_base}/Drerio/GRCz11/seq/GCF_000002035.6_GRCz11_genomic.dict"
52+
gtf = "${params.igenomes_base}/Drerio/GRCz11/seq/GCF_000002035.6_GRCz11_genomic.gtf"
5353

5454
// Aligner reference
5555
bowtie2 = "${params.igenomes_base}/Drerio/GRCz11/bowtie2"

conf/modules.config

+14-11
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,8 @@ process {
2020

2121
// BCL convert
2222
withName: BCLCONVERT {
23-
cpus = { check_max( 18 , "cpus" ) }
24-
memory = { check_max( 144.GB, "memory" ) }
23+
cpus = 18
24+
memory = 144.GB
2525
ext.args = {[
2626
meta.lane ? "--bcl-only-lane ${meta.lane}" : "",
2727
"--force",
@@ -79,7 +79,7 @@ process {
7979
params.trim_front > 0 ? "--trim_front1 ${params.trim_front}" : "",
8080
params.trim_tail > 0 ? "--trim_tail1 ${params.trim_tail}" : "",
8181
params.adapter_R1 ? "--adapter_sequence ${params.adapter_R1}" : "",
82-
params.adapter_R2 ? "--adapter_sequence ${params.adapter_R2}" : "",
82+
params.adapter_R2 ? "--adapter_sequence_r2 ${params.adapter_R2}" : "",
8383
].join(" ").trim()}
8484
publishDir = [
8585
[
@@ -93,8 +93,8 @@ process {
9393
// FASTQ_TO_UCRAM
9494
//// Samtools Import
9595
withName: ".*FASTQ_TO_UCRAM:SAMTOOLS_IMPORT" {
96-
cpus = { check_max( 9 , "cpus" ) }
97-
memory = { check_max( 36.GB * task.attempt, "memory" ) }
96+
cpus = 9
97+
memory = { 36.GB * task.attempt }
9898
// WARNING: Do NOT escape the RG tag tabs when adding a readgroup
9999
ext.args = {[
100100
meta.readgroup ? "--rg-line \"@RG\t" + meta.readgroup.findResults{ it.value?.trim() ? "$it.key:$it.value" : null }.join("\t") + "\"" : "",
@@ -105,8 +105,8 @@ process {
105105
}
106106

107107
withName: ".*FASTQ_TO_UCRAM:SAMTOOLS_CAT" {
108-
cpus = { check_max( 9 , "cpus" ) }
109-
memory = { check_max( 36.GB * task.attempt, "memory" ) }
108+
cpus = 9
109+
memory = { 36.GB * task.attempt }
110110
}
111111

112112
// FASTQ_TO_CRAM
@@ -135,6 +135,11 @@ process {
135135
meta.readgroup ? "--rg " + meta.readgroup.findResults{ it.value?.trim() ? "$it.key:$it.value" : null }.join(" --rg ") : ""
136136
].join(" ").trim()}
137137
ext.args2 = "--fast"
138+
publishDir = [
139+
path: { meta.samplename ? "${params.outdir}/${meta.samplename}" : "${params.outdir}"},
140+
mode: params.publish_dir_mode,
141+
pattern: "*.log"
142+
]
138143
}
139144

140145
//// BWA mem/BWA mem2
@@ -167,8 +172,6 @@ process {
167172

168173
//// SNAP
169174
withName: SNAP_ALIGN {
170-
cpus = { check_max( 18 , "cpus" ) }
171-
memory = { check_max( 72.GB , "memory" ) }
172175
ext.args = {[
173176
"-b-",
174177
"-sm 20",
@@ -288,7 +291,7 @@ process {
288291
// coverage
289292
//// Mosdepth
290293
withName: ".*COVERAGE:MOSDEPTH" {
291-
cpus = { check_max( 4 , "cpus" ) }
294+
cpus = 4
292295
// filter reads with flag 1804
293296
// read unmapped (0x4)
294297
// mate unmapped (0x8)*
@@ -334,7 +337,7 @@ process {
334337

335338
//// Picard
336339
withName= ".*PICARD.*" {
337-
memory = { check_max( 8.GB * task.attempt , "memory" ) }
340+
memory = { 8.GB * task.attempt }
338341
ext.args = "--MAX_RECORDS_IN_RAM 10000000"
339342
}
340343

conf/profiles/WES.config

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
params {
2+
genome = "GRCh38"
3+
aligner = "snap"
4+
run_coverage = true
5+
disable_picard_metrics = false
6+
}

conf/profiles/WGS.config

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
params {
2+
genome = "GRCh38"
3+
aligner = "snap"
4+
run_coverage = true
5+
disable_picard_metrics = false
6+
}

conf/profiles/cmgg.config

-21
This file was deleted.

conf/profiles/copgt.config

+2-2
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,10 @@ params {
33
aligner = "snap"
44
run_coverage = true
55
disable_picard_metrics = true
6-
roi = "${params.genomes_base}/Hsapiens/GRCh38.p14/regions/CMGG_coPGT-M_analyses_ROI_v1.bed"
6+
roi = "${params.igenomes_base}/Hsapiens/GRCh38/regions/CMGG_coPGT-M_analyses_ROI_v1.bed"
77

88
// trimming options
99
skip_trimming = false
1010
trim_front = 6
11-
adapter_R1 = CAGATC
11+
adapter_R1 = "CAGATC"
1212
}

conf/profiles/sWGS.config

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
params {
2+
genome = "hg38-noalt"
3+
aligner = "bowtie2"
4+
run_coverage = false
5+
disable_picard_metrics = true
6+
}
7+

0 commit comments

Comments
 (0)