From bd2765390797395642975ebee573446411739010 Mon Sep 17 00:00:00 2001 From: Yasset Perez-Riverol Date: Sun, 22 Mar 2026 11:00:26 +0000 Subject: [PATCH 01/16] improvements to use DIANN convert --- .gitignore | 1 + conf/tests/test_dia_local.config | 15 ++++ docs/usage.md | 85 +++++++++++++++++++ .../diann/assemble_empirical_library/main.nf | 5 +- .../local/diann/final_quantification/main.nf | 7 +- .../local/diann/individual_analysis/main.nf | 18 +++- .../diann/insilico_library_generation/main.nf | 2 +- .../local/diann/preliminary_analysis/main.nf | 16 +++- modules/local/samplesheet_check/main.nf | 11 +-- modules/local/sdrf_parsing/main.nf | 27 ++---- nextflow.config | 6 ++ .../local/create_input_channel/main.nf | 42 ++++----- workflows/dia.nf | 25 +++--- workflows/quantmsdiann.nf | 1 + 14 files changed, 191 insertions(+), 70 deletions(-) create mode 100644 conf/tests/test_dia_local.config diff --git a/.gitignore b/.gitignore index 99acd2c..114452d 100644 --- a/.gitignore +++ b/.gitignore @@ -17,3 +17,4 @@ null/ .cursor/rules/codacy.mdc .codacy/ .github/instructions/codacy.instructions.md +docs/superpowers/ \ No newline at end of file diff --git a/conf/tests/test_dia_local.config b/conf/tests/test_dia_local.config new file mode 100644 index 0000000..1dba6fc --- /dev/null +++ b/conf/tests/test_dia_local.config @@ -0,0 +1,15 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Local container overrides for testing with dev builds of sdrf-pipelines and quantms-utils. + Uses docker.io/ prefix to prevent quay.io registry from being prepended. +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +process { + withName: 'SDRF_PARSING' { + container = 'docker.io/local/sdrf-pipelines:dev' + } + withName: 'DIANN_MSSTATS' { + container = 'docker.io/local/quantms-utils:dev' + } +} diff --git a/docs/usage.md b/docs/usage.md index 4464cac..23fcc4b 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -88,6 +88,91 @@ nextflow run . -profile test_dia_dotd,docker --outdir results nextflow run . -profile test_latest_dia,docker --outdir results ``` +## DIA-NN parameters + +The pipeline passes parameters to DIA-NN at different steps. Some parameters come from the SDRF metadata (per-file), some from `nextflow.config` defaults, and some from the command line. The table below documents each parameter, its source, and which pipeline steps use it. + +### Parameter sources + +Parameters are resolved in this priority order: +1. **SDRF metadata** (per-file, from `convert-diann` design file) — highest priority +2. **Pipeline parameters** (`--param_name` on command line or params file) +3. **Nextflow defaults** (`nextflow.config`) — lowest priority + +### Pipeline steps + +| Step | Description | +|------|-------------| +| **INSILICO_LIBRARY_GENERATION** | Predicts a spectral library from FASTA using DIA-NN's deep learning | +| **PRELIMINARY_ANALYSIS** | Per-file calibration and mass accuracy estimation (first pass) | +| **ASSEMBLE_EMPIRICAL_LIBRARY** | Builds consensus empirical library from preliminary results | +| **INDIVIDUAL_ANALYSIS** | Per-file quantification with the empirical library (second pass) | +| **FINAL_QUANTIFICATION** | Aggregates all files into protein/peptide matrices | + +### Per-file parameters from SDRF + +These parameters are extracted per-file from the SDRF via `convert-diann` and stored in `diann_design.tsv`: + +| DIA-NN flag | SDRF column | Design column | Steps | Notes | +|---|---|---|---|---| +| `--mass-acc-ms1` | `comment[precursor mass tolerance]` | `PrecursorMassTolerance` | PRELIMINARY, INDIVIDUAL | Falls back to auto-detect if missing or not ppm | +| `--mass-acc` | `comment[fragment mass tolerance]` | `FragmentMassTolerance` | PRELIMINARY, INDIVIDUAL | Falls back to auto-detect if missing or not ppm | +| `--min-pr-mz` | `comment[ms1 scan range]` or `comment[ms min mz]` | `MS1MinMz` | PRELIMINARY, INDIVIDUAL | Per-file for GPF; global broadest for INSILICO | +| `--max-pr-mz` | `comment[ms1 scan range]` or `comment[ms max mz]` | `MS1MaxMz` | PRELIMINARY, INDIVIDUAL | Per-file for GPF; global broadest for INSILICO | +| `--min-fr-mz` | `comment[ms2 scan range]` or `comment[ms2 min mz]` | `MS2MinMz` | PRELIMINARY, INDIVIDUAL | Per-file for GPF; global broadest for INSILICO | +| `--max-fr-mz` | `comment[ms2 scan range]` or `comment[ms2 max mz]` | `MS2MaxMz` | PRELIMINARY, INDIVIDUAL | Per-file for GPF; global broadest for INSILICO | + +### Global parameters from config + +These parameters apply globally across all files. They are set in `diann_config.cfg` (from SDRF) or as pipeline parameters: + +| DIA-NN flag | Pipeline parameter | Default | Steps | Notes | +|---|---|---|---|---| +| `--cut` | (from SDRF enzyme) | — | ALL | Enzyme cut rule, derived from `comment[cleavage agent details]` | +| `--fixed-mod` | (from SDRF) | — | ALL | Fixed modifications from `comment[modification parameters]` | +| `--var-mod` | (from SDRF) | — | ALL | Variable modifications from `comment[modification parameters]` | +| `--monitor-mod` | `--enable_mod_localization` + `--mod_localization` | `false` / `Phospho (S),Phospho (T),Phospho (Y)` | INDIVIDUAL, FINAL | PTM site localization scoring | +| `--window` | `--scan_window` | `8` | PRELIMINARY, ASSEMBLE, INDIVIDUAL | Scan window; auto-detected when `--scan_window_automatic=true` | +| `--quick-mass-acc` | `--quick_mass_acc` | `true` | PRELIMINARY | Fast mass accuracy calibration | +| `--min-corr 2 --corr-diff 1 --time-corr-only` | `--performance_mode` | `true` | PRELIMINARY | High-speed, low-RAM mode | +| `--pg-level` | `--pg_level` | `2` | INDIVIDUAL, FINAL | Protein grouping level | +| `--species-genes` | `--species_genes` | `false` | FINAL | Use species-specific gene names | +| `--no-norm` | `--diann_normalize` | `true` | FINAL | Disable normalization when `false` | + +### PTM site localization (`--monitor-mod`) + +DIA-NN supports PTM site localization scoring via `--monitor-mod`. When enabled, DIA-NN reports `PTM.Site.Confidence` and `PTM.Q.Value` columns for the specified modifications. + +**Important**: `--monitor-mod` is only applied to **INDIVIDUAL_ANALYSIS** and **FINAL_QUANTIFICATION**. It is intentionally excluded from earlier steps because: + +- **INSILICO_LIBRARY_GENERATION**: Library generation needs all peptides (modified + unmodified). `--monitor-mod` would filter to only modified peptides. +- **PRELIMINARY_ANALYSIS**: Calibration needs all peptides for robust mass accuracy estimation. +- **ASSEMBLE_EMPIRICAL_LIBRARY**: Library assembly needs broad peptide coverage. + +To enable PTM site localization: + +```bash +nextflow run bigbio/quantmsdiann \ + --enable_mod_localization \ + --mod_localization 'Phospho (S),Phospho (T),Phospho (Y)' \ + ... +``` + +The parameter accepts two formats: +- **Modification names** (quantms-compatible): `Phospho (S),Phospho (T),Phospho (Y)` — site info in parentheses is stripped, the base name is mapped to UniMod +- **UniMod accessions** (direct): `UniMod:21,UniMod:1` + +Supported modification name mappings: + +| Name | UniMod ID | Example | +|---|---|---| +| Phospho | `UniMod:21` | `Phospho (S),Phospho (T),Phospho (Y)` | +| GlyGly | `UniMod:121` | `GlyGly (K)` | +| Acetyl | `UniMod:1` | `Acetyl (Protein N-term)` | +| Oxidation | `UniMod:35` | `Oxidation (M)` | +| Deamidated | `UniMod:7` | `Deamidated (N),Deamidated (Q)` | +| Methylation | `UniMod:34` | `Methylation (K),Methylation (R)` | + ## Optional outputs By default, only final result files are published. Intermediate files can be exported using `save_*` parameters or via `ext.*` properties in a custom Nextflow config. diff --git a/modules/local/diann/assemble_empirical_library/main.nf b/modules/local/diann/assemble_empirical_library/main.nf index 93596d2..8d266d3 100644 --- a/modules/local/diann/assemble_empirical_library/main.nf +++ b/modules/local/diann/assemble_empirical_library/main.nf @@ -30,7 +30,8 @@ process ASSEMBLE_EMPIRICAL_LIBRARY { '--temp', '--threads', '--verbose', '--lib', '--f', '--fasta', '--mass-acc', '--mass-acc-ms1', '--window', '--individual-mass-acc', '--individual-windows', - '--out-lib', '--use-quant', '--gen-spec-lib', '--rt-profiling'] + '--out-lib', '--use-quant', '--gen-spec-lib', '--rt-profiling', + '--monitor-mod', '--var-mod', '--fixed-mod'] // Sort by length descending so longer flags (e.g. --mass-acc-ms1) are matched before shorter prefixes (--mass-acc) blocked.sort { a -> -a.length() }.each { flag -> def flagPattern = '(?<=^|\\s)' + java.util.regex.Pattern.quote(flag) + '(?=\\s|\$)(\\s+(?!-{1,2}[a-zA-Z])\\S+)*' @@ -58,7 +59,7 @@ process ASSEMBLE_EMPIRICAL_LIBRARY { ls -lcth - # Extract --var-mod and --fixed-mod flags from diann_config.cfg (DIA-NN best practice) + # Extract --var-mod and --fixed-mod flags from diann_config.cfg (no --monitor-mod: library assembly needs all peptides) mod_flags=\$(cat ${diann_config} | grep -oP '(--var-mod\\s+\\S+|--fixed-mod\\s+\\S+)' | tr '\\n' ' ') diann --f ${(ms_files as List).join(' --f ')} \\ diff --git a/modules/local/diann/final_quantification/main.nf b/modules/local/diann/final_quantification/main.nf index 9a13e98..57ad3de 100644 --- a/modules/local/diann/final_quantification/main.nf +++ b/modules/local/diann/final_quantification/main.nf @@ -46,7 +46,8 @@ process FINAL_QUANTIFICATION { '--temp', '--threads', '--verbose', '--lib', '--f', '--fasta', '--use-quant', '--matrices', '--out', '--relaxed-prot-inf', '--pg-level', '--qvalue', '--window', '--individual-windows', - '--species-genes', '--report-decoys', '--xic', '--no-norm'] + '--species-genes', '--report-decoys', '--xic', '--no-norm', + '--monitor-mod', '--var-mod', '--fixed-mod'] // Sort by length descending so longer flags (e.g. --individual-windows) are matched before shorter prefixes (--window) blocked.sort { a -> -a.length() }.each { flag -> def flagPattern = '(?<=^|\\s)' + java.util.regex.Pattern.quote(flag) + '(?=\\s|\$)(\\s+(?!-{1,2}[a-zA-Z])\\S+)*' @@ -71,8 +72,8 @@ process FINAL_QUANTIFICATION { # Notes: if .quant files are passed, mzml/.d files are not accessed, so the name needs to be passed but files # do not need to pe present. - # Extract --var-mod and --fixed-mod flags from diann_config.cfg (DIA-NN best practice) - mod_flags=\$(cat ${diann_config} | grep -oP '(--var-mod\\s+\\S+|--fixed-mod\\s+\\S+)' | tr '\\n' ' ') + # Extract --var-mod, --fixed-mod, and --monitor-mod flags from diann_config.cfg + mod_flags=\$(cat ${diann_config} | grep -oP '(--var-mod\\s+\\S+|--fixed-mod\\s+\\S+|--monitor-mod\\s+\\S+)' | tr '\\n' ' ') diann --lib ${empirical_library} \\ --fasta ${fasta} \\ diff --git a/modules/local/diann/individual_analysis/main.nf b/modules/local/diann/individual_analysis/main.nf index c4dea88..8e5c781 100644 --- a/modules/local/diann/individual_analysis/main.nf +++ b/modules/local/diann/individual_analysis/main.nf @@ -25,7 +25,9 @@ process INDIVIDUAL_ANALYSIS { def blocked = ['--use-quant', '--gen-spec-lib', '--out-lib', '--matrices', '--out', '--rt-profiling', '--temp', '--threads', '--verbose', '--lib', '--f', '--fasta', '--mass-acc', '--mass-acc-ms1', '--window', - '--no-ifs-removal', '--no-main-report', '--relaxed-prot-inf', '--pg-level'] + '--no-ifs-removal', '--no-main-report', '--relaxed-prot-inf', '--pg-level', + '--min-pr-mz', '--max-pr-mz', '--min-fr-mz', '--max-fr-mz', + '--monitor-mod', '--var-mod', '--fixed-mod'] // Sort by length descending so longer flags (e.g. --mass-acc-ms1) are matched before shorter prefixes (--mass-acc) blocked.sort { a -> -a.length() }.each { flag -> def flagPattern = '(?<=^|\\s)' + java.util.regex.Pattern.quote(flag) + '(?=\\s|\$)(\\s+(?!-{1,2}[a-zA-Z])\\S+)*' @@ -59,9 +61,15 @@ process INDIVIDUAL_ANALYSIS { diann_no_peptidoforms = params.diann_no_peptidoforms ? "--no-peptidoforms" : "" + // Per-file scan ranges from SDRF (empty = no flag, DIA-NN auto-detects) + min_pr_mz = meta['ms1minmz'] ? "--min-pr-mz ${meta['ms1minmz']}" : "" + max_pr_mz = meta['ms1maxmz'] ? "--max-pr-mz ${meta['ms1maxmz']}" : "" + min_fr_mz = meta['ms2minmz'] ? "--min-fr-mz ${meta['ms2minmz']}" : "" + max_fr_mz = meta['ms2maxmz'] ? "--max-fr-mz ${meta['ms2maxmz']}" : "" + """ - # Extract --var-mod and --fixed-mod flags from diann_config.cfg (DIA-NN best practice) - mod_flags=\$(cat ${diann_config} | grep -oP '(--var-mod\\s+\\S+|--fixed-mod\\s+\\S+)' | tr '\\n' ' ') + # Extract --var-mod, --fixed-mod, and --monitor-mod flags from diann_config.cfg + mod_flags=\$(cat ${diann_config} | grep -oP '(--var-mod\\s+\\S+|--fixed-mod\\s+\\S+|--monitor-mod\\s+\\S+)' | tr '\\n' ' ') diann --lib ${library} \\ --f ${ms_file} \\ @@ -76,6 +84,10 @@ process INDIVIDUAL_ANALYSIS { --no-main-report \\ --relaxed-prot-inf \\ --pg-level $params.pg_level \\ + ${min_pr_mz} \\ + ${max_pr_mz} \\ + ${min_fr_mz} \\ + ${max_fr_mz} \\ ${diann_no_peptidoforms} \\ \${mod_flags} \\ $args diff --git a/modules/local/diann/insilico_library_generation/main.nf b/modules/local/diann/insilico_library_generation/main.nf index d61fc63..d76d79a 100644 --- a/modules/local/diann/insilico_library_generation/main.nf +++ b/modules/local/diann/insilico_library_generation/main.nf @@ -29,7 +29,7 @@ process INSILICO_LIBRARY_GENERATION { '--missed-cleavages', '--min-pep-len', '--max-pep-len', '--min-pr-charge', '--max-pr-charge', '--var-mods', '--min-pr-mz', '--max-pr-mz', '--min-fr-mz', '--max-fr-mz', - '--met-excision'] + '--met-excision', '--monitor-mod'] // Sort by length descending so longer flags (e.g. --fasta-search) are matched before shorter prefixes (--fasta, --f) blocked.sort { a -> -a.length() }.each { flag -> def flagPattern = '(?<=^|\\s)' + java.util.regex.Pattern.quote(flag) + '(?=\\s|\$)(\\s+(?!-{1,2}[a-zA-Z])\\S+)*' diff --git a/modules/local/diann/preliminary_analysis/main.nf b/modules/local/diann/preliminary_analysis/main.nf index f085343..81ea1db 100644 --- a/modules/local/diann/preliminary_analysis/main.nf +++ b/modules/local/diann/preliminary_analysis/main.nf @@ -25,7 +25,9 @@ process PRELIMINARY_ANALYSIS { def blocked = ['--use-quant', '--gen-spec-lib', '--out-lib', '--matrices', '--out', '--temp', '--threads', '--verbose', '--lib', '--f', '--fasta', '--mass-acc', '--mass-acc-ms1', '--window', - '--quick-mass-acc', '--min-corr', '--corr-diff', '--time-corr-only'] + '--quick-mass-acc', '--min-corr', '--corr-diff', '--time-corr-only', + '--min-pr-mz', '--max-pr-mz', '--min-fr-mz', '--max-fr-mz', + '--monitor-mod', '--var-mod', '--fixed-mod'] // Sort by length descending so longer flags (e.g. --mass-acc-ms1) are matched before shorter prefixes (--mass-acc) blocked.sort { a -> -a.length() }.each { flag -> def flagPattern = '(?<=^|\\s)' + java.util.regex.Pattern.quote(flag) + '(?=\\s|\$)(\\s+(?!-{1,2}[a-zA-Z])\\S+)*' @@ -55,6 +57,12 @@ process PRELIMINARY_ANALYSIS { // Notes: Use double quotes for params, so that it is escaped in the shell. scan_window = params.scan_window_automatic ? '' : "--window $params.scan_window" + // Per-file scan ranges from SDRF (empty = no flag, DIA-NN auto-detects) + min_pr_mz = meta['ms1minmz'] ? "--min-pr-mz ${meta['ms1minmz']}" : "" + max_pr_mz = meta['ms1maxmz'] ? "--max-pr-mz ${meta['ms1maxmz']}" : "" + min_fr_mz = meta['ms2minmz'] ? "--min-fr-mz ${meta['ms2minmz']}" : "" + max_fr_mz = meta['ms2maxmz'] ? "--max-fr-mz ${meta['ms2maxmz']}" : "" + """ # Precursor Tolerance value was: ${meta['precursormasstolerance']} # Fragment Tolerance value was: ${meta['fragmentmasstolerance']} @@ -63,7 +71,7 @@ process PRELIMINARY_ANALYSIS { # Final mass accuracy is '${mass_acc}' - # Extract --var-mod and --fixed-mod flags from diann_config.cfg (DIA-NN best practice) + # Extract --var-mod and --fixed-mod flags from diann_config.cfg (no --monitor-mod here: calibration needs all peptides) mod_flags=\$(cat ${diann_config} | grep -oP '(--var-mod\\s+\\S+|--fixed-mod\\s+\\S+)' | tr '\\n' ' ') diann --lib ${predict_library} \\ @@ -75,6 +83,10 @@ process PRELIMINARY_ANALYSIS { ${mass_acc} \\ ${quick_mass_acc} \\ ${performance_flags} \\ + ${min_pr_mz} \\ + ${max_pr_mz} \\ + ${min_fr_mz} \\ + ${max_fr_mz} \\ ${diann_no_peptidoforms} \\ \${mod_flags} \\ $args diff --git a/modules/local/samplesheet_check/main.nf b/modules/local/samplesheet_check/main.nf index ecb6e23..1c1c9d4 100644 --- a/modules/local/samplesheet_check/main.nf +++ b/modules/local/samplesheet_check/main.nf @@ -20,10 +20,6 @@ process SAMPLESHEET_CHECK { script: def args = task.ext.args ?: '' - def string_skip_sdrf_validation = params.validate_ontologies == false ? "--skip_sdrf_validation" : "" - def string_skip_ms_validation = params.skip_ms_validation == true ? "--skip_ms_validation" : "" - def string_skip_factor_validation = params.skip_factor_validation == true ? "--skip_factor_validation" : "" - def string_skip_experimental_design_validation = params.skip_experimental_design_validation == true ? "--skip_experimental_design_validation" : "" def string_use_ols_cache_only = params.use_ols_cache_only == true ? "--use_ols_cache_only" : "" """ @@ -40,11 +36,8 @@ process SAMPLESHEET_CHECK { cp "${input_file}" "\$OUTPUT_FILE" fi - quantmsutilsc checksamplesheet --exp_design "\$OUTPUT_FILE" --is_sdrf \\ - ${string_skip_sdrf_validation} \\ - ${string_skip_ms_validation} \\ - ${string_skip_factor_validation} \\ - ${string_skip_experimental_design_validation} \\ + quantmsutilsc checksamplesheet --exp_design "\$OUTPUT_FILE" \\ + --minimal \\ ${string_use_ols_cache_only} \\ $args \\ 2>&1 | tee input_check.log diff --git a/modules/local/sdrf_parsing/main.nf b/modules/local/sdrf_parsing/main.nf index e379fac..d150321 100644 --- a/modules/local/sdrf_parsing/main.nf +++ b/modules/local/sdrf_parsing/main.nf @@ -10,33 +10,24 @@ process SDRF_PARSING { path sdrf output: - path "${sdrf.baseName}_openms_design.tsv", emit: ch_expdesign - path "${sdrf.baseName}_config.tsv" , emit: ch_sdrf_config_file - path "*.log" , emit: log - path "versions.yml" , emit: versions + path "diann_design.tsv" , emit: ch_sdrf_config_file + path "diann_design.tsv" , emit: ch_expdesign + path "diann_config.cfg" , emit: ch_diann_cfg + path "*.log" , emit: log + path "versions.yml" , emit: versions script: def args = task.ext.args ?: '' - if (params.convert_dotd) { - extensionconversions = ",.d.gz:.mzML,.d.tar.gz:.mzML,d.tar:.mzML,.d.zip:.mzML,.d:.mzML" - } else { - extensionconversions = ",.gz:,.tar.gz:,.tar:,.zip:" - } + def mod_loc_flag = (params.enable_mod_localization && params.mod_localization) ? + "--mod_localization '${params.mod_localization}'" : '' """ - ## -t2 since the one-table format parser is broken in OpenMS2.5 - ## -l for legacy behavior to always add sample columns - - parse_sdrf convert-openms \\ - -t2 -l \\ - --extension_convert raw:mzML$extensionconversions \\ + parse_sdrf convert-diann \\ -s ${sdrf} \\ + ${mod_loc_flag} \\ $args \\ 2>&1 | tee ${sdrf.baseName}_parsing.log - mv openms.tsv ${sdrf.baseName}_config.tsv - mv experimental_design.tsv ${sdrf.baseName}_openms_design.tsv - cat <<-END_VERSIONS > versions.yml "${task.process}": sdrf-pipelines: \$(parse_sdrf --version 2>/dev/null | awk -F ' ' '{print \$2}') diff --git a/nextflow.config b/nextflow.config index 982e37d..bebc264 100644 --- a/nextflow.config +++ b/nextflow.config @@ -62,6 +62,12 @@ params { // Optional outputs — control which intermediate files are published save_speclib_tsv = false // Save the TSV spectral library from in-silico generation + // DIA-NN: PTM site localization (--monitor-mod) + enable_mod_localization = false + // Comma-separated modification names, e.g. 'Phospho (S),Phospho (T),Phospho (Y)' + // or UniMod accessions, e.g. 'UniMod:21,UniMod:1' + mod_localization = 'Phospho (S),Phospho (T),Phospho (Y)' + // DIA-NN: PRELIMINARY_ANALYSIS — calibration & mass accuracy scan_window = 8 scan_window_automatic = true diff --git a/subworkflows/local/create_input_channel/main.nf b/subworkflows/local/create_input_channel/main.nf index 4f5503a..d44e51c 100644 --- a/subworkflows/local/create_input_channel/main.nf +++ b/subworkflows/local/create_input_channel/main.nf @@ -12,11 +12,12 @@ workflow CREATE_INPUT_CHANNEL { main: ch_versions = channel.empty() - // Always parse as SDRF (OpenMS experimental design format deprecated) + // Always parse as SDRF using DIA-NN converter SDRF_PARSING(ch_sdrf) ch_versions = ch_versions.mix(SDRF_PARSING.out.versions) ch_config = SDRF_PARSING.out.ch_sdrf_config_file ch_expdesign = SDRF_PARSING.out.ch_expdesign + ch_diann_cfg = SDRF_PARSING.out.ch_diann_cfg def Set enzymes = [] def Set files = [] @@ -34,6 +35,7 @@ workflow CREATE_INPUT_CHANNEL { emit: ch_meta_config_dia // [meta, spectra_file] ch_expdesign + ch_diann_cfg versions = ch_versions } @@ -44,7 +46,7 @@ def create_meta_channel(LinkedHashMap row, enzymes, files, wrapper) { // Always use SDRF format if (!params.root_folder) { - filestr = row.URI.toString() + filestr = row.URI?.toString()?.trim() ? row.URI.toString() : row.Filename.toString() } else { filestr = row.Filename.toString() @@ -67,30 +69,22 @@ def create_meta_channel(LinkedHashMap row, enzymes, files, wrapper) { } // Validate acquisition method is DIA - if (row["Proteomics Data Acquisition Method"].toString().toLowerCase().contains("data-independent acquisition")) { + // AcquisitionMethod is already extracted by convert-diann (e.g. "Data-Independent Acquisition") + def acqMethod = row.AcquisitionMethod?.toString()?.trim() ?: "" + if (acqMethod.toLowerCase().contains("data-independent acquisition") || acqMethod.toLowerCase().contains("dia")) { + meta.acquisition_method = "dia" + } + else if (acqMethod.isEmpty()) { + // If no acquisition method column in SDRF, assume DIA (this is a DIA-only pipeline) meta.acquisition_method = "dia" } else { - log.error("This pipeline only supports Data-Independent Acquisition (DIA). Found: '${row["Proteomics Data Acquisition Method"]}'. Use the quantms pipeline for DDA workflows.") + log.error("This pipeline only supports Data-Independent Acquisition (DIA). Found: '${acqMethod}'. Use the quantms pipeline for DDA workflows.") exit(1) } - // dissociation method conversion - if (row.DissociationMethod == "COLLISION-INDUCED DISSOCIATION") { - meta.dissociationmethod = "CID" - } - else if (row.DissociationMethod == "HIGHER ENERGY BEAM-TYPE COLLISION-INDUCED DISSOCIATION") { - meta.dissociationmethod = "HCD" - } - else if (row.DissociationMethod == "ELECTRON TRANSFER DISSOCIATION") { - meta.dissociationmethod = "ETD" - } - else if (row.DissociationMethod == "ELECTRON CAPTURE DISSOCIATION") { - meta.dissociationmethod = "ECD" - } - else { - meta.dissociationmethod = row.DissociationMethod - } + // DissociationMethod is already normalized by convert-diann (HCD, CID, ETD, ECD) + meta.dissociationmethod = row.DissociationMethod?.toString()?.trim() ?: "" wrapper.acquisition_method = meta.acquisition_method @@ -131,6 +125,7 @@ def create_meta_channel(LinkedHashMap row, enzymes, files, wrapper) { exit(1) } } else { + log.warn("No precursor mass tolerance in SDRF for '${filestr}'. Using default: ${params.precursor_mass_tolerance} ${params.precursor_mass_tolerance_unit}") meta.precursormasstolerance = params.precursor_mass_tolerance } @@ -154,6 +149,7 @@ def create_meta_channel(LinkedHashMap row, enzymes, files, wrapper) { exit(1) } } else { + log.warn("No fragment mass tolerance in SDRF for '${filestr}'. Using default: ${params.fragment_mass_tolerance} ${params.fragment_mass_tolerance_unit}") meta.fragmentmasstolerance = params.fragment_mass_tolerance } @@ -175,6 +171,12 @@ def create_meta_channel(LinkedHashMap row, enzymes, files, wrapper) { meta.variablemodifications = params.variable_mods } + // Per-file scan ranges (empty string = no flags passed, DIA-NN auto-detects) + meta.ms1minmz = row.MS1MinMz?.toString()?.trim() ?: "" + meta.ms1maxmz = row.MS1MaxMz?.toString()?.trim() ?: "" + meta.ms2minmz = row.MS2MinMz?.toString()?.trim() ?: "" + meta.ms2maxmz = row.MS2MaxMz?.toString()?.trim() ?: "" + enzymes += row.Enzyme if (enzymes.size() > 1) { log.error("Currently only one enzyme is supported for the whole experiment. Specified was '${enzymes}'. Check or split your SDRF.") diff --git a/workflows/dia.nf b/workflows/dia.nf index 712f6db..e67cb54 100644 --- a/workflows/dia.nf +++ b/workflows/dia.nf @@ -7,7 +7,6 @@ // // MODULES: Local to the pipeline // -include { GENERATE_CFG } from '../modules/local/diann/generate_cfg/main' include { DIANN_MSSTATS } from '../modules/local/diann/diann_msstats/main' include { PRELIMINARY_ANALYSIS } from '../modules/local/diann/preliminary_analysis/main' include { ASSEMBLE_EMPIRICAL_LIBRARY } from '../modules/local/diann/assemble_empirical_library/main' @@ -30,6 +29,7 @@ workflow DIA { take: ch_file_preparation_results ch_expdesign + ch_diann_cfg main: @@ -44,12 +44,9 @@ workflow DIA { meta = ch_result.meta.unique { m -> m.experiment_id } - GENERATE_CFG(meta) - ch_software_versions = ch_software_versions - .mix(GENERATE_CFG.out.versions) - + // diann_config.cfg comes directly from SDRF_PARSING (convert-diann) // Convert to value channel so it can be consumed by all per-file processes - ch_diann_cfg = GENERATE_CFG.out.diann_cfg.first() + ch_diann_cfg_val = ch_diann_cfg.first() // // MODULE: SILICOLIBRARYGENERATION @@ -57,7 +54,7 @@ workflow DIA { if (params.diann_speclib != null && params.diann_speclib.toString() != "") { speclib = channel.from(file(params.diann_speclib, checkIfExists: true)) } else { - INSILICO_LIBRARY_GENERATION(ch_searchdb, ch_diann_cfg) + INSILICO_LIBRARY_GENERATION(ch_searchdb, ch_diann_cfg_val) speclib = INSILICO_LIBRARY_GENERATION.out.predict_speclib } @@ -80,12 +77,12 @@ workflow DIA { empirical_lib_files = preanalysis_subset .map { result -> result[1] } .collect( sort: { a, b -> file(a).getName() <=> file(b).getName() } ) - PRELIMINARY_ANALYSIS(preanalysis_subset.combine(speclib), ch_diann_cfg) + PRELIMINARY_ANALYSIS(preanalysis_subset.combine(speclib), ch_diann_cfg_val) } else { empirical_lib_files = ch_file_preparation_results .map { result -> result[1] } .collect( sort: { a, b -> file(a).getName() <=> file(b).getName() } ) - PRELIMINARY_ANALYSIS(ch_file_preparation_results.combine(speclib), ch_diann_cfg) + PRELIMINARY_ANALYSIS(ch_file_preparation_results.combine(speclib), ch_diann_cfg_val) } ch_software_versions = ch_software_versions .mix(PRELIMINARY_ANALYSIS.out.versions) @@ -99,7 +96,7 @@ workflow DIA { meta, PRELIMINARY_ANALYSIS.out.diann_quant.collect(), speclib, - ch_diann_cfg + ch_diann_cfg_val ) ch_software_versions = ch_software_versions .mix(ASSEMBLE_EMPIRICAL_LIBRARY.out.versions) @@ -114,7 +111,7 @@ workflow DIA { // // MODULE: INDIVIDUAL_ANALYSIS // - INDIVIDUAL_ANALYSIS(indiv_fin_analysis_in, ch_diann_cfg) + INDIVIDUAL_ANALYSIS(indiv_fin_analysis_in, ch_diann_cfg_val) ch_software_versions = ch_software_versions .mix(INDIVIDUAL_ANALYSIS.out.versions) @@ -137,7 +134,7 @@ workflow DIA { empirical_lib, INDIVIDUAL_ANALYSIS.out.diann_quant.collect(), ch_searchdb, - ch_diann_cfg) + ch_diann_cfg_val) ch_software_versions = ch_software_versions.mix( FINAL_QUANTIFICATION.out.versions @@ -179,6 +176,10 @@ def preprocessed_meta(LinkedHashMap meta) { parameters['fragmentmasstolerance'] = meta.fragmentmasstolerance parameters['fragmentmasstoleranceunit'] = meta.fragmentmasstoleranceunit parameters['enzyme'] = meta.enzyme + parameters['ms1minmz'] = meta.ms1minmz + parameters['ms1maxmz'] = meta.ms1maxmz + parameters['ms2minmz'] = meta.ms2minmz + parameters['ms2maxmz'] = meta.ms2maxmz return parameters } diff --git a/workflows/quantmsdiann.nf b/workflows/quantmsdiann.nf index a5e4f4c..911c39e 100644 --- a/workflows/quantmsdiann.nf +++ b/workflows/quantmsdiann.nf @@ -86,6 +86,7 @@ workflow QUANTMSDIANN { DIA( ch_fileprep_result.dia, CREATE_INPUT_CHANNEL.out.ch_expdesign, + CREATE_INPUT_CHANNEL.out.ch_diann_cfg, ) ch_pipeline_results = ch_pipeline_results.mix(DIA.out.diann_report) ch_msstats_in = ch_msstats_in.mix(DIA.out.msstats_in) From 0d3f787f90080c4e491bd055dce9ae087f25c17e Mon Sep 17 00:00:00 2001 From: Yasset Perez-Riverol Date: Mon, 23 Mar 2026 15:40:40 +0100 Subject: [PATCH 02/16] minor changes var_mod vs monitor-mod --- conf/tests/test_dia_local.config | 3 +++ docs/usage.md | 12 +++++++----- .../local/diann/assemble_empirical_library/main.nf | 4 ++-- modules/local/diann/preliminary_analysis/main.nf | 4 ++-- 4 files changed, 14 insertions(+), 9 deletions(-) diff --git a/conf/tests/test_dia_local.config b/conf/tests/test_dia_local.config index 1dba6fc..8d523c5 100644 --- a/conf/tests/test_dia_local.config +++ b/conf/tests/test_dia_local.config @@ -9,6 +9,9 @@ process { withName: 'SDRF_PARSING' { container = 'docker.io/local/sdrf-pipelines:dev' } + withName: 'SAMPLESHEET_CHECK' { + container = 'docker.io/local/quantms-utils:dev' + } withName: 'DIANN_MSSTATS' { container = 'docker.io/local/quantms-utils:dev' } diff --git a/docs/usage.md b/docs/usage.md index 23fcc4b..eeef769 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -131,7 +131,7 @@ These parameters apply globally across all files. They are set in `diann_config. | `--cut` | (from SDRF enzyme) | — | ALL | Enzyme cut rule, derived from `comment[cleavage agent details]` | | `--fixed-mod` | (from SDRF) | — | ALL | Fixed modifications from `comment[modification parameters]` | | `--var-mod` | (from SDRF) | — | ALL | Variable modifications from `comment[modification parameters]` | -| `--monitor-mod` | `--enable_mod_localization` + `--mod_localization` | `false` / `Phospho (S),Phospho (T),Phospho (Y)` | INDIVIDUAL, FINAL | PTM site localization scoring | +| `--monitor-mod` | `--enable_mod_localization` + `--mod_localization` | `false` / `Phospho (S),Phospho (T),Phospho (Y)` | PRELIMINARY, ASSEMBLE, INDIVIDUAL, FINAL | PTM site localization scoring (DIA-NN 1.8.x only) | | `--window` | `--scan_window` | `8` | PRELIMINARY, ASSEMBLE, INDIVIDUAL | Scan window; auto-detected when `--scan_window_automatic=true` | | `--quick-mass-acc` | `--quick_mass_acc` | `true` | PRELIMINARY | Fast mass accuracy calibration | | `--min-corr 2 --corr-diff 1 --time-corr-only` | `--performance_mode` | `true` | PRELIMINARY | High-speed, low-RAM mode | @@ -143,11 +143,13 @@ These parameters apply globally across all files. They are set in `diann_config. DIA-NN supports PTM site localization scoring via `--monitor-mod`. When enabled, DIA-NN reports `PTM.Site.Confidence` and `PTM.Q.Value` columns for the specified modifications. -**Important**: `--monitor-mod` is only applied to **INDIVIDUAL_ANALYSIS** and **FINAL_QUANTIFICATION**. It is intentionally excluded from earlier steps because: +**Important**: `--monitor-mod` is applied to all DIA-NN steps **except INSILICO_LIBRARY_GENERATION** (where it has no effect). It is particularly important for: -- **INSILICO_LIBRARY_GENERATION**: Library generation needs all peptides (modified + unmodified). `--monitor-mod` would filter to only modified peptides. -- **PRELIMINARY_ANALYSIS**: Calibration needs all peptides for robust mass accuracy estimation. -- **ASSEMBLE_EMPIRICAL_LIBRARY**: Library assembly needs broad peptide coverage. +- **PRELIMINARY_ANALYSIS**: Affects PTM-aware scoring during calibration. +- **ASSEMBLE_EMPIRICAL_LIBRARY**: Strongly affects empirical library generation for PTM peptides. +- **INDIVIDUAL_ANALYSIS** and **FINAL_QUANTIFICATION**: Enables PTM site confidence scoring. + +Note: For DIA-NN 2.0+, `--monitor-mod` is no longer needed — PTM localization is handled automatically by `--var-mod`. The flag is only used for DIA-NN 1.8.x. To enable PTM site localization: diff --git a/modules/local/diann/assemble_empirical_library/main.nf b/modules/local/diann/assemble_empirical_library/main.nf index 8d266d3..4d0c42f 100644 --- a/modules/local/diann/assemble_empirical_library/main.nf +++ b/modules/local/diann/assemble_empirical_library/main.nf @@ -59,8 +59,8 @@ process ASSEMBLE_EMPIRICAL_LIBRARY { ls -lcth - # Extract --var-mod and --fixed-mod flags from diann_config.cfg (no --monitor-mod: library assembly needs all peptides) - mod_flags=\$(cat ${diann_config} | grep -oP '(--var-mod\\s+\\S+|--fixed-mod\\s+\\S+)' | tr '\\n' ' ') + # Extract --var-mod, --fixed-mod, and --monitor-mod flags from diann_config.cfg + mod_flags=\$(cat ${diann_config} | grep -oP '(--var-mod\\s+\\S+|--fixed-mod\\s+\\S+|--monitor-mod\\s+\\S+)' | tr '\\n' ' ') diann --f ${(ms_files as List).join(' --f ')} \\ --lib ${lib} \\ diff --git a/modules/local/diann/preliminary_analysis/main.nf b/modules/local/diann/preliminary_analysis/main.nf index 81ea1db..8a57241 100644 --- a/modules/local/diann/preliminary_analysis/main.nf +++ b/modules/local/diann/preliminary_analysis/main.nf @@ -71,8 +71,8 @@ process PRELIMINARY_ANALYSIS { # Final mass accuracy is '${mass_acc}' - # Extract --var-mod and --fixed-mod flags from diann_config.cfg (no --monitor-mod here: calibration needs all peptides) - mod_flags=\$(cat ${diann_config} | grep -oP '(--var-mod\\s+\\S+|--fixed-mod\\s+\\S+)' | tr '\\n' ' ') + # Extract --var-mod, --fixed-mod, and --monitor-mod flags from diann_config.cfg + mod_flags=\$(cat ${diann_config} | grep -oP '(--var-mod\\s+\\S+|--fixed-mod\\s+\\S+|--monitor-mod\\s+\\S+)' | tr '\\n' ' ') diann --lib ${predict_library} \\ --f ${ms_file} \\ From 95c7dda2024d4600f10875b1efe2a8b43165d62a Mon Sep 17 00:00:00 2001 From: Yasset Perez-Riverol Date: Tue, 24 Mar 2026 07:25:36 +0100 Subject: [PATCH 03/16] Minor changes to capture DIANN version --- conf/diann_versions/v2_1_0.config | 2 ++ conf/diann_versions/v2_2_0.config | 2 ++ modules/local/sdrf_parsing/main.nf | 2 ++ nextflow.config | 1 + 4 files changed, 7 insertions(+) diff --git a/conf/diann_versions/v2_1_0.config b/conf/diann_versions/v2_1_0.config index bedfe95..9915726 100644 --- a/conf/diann_versions/v2_1_0.config +++ b/conf/diann_versions/v2_1_0.config @@ -2,6 +2,8 @@ * DIA-NN 2.1.0 container override (private ghcr.io) * Used by merge_ci.yml for version × feature matrix testing. */ +params.diann_version = '2.1.0' + process { withLabel: diann { container = 'ghcr.io/bigbio/diann:2.1.0' diff --git a/conf/diann_versions/v2_2_0.config b/conf/diann_versions/v2_2_0.config index 1e79ea3..93ea4ee 100644 --- a/conf/diann_versions/v2_2_0.config +++ b/conf/diann_versions/v2_2_0.config @@ -2,6 +2,8 @@ * DIA-NN 2.2.0 container override (private ghcr.io) * Used by merge_ci.yml for version × feature matrix testing. */ +params.diann_version = '2.2.0' + process { withLabel: diann { container = 'ghcr.io/bigbio/diann:2.2.0' diff --git a/modules/local/sdrf_parsing/main.nf b/modules/local/sdrf_parsing/main.nf index d150321..56b402e 100644 --- a/modules/local/sdrf_parsing/main.nf +++ b/modules/local/sdrf_parsing/main.nf @@ -20,11 +20,13 @@ process SDRF_PARSING { def args = task.ext.args ?: '' def mod_loc_flag = (params.enable_mod_localization && params.mod_localization) ? "--mod_localization '${params.mod_localization}'" : '' + def diann_version_flag = params.diann_version ? "--diann_version '${params.diann_version}'" : '' """ parse_sdrf convert-diann \\ -s ${sdrf} \\ ${mod_loc_flag} \\ + ${diann_version_flag} \\ $args \\ 2>&1 | tee ${sdrf.baseName}_parsing.log diff --git a/nextflow.config b/nextflow.config index e710a44..79d8ec7 100644 --- a/nextflow.config +++ b/nextflow.config @@ -55,6 +55,7 @@ params { convert_dotd = false // DIA-NN: General + diann_version = '1.8.1' // Used to control version-dependent flags (e.g. --monitor-mod for 1.8.x) diann_debug = 3 diann_speclib = null diann_extra_args = null From 64ccaf4199b2c7a89977627f810704d7954d972c Mon Sep 17 00:00:00 2001 From: Yasset Perez-Riverol Date: Tue, 24 Mar 2026 07:25:57 +0100 Subject: [PATCH 04/16] specify version for 1.8.1 --- conf/diann_versions/v1_8_1.config | 2 ++ 1 file changed, 2 insertions(+) diff --git a/conf/diann_versions/v1_8_1.config b/conf/diann_versions/v1_8_1.config index 2821ee2..5bfb7ef 100644 --- a/conf/diann_versions/v1_8_1.config +++ b/conf/diann_versions/v1_8_1.config @@ -2,6 +2,8 @@ * DIA-NN 1.8.1 container override (public biocontainers) * Used by merge_ci.yml for version × feature matrix testing. */ +params.diann_version = '1.8.1' + process { withLabel: diann { container = 'docker.io/biocontainers/diann:v1.8.1_cv1' From 4e72dbec7c721744756261e6d98ac5fde636afec Mon Sep 17 00:00:00 2001 From: Yasset Perez-Riverol Date: Tue, 24 Mar 2026 10:19:25 +0100 Subject: [PATCH 05/16] minor changes --- docs/usage.md | 52 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/docs/usage.md b/docs/usage.md index eeef769..8eb8a7d 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -241,6 +241,58 @@ Use `screen`, `tmux`, or the Nextflow `-bg` flag to run the pipeline in the back nextflow run bigbio/quantmsdiann -profile docker --input sdrf.tsv --database db.fasta --outdir results -bg ``` +## Developer testing with local containers + +When developing changes to `sdrf-pipelines` or `quantms-utils`, you can build local Docker containers and test them with the pipeline without publishing to a registry. + +### 1. Build local dev containers + +```bash +# From sdrf-pipelines repo +cd /path/to/sdrf-pipelines +docker build -f Dockerfile.dev -t local/sdrf-pipelines:dev . + +# From quantms-utils repo +cd /path/to/quantms-utils +docker build -f Dockerfile.dev -t local/quantms-utils:dev . +``` + +### 2. Run the pipeline with local containers + +Use the `test_dia_local.config` to override container references: + +```bash +nextflow run main.nf \ + -profile test_dia,docker \ + -c conf/tests/test_dia_local.config \ + --outdir results +``` + +This config (`conf/tests/test_dia_local.config`) overrides: +- `SDRF_PARSING` → `local/sdrf-pipelines:dev` +- `SAMPLESHEET_CHECK` → `local/quantms-utils:dev` +- `DIANN_MSSTATS` → `local/quantms-utils:dev` + +### 3. Using pre-converted mzML files + +To skip ThermoRawFileParser (useful on macOS/ARM where Mono crashes): + +```bash +# Convert raw files with ThermoRawFileParser v2.0+ +docker run --rm --platform=linux/amd64 \ + -v /path/to/raw:/data -v /path/to/mzml:/out \ + quay.io/biocontainers/thermorawfileparser:2.0.0.dev--h9ee0642_0 \ + ThermoRawFileParser -d /data -o /out -f 2 + +# Run pipeline with pre-converted files +nextflow run main.nf \ + -profile test_dia,docker \ + -c conf/tests/test_dia_local.config \ + --root_folder /path/to/mzml \ + --local_input_type mzML \ + --outdir results +``` + ## Nextflow memory requirements Add the following to your environment to limit Java memory: From 3dfa38c107ff760de8148a85b0461b0949f22347 Mon Sep 17 00:00:00 2001 From: yueqixuan Date: Sun, 29 Mar 2026 18:06:16 +0800 Subject: [PATCH 06/16] update --- modules/local/diann/diann_msstats/main.nf | 4 ++-- modules/local/diann/generate_cfg/main.nf | 4 ++-- modules/local/pmultiqc/main.nf | 12 ++++++------ modules/local/samplesheet_check/main.nf | 4 ++-- modules/local/sdrf_parsing/main.nf | 4 ++-- modules/local/utils/mzml_statistics/main.nf | 4 ++-- workflows/dia.nf | 1 + workflows/quantmsdiann.nf | 1 + 8 files changed, 18 insertions(+), 16 deletions(-) diff --git a/modules/local/diann/diann_msstats/main.nf b/modules/local/diann/diann_msstats/main.nf index a767910..b2e96ff 100644 --- a/modules/local/diann/diann_msstats/main.nf +++ b/modules/local/diann/diann_msstats/main.nf @@ -3,8 +3,8 @@ process DIANN_MSSTATS { label 'process_medium' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/quantms-utils:0.0.25--pyh106432d_0' : - 'biocontainers/quantms-utils:0.0.25--pyh106432d_0' }" + 'https://depot.galaxyproject.org/singularity/quantms-utils:0.0.27--pyh106432d_0' : + 'biocontainers/quantms-utils:0.0.27--pyh106432d_0' }" input: path(report) diff --git a/modules/local/diann/generate_cfg/main.nf b/modules/local/diann/generate_cfg/main.nf index 9a4adef..7acf515 100644 --- a/modules/local/diann/generate_cfg/main.nf +++ b/modules/local/diann/generate_cfg/main.nf @@ -3,8 +3,8 @@ process GENERATE_CFG { label 'process_tiny' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/quantms-utils:0.0.25--pyh106432d_0' : - 'biocontainers/quantms-utils:0.0.25--pyh106432d_0' }" + 'https://depot.galaxyproject.org/singularity/quantms-utils:0.0.27--pyh106432d_0' : + 'biocontainers/quantms-utils:0.0.27--pyh106432d_0' }" input: val(meta) diff --git a/modules/local/pmultiqc/main.nf b/modules/local/pmultiqc/main.nf index 844fe24..af25267 100644 --- a/modules/local/pmultiqc/main.nf +++ b/modules/local/pmultiqc/main.nf @@ -2,8 +2,8 @@ process PMULTIQC { label 'process_high' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/pmultiqc:0.0.39--pyhdfd78af_0' : - 'biocontainers/pmultiqc:0.0.39--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/pmultiqc:0.0.42--pyhdfd78af_0' : + 'biocontainers/pmultiqc:0.0.42--pyhdfd78af_0' }" input: path 'results/*' @@ -17,10 +17,10 @@ process PMULTIQC { script: def args = task.ext.args ?: '' - def disable_pmultiqc = (params.enable_pmultiqc) ? "--quantms_plugin" : "" - def disable_table_plots = (params.enable_pmultiqc) && (params.skip_table_plots) ? "--disable_table" : "" - def disable_idxml_index = (params.enable_pmultiqc) && (params.pmultiqc_idxml_skip) ? "--ignored_idxml" : "" - def contaminant_affix = params.contaminant_string ? "--contaminant_affix ${params.contaminant_string}" : "" + def disable_pmultiqc = (params.enable_pmultiqc) ? "--quantms-plugin" : "" + def disable_table_plots = (params.enable_pmultiqc) && (params.skip_table_plots) ? "--disable-table" : "" + def disable_idxml_index = (params.enable_pmultiqc) && (params.pmultiqc_idxml_skip) ? "--ignored-idxml" : "" + def contaminant_affix = params.contaminant_string ? "--contaminant-affix ${params.contaminant_string}" : "" """ set -x diff --git a/modules/local/samplesheet_check/main.nf b/modules/local/samplesheet_check/main.nf index 1c1c9d4..09a1303 100644 --- a/modules/local/samplesheet_check/main.nf +++ b/modules/local/samplesheet_check/main.nf @@ -4,8 +4,8 @@ process SAMPLESHEET_CHECK { label 'process_tiny' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/quantms-utils:0.0.25--pyh106432d_0' : - 'biocontainers/quantms-utils:0.0.25--pyh106432d_0' }" + 'https://depot.galaxyproject.org/singularity/quantms-utils:0.0.27--pyh106432d_0' : + 'biocontainers/quantms-utils:0.0.27--pyh106432d_0' }" input: path input_file diff --git a/modules/local/sdrf_parsing/main.nf b/modules/local/sdrf_parsing/main.nf index 56b402e..ef9367b 100644 --- a/modules/local/sdrf_parsing/main.nf +++ b/modules/local/sdrf_parsing/main.nf @@ -3,8 +3,8 @@ process SDRF_PARSING { label 'process_tiny' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/sdrf-pipelines:0.0.33--pyhdfd78af_0' : - 'biocontainers/sdrf-pipelines:0.0.33--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/sdrf-pipelines:0.1.2--pyhdfd78af_0' : + 'biocontainers/sdrf-pipelines:0.1.2--pyhdfd78af_0' }" input: path sdrf diff --git a/modules/local/utils/mzml_statistics/main.nf b/modules/local/utils/mzml_statistics/main.nf index cfa2c2b..86bd694 100644 --- a/modules/local/utils/mzml_statistics/main.nf +++ b/modules/local/utils/mzml_statistics/main.nf @@ -4,8 +4,8 @@ process MZML_STATISTICS { label 'process_single' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/quantms-utils:0.0.25--pyh106432d_0' : - 'biocontainers/quantms-utils:0.0.25--pyh106432d_0' }" + 'https://depot.galaxyproject.org/singularity/quantms-utils:0.0.27--pyh106432d_0' : + 'biocontainers/quantms-utils:0.0.27--pyh106432d_0' }" input: tuple val(meta), path(ms_file) diff --git a/workflows/dia.nf b/workflows/dia.nf index e67cb54..c35c7a3 100644 --- a/workflows/dia.nf +++ b/workflows/dia.nf @@ -159,6 +159,7 @@ workflow DIA { emit: versions = ch_software_versions diann_report = diann_main_report + diann_log = FINAL_QUANTIFICATION.out.log msstats_in = DIANN_MSSTATS.out.out_msstats } diff --git a/workflows/quantmsdiann.nf b/workflows/quantmsdiann.nf index 911c39e..9d869ac 100644 --- a/workflows/quantmsdiann.nf +++ b/workflows/quantmsdiann.nf @@ -116,6 +116,7 @@ workflow QUANTMSDIANN { ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_config) ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) ch_multiqc_files = ch_multiqc_files.mix(FILE_PREPARATION.out.statistics) + ch_multiqc_files = ch_multiqc_files.mix(DIA.out.diann_log) ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml', sort: false)) ch_multiqc_quantms_logo = file("${projectDir}/assets/nf-core-quantmsdiann_logo_light.png") From c46b7dcbdb874e0bf7fe1b41f8f0b5516f31d249 Mon Sep 17 00:00:00 2001 From: yueqixuan Date: Tue, 31 Mar 2026 12:31:19 +0800 Subject: [PATCH 07/16] extract calibration params to meta --- .../diann/assemble_empirical_library/main.nf | 9 ++++ .../diann/assemble_empirical_library/meta.yml | 4 ++ .../local/diann/individual_analysis/main.nf | 42 ++++++++++++++----- .../local/diann/individual_analysis/meta.yml | 4 -- nextflow.config | 4 ++ nextflow_schema.json | 14 +++++++ workflows/dia.nf | 42 +++++++++++++++---- 7 files changed, 96 insertions(+), 23 deletions(-) diff --git a/modules/local/diann/assemble_empirical_library/main.nf b/modules/local/diann/assemble_empirical_library/main.nf index 034b95e..2bb3e37 100644 --- a/modules/local/diann/assemble_empirical_library/main.nf +++ b/modules/local/diann/assemble_empirical_library/main.nf @@ -19,6 +19,7 @@ process ASSEMBLE_EMPIRICAL_LIBRARY { path "empirical_library.*", emit: empirical_library path "assemble_empirical_library.log", emit: log path "versions.yml", emit: versions + path "diann_calibrated_params.csv", emit: calibrated_params when: task.ext.when == null || task.ext.when @@ -83,6 +84,14 @@ process ASSEMBLE_EMPIRICAL_LIBRARY { cp report.log.txt assemble_empirical_library.log + val_mass_acc_ms2=\$(grep "Averaged recommended settings" assemble_empirical_library.log | cut -d ' ' -f 11 | tr -cd "[0-9.]") + val_mass_acc_ms1=\$(grep "Averaged recommended settings" assemble_empirical_library.log | cut -d ' ' -f 15 | tr -cd "[0-9.]") + val_scan_window=\$(grep "Averaged recommended settings" assemble_empirical_library.log | cut -d ' ' -f 19 | tr -cd "[0-9.]") + if [ -z "\$val_mass_acc_ms2" ]; then val_mass_acc_ms2="0"; fi + if [ -z "\$val_mass_acc_ms1" ]; then val_mass_acc_ms1="0"; fi + if [ -z "\$val_scan_window" ]; then val_scan_window="0"; fi + echo "\${val_mass_acc_ms2},\${val_mass_acc_ms1},\${val_scan_window}" > diann_calibrated_params.csv + cat <<-END_VERSIONS > versions.yml "${task.process}": DIA-NN: \$(diann 2>&1 | grep "DIA-NN" | grep -oP "\\d+\\.\\d+(\\.\\w+)*(\\.[\\d]+)?") diff --git a/modules/local/diann/assemble_empirical_library/meta.yml b/modules/local/diann/assemble_empirical_library/meta.yml index 0d1f5b7..c6ad7ab 100644 --- a/modules/local/diann/assemble_empirical_library/meta.yml +++ b/modules/local/diann/assemble_empirical_library/meta.yml @@ -35,5 +35,9 @@ output: type: file description: File containing software version pattern: "versions.yml" + - calibrated_params: + type: file + description: A file containing mass_acc_ms2, mass_acc_ms1, and scan_window extracted from the DIA-NN log. + pattern: "diann_calibrated_params.csv" authors: - "@daichengxin" diff --git a/modules/local/diann/individual_analysis/main.nf b/modules/local/diann/individual_analysis/main.nf index 0ba54e8..0df29dd 100644 --- a/modules/local/diann/individual_analysis/main.nf +++ b/modules/local/diann/individual_analysis/main.nf @@ -8,7 +8,7 @@ process INDIVIDUAL_ANALYSIS { 'docker.io/biocontainers/diann:v1.8.1_cv1' }" input: - tuple val(meta), path(ms_file), path(fasta), path(diann_log), path(library) + tuple val(meta), path(ms_file), path(fasta), path(library) path(diann_config) output: @@ -44,19 +44,39 @@ process INDIVIDUAL_ANALYSIS { } } - scan_window = params.scan_window - - if (params.mass_acc_automatic | params.scan_window_automatic) { - mass_acc_ms2 = "\$(cat ${diann_log} | grep \"Averaged recommended settings\" | cut -d ' ' -f 11 | tr -cd \"[0-9]\")" - scan_window = "\$(cat ${diann_log} | grep \"Averaged recommended settings\" | cut -d ' ' -f 19 | tr -cd \"[0-9]\")" - mass_acc_ms1 = "\$(cat ${diann_log} | grep \"Averaged recommended settings\" | cut -d ' ' -f 15 | tr -cd \"[0-9]\")" - } else if (meta['precursormasstoleranceunit'].toLowerCase().endsWith('ppm') && meta['fragmentmasstoleranceunit'].toLowerCase().endsWith('ppm')) { + if (params.mass_acc_automatic || params.scan_window_automatic) { + if (meta.mass_acc_ms2 != "0" && meta.mass_acc_ms2 != null) { + mass_acc_ms2 = meta.mass_acc_ms2 + mass_acc_ms1 = meta.mass_acc_ms1 + scan_window = meta.scan_window + } + else if (meta['fragmentmasstolerance']) { + mass_acc_ms2 = meta['fragmentmasstolerance'] + mass_acc_ms1 = meta['precursormasstolerance'] + scan_window = params.scan_window + } + else { + mass_acc_ms2 = params.mass_acc_ms2 + mass_acc_ms1 = params.mass_acc_ms1 + scan_window = params.scan_window + } + } else if (meta['precursormasstoleranceunit']?.toLowerCase()?.endsWith('ppm') && meta['fragmentmasstoleranceunit']?.toLowerCase()?.endsWith('ppm')) { mass_acc_ms1 = meta["precursormasstolerance"] mass_acc_ms2 = meta["fragmentmasstolerance"] } else { - mass_acc_ms2 = "\$(cat ${diann_log} | grep \"Averaged recommended settings\" | cut -d ' ' -f 11 | tr -cd \"[0-9]\")" - scan_window = "\$(cat ${diann_log} | grep \"Averaged recommended settings\" | cut -d ' ' -f 19 | tr -cd \"[0-9]\")" - mass_acc_ms1 = "\$(cat ${diann_log} | grep \"Averaged recommended settings\" | cut -d ' ' -f 15 | tr -cd \"[0-9]\")" + if (meta.mass_acc_ms2 != "0" && meta.mass_acc_ms2 != null) { + mass_acc_ms2 = meta.mass_acc_ms2 + mass_acc_ms1 = meta.mass_acc_ms1 + scan_window = meta.scan_window + } else if (meta['fragmentmasstolerance']) { + mass_acc_ms2 = meta['fragmentmasstolerance'] + mass_acc_ms1 = meta['precursormasstolerance'] + scan_window = params.scan_window + } else { + mass_acc_ms2 = params.mass_acc_ms2 + mass_acc_ms1 = params.mass_acc_ms1 + scan_window = params.scan_window + } } diann_no_peptidoforms = params.diann_no_peptidoforms ? "--no-peptidoforms" : "" diff --git a/modules/local/diann/individual_analysis/meta.yml b/modules/local/diann/individual_analysis/meta.yml index 655a16f..f7ffe35 100644 --- a/modules/local/diann/individual_analysis/meta.yml +++ b/modules/local/diann/individual_analysis/meta.yml @@ -10,10 +10,6 @@ tools: homepage: https://github.com/vdemichev/DiaNN documentation: https://github.com/vdemichev/DiaNN input: - - diann_log: - type: file - description: DIA-NN log file - pattern: "assemble_empirical_library.log" - empirical_library: type: file description: An empirical spectral library from the .quant files. diff --git a/nextflow.config b/nextflow.config index 79d8ec7..636562b 100644 --- a/nextflow.config +++ b/nextflow.config @@ -85,6 +85,10 @@ params { random_preanalysis_seed = 42 empirical_assembly_ms_n = 200 + // DIA-NN: INDIVIDUAL_ANALYSIS + mass_acc_ms2 = 15 + mass_acc_ms1 = 15 + // DIA-NN: FINAL_QUANTIFICATION — summarization & output pg_level = 2 species_genes = false diff --git a/nextflow_schema.json b/nextflow_schema.json index d1215b4..c992d3e 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -321,6 +321,20 @@ "help_text": " Ideally, should be approximately equal to the average number of data points per peak", "default": 8 }, + "mass_acc_ms2": { + "type": "number", + "description": "Set the MS2 mass accuracy (tolerance) to a specific value in ppm.", + "fa_icon": "fas fa-bullseye", + "help_text": "If specified, this overrides the automatic calibration. Corresponds to the --mass-acc parameter in DIA-NN.", + "default": 15 + }, + "mass_acc_ms1": { + "type": "number", + "description": "Set the MS1 mass accuracy (tolerance) to a specific value in ppm.", + "fa_icon": "fas fa-bullseye", + "help_text": "If specified, this overrides the automatic calibration. Corresponds to the --mass-acc-ms1 parameter in DIA-NN.", + "default": 15 + }, "performance_mode": { "type": "boolean", "description": "Set Low RAM & High Speed Mode for DIANN, including min-corr, corr-diff, and time-corr-only three parameters", diff --git a/workflows/dia.nf b/workflows/dia.nf index c35c7a3..c6c7ad6 100644 --- a/workflows/dia.nf +++ b/workflows/dia.nf @@ -59,12 +59,30 @@ workflow DIA { } if (params.skip_preliminary_analysis) { - assembly_log = channel.fromPath(params.empirical_assembly_log) - empirical_library = channel.fromPath(params.diann_speclib) - indiv_fin_analysis_in = ch_file_preparation_results.combine(ch_searchdb) - .combine(assembly_log) - .combine(empirical_library) - empirical_lib = empirical_library + def log_file = params.empirical_assembly_log ? file(params.empirical_assembly_log) : null + def parsed_m2 = "0" + def parsed_m1 = "0" + def parsed_w = "0" + if (log_file && log_file.exists()) { + def matcher = log_file.text =~ /Mass accuracy = ([0-9.]+)ppm, MS1 accuracy = ([0-9.]+)ppm, Scan window = ([0-9.]+)/ + if (matcher) { + parsed_m2 = matcher[0][1] + parsed_m1 = matcher[0][2] + parsed_w = matcher[0][3] + } + } + indiv_fin_analysis_in = ch_file_preparation_results + .combine(ch_searchdb) + .combine(speclib) + .map { meta_map, ms_file, fasta, library -> + def new_meta = meta_map + [ + mass_acc_ms2 : parsed_m2, + mass_acc_ms1 : parsed_m1, + scan_window : parsed_w + ] + return [ new_meta, ms_file, fasta, library ] + } + empirical_lib = speclib } else { // // MODULE: PRELIMINARY_ANALYSIS @@ -102,9 +120,17 @@ workflow DIA { .mix(ASSEMBLE_EMPIRICAL_LIBRARY.out.versions) indiv_fin_analysis_in = ch_file_preparation_results .combine(ch_searchdb) - .combine(ASSEMBLE_EMPIRICAL_LIBRARY.out.log) .combine(ASSEMBLE_EMPIRICAL_LIBRARY.out.empirical_library) - + .combine(ASSEMBLE_EMPIRICAL_LIBRARY.out.calibrated_params) + .map { meta_map, ms_file, fasta, library, param_file -> + def values = param_file.text.trim().split(',') + def new_meta = meta_map + [ + mass_acc_ms2 : values[0], + mass_acc_ms1 : values[1], + scan_window : values[2] + ] + return [ new_meta, ms_file, fasta, library ] + } empirical_lib = ASSEMBLE_EMPIRICAL_LIBRARY.out.empirical_library } From 5061ed18541ab712f0f60f60b429f416e3887609 Mon Sep 17 00:00:00 2001 From: yueqixuan Date: Tue, 31 Mar 2026 13:24:52 +0800 Subject: [PATCH 08/16] update --- .gitignore | 2 +- docs/usage.md | 71 ++++++++++--------- modules/local/diann/diann_msstats/main.nf | 4 +- modules/local/diann/generate_cfg/main.nf | 4 +- .../local/diann/individual_analysis/main.nf | 4 +- modules/local/pmultiqc/main.nf | 4 +- modules/local/samplesheet_check/main.nf | 4 +- modules/local/utils/mzml_statistics/main.nf | 4 +- nextflow_schema.json | 15 ++++ workflows/dia.nf | 4 +- 10 files changed, 67 insertions(+), 49 deletions(-) diff --git a/.gitignore b/.gitignore index 114452d..10bcc4e 100644 --- a/.gitignore +++ b/.gitignore @@ -17,4 +17,4 @@ null/ .cursor/rules/codacy.mdc .codacy/ .github/instructions/codacy.instructions.md -docs/superpowers/ \ No newline at end of file +docs/superpowers/ diff --git a/docs/usage.md b/docs/usage.md index 8eb8a7d..a9b652a 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -95,49 +95,50 @@ The pipeline passes parameters to DIA-NN at different steps. Some parameters com ### Parameter sources Parameters are resolved in this priority order: + 1. **SDRF metadata** (per-file, from `convert-diann` design file) — highest priority 2. **Pipeline parameters** (`--param_name` on command line or params file) 3. **Nextflow defaults** (`nextflow.config`) — lowest priority ### Pipeline steps -| Step | Description | -|------|-------------| +| Step | Description | +| ------------------------------- | ------------------------------------------------------------------- | | **INSILICO_LIBRARY_GENERATION** | Predicts a spectral library from FASTA using DIA-NN's deep learning | -| **PRELIMINARY_ANALYSIS** | Per-file calibration and mass accuracy estimation (first pass) | -| **ASSEMBLE_EMPIRICAL_LIBRARY** | Builds consensus empirical library from preliminary results | -| **INDIVIDUAL_ANALYSIS** | Per-file quantification with the empirical library (second pass) | -| **FINAL_QUANTIFICATION** | Aggregates all files into protein/peptide matrices | +| **PRELIMINARY_ANALYSIS** | Per-file calibration and mass accuracy estimation (first pass) | +| **ASSEMBLE_EMPIRICAL_LIBRARY** | Builds consensus empirical library from preliminary results | +| **INDIVIDUAL_ANALYSIS** | Per-file quantification with the empirical library (second pass) | +| **FINAL_QUANTIFICATION** | Aggregates all files into protein/peptide matrices | ### Per-file parameters from SDRF These parameters are extracted per-file from the SDRF via `convert-diann` and stored in `diann_design.tsv`: -| DIA-NN flag | SDRF column | Design column | Steps | Notes | -|---|---|---|---|---| -| `--mass-acc-ms1` | `comment[precursor mass tolerance]` | `PrecursorMassTolerance` | PRELIMINARY, INDIVIDUAL | Falls back to auto-detect if missing or not ppm | -| `--mass-acc` | `comment[fragment mass tolerance]` | `FragmentMassTolerance` | PRELIMINARY, INDIVIDUAL | Falls back to auto-detect if missing or not ppm | -| `--min-pr-mz` | `comment[ms1 scan range]` or `comment[ms min mz]` | `MS1MinMz` | PRELIMINARY, INDIVIDUAL | Per-file for GPF; global broadest for INSILICO | -| `--max-pr-mz` | `comment[ms1 scan range]` or `comment[ms max mz]` | `MS1MaxMz` | PRELIMINARY, INDIVIDUAL | Per-file for GPF; global broadest for INSILICO | -| `--min-fr-mz` | `comment[ms2 scan range]` or `comment[ms2 min mz]` | `MS2MinMz` | PRELIMINARY, INDIVIDUAL | Per-file for GPF; global broadest for INSILICO | -| `--max-fr-mz` | `comment[ms2 scan range]` or `comment[ms2 max mz]` | `MS2MaxMz` | PRELIMINARY, INDIVIDUAL | Per-file for GPF; global broadest for INSILICO | +| DIA-NN flag | SDRF column | Design column | Steps | Notes | +| ---------------- | -------------------------------------------------- | ------------------------ | ----------------------- | ----------------------------------------------- | +| `--mass-acc-ms1` | `comment[precursor mass tolerance]` | `PrecursorMassTolerance` | PRELIMINARY, INDIVIDUAL | Falls back to auto-detect if missing or not ppm | +| `--mass-acc` | `comment[fragment mass tolerance]` | `FragmentMassTolerance` | PRELIMINARY, INDIVIDUAL | Falls back to auto-detect if missing or not ppm | +| `--min-pr-mz` | `comment[ms1 scan range]` or `comment[ms min mz]` | `MS1MinMz` | PRELIMINARY, INDIVIDUAL | Per-file for GPF; global broadest for INSILICO | +| `--max-pr-mz` | `comment[ms1 scan range]` or `comment[ms max mz]` | `MS1MaxMz` | PRELIMINARY, INDIVIDUAL | Per-file for GPF; global broadest for INSILICO | +| `--min-fr-mz` | `comment[ms2 scan range]` or `comment[ms2 min mz]` | `MS2MinMz` | PRELIMINARY, INDIVIDUAL | Per-file for GPF; global broadest for INSILICO | +| `--max-fr-mz` | `comment[ms2 scan range]` or `comment[ms2 max mz]` | `MS2MaxMz` | PRELIMINARY, INDIVIDUAL | Per-file for GPF; global broadest for INSILICO | ### Global parameters from config These parameters apply globally across all files. They are set in `diann_config.cfg` (from SDRF) or as pipeline parameters: -| DIA-NN flag | Pipeline parameter | Default | Steps | Notes | -|---|---|---|---|---| -| `--cut` | (from SDRF enzyme) | — | ALL | Enzyme cut rule, derived from `comment[cleavage agent details]` | -| `--fixed-mod` | (from SDRF) | — | ALL | Fixed modifications from `comment[modification parameters]` | -| `--var-mod` | (from SDRF) | — | ALL | Variable modifications from `comment[modification parameters]` | -| `--monitor-mod` | `--enable_mod_localization` + `--mod_localization` | `false` / `Phospho (S),Phospho (T),Phospho (Y)` | PRELIMINARY, ASSEMBLE, INDIVIDUAL, FINAL | PTM site localization scoring (DIA-NN 1.8.x only) | -| `--window` | `--scan_window` | `8` | PRELIMINARY, ASSEMBLE, INDIVIDUAL | Scan window; auto-detected when `--scan_window_automatic=true` | -| `--quick-mass-acc` | `--quick_mass_acc` | `true` | PRELIMINARY | Fast mass accuracy calibration | -| `--min-corr 2 --corr-diff 1 --time-corr-only` | `--performance_mode` | `true` | PRELIMINARY | High-speed, low-RAM mode | -| `--pg-level` | `--pg_level` | `2` | INDIVIDUAL, FINAL | Protein grouping level | -| `--species-genes` | `--species_genes` | `false` | FINAL | Use species-specific gene names | -| `--no-norm` | `--diann_normalize` | `true` | FINAL | Disable normalization when `false` | +| DIA-NN flag | Pipeline parameter | Default | Steps | Notes | +| --------------------------------------------- | -------------------------------------------------- | ----------------------------------------------- | ---------------------------------------- | --------------------------------------------------------------- | +| `--cut` | (from SDRF enzyme) | — | ALL | Enzyme cut rule, derived from `comment[cleavage agent details]` | +| `--fixed-mod` | (from SDRF) | — | ALL | Fixed modifications from `comment[modification parameters]` | +| `--var-mod` | (from SDRF) | — | ALL | Variable modifications from `comment[modification parameters]` | +| `--monitor-mod` | `--enable_mod_localization` + `--mod_localization` | `false` / `Phospho (S),Phospho (T),Phospho (Y)` | PRELIMINARY, ASSEMBLE, INDIVIDUAL, FINAL | PTM site localization scoring (DIA-NN 1.8.x only) | +| `--window` | `--scan_window` | `8` | PRELIMINARY, ASSEMBLE, INDIVIDUAL | Scan window; auto-detected when `--scan_window_automatic=true` | +| `--quick-mass-acc` | `--quick_mass_acc` | `true` | PRELIMINARY | Fast mass accuracy calibration | +| `--min-corr 2 --corr-diff 1 --time-corr-only` | `--performance_mode` | `true` | PRELIMINARY | High-speed, low-RAM mode | +| `--pg-level` | `--pg_level` | `2` | INDIVIDUAL, FINAL | Protein grouping level | +| `--species-genes` | `--species_genes` | `false` | FINAL | Use species-specific gene names | +| `--no-norm` | `--diann_normalize` | `true` | FINAL | Disable normalization when `false` | ### PTM site localization (`--monitor-mod`) @@ -161,19 +162,20 @@ nextflow run bigbio/quantmsdiann \ ``` The parameter accepts two formats: + - **Modification names** (quantms-compatible): `Phospho (S),Phospho (T),Phospho (Y)` — site info in parentheses is stripped, the base name is mapped to UniMod - **UniMod accessions** (direct): `UniMod:21,UniMod:1` Supported modification name mappings: -| Name | UniMod ID | Example | -|---|---|---| -| Phospho | `UniMod:21` | `Phospho (S),Phospho (T),Phospho (Y)` | -| GlyGly | `UniMod:121` | `GlyGly (K)` | -| Acetyl | `UniMod:1` | `Acetyl (Protein N-term)` | -| Oxidation | `UniMod:35` | `Oxidation (M)` | -| Deamidated | `UniMod:7` | `Deamidated (N),Deamidated (Q)` | -| Methylation | `UniMod:34` | `Methylation (K),Methylation (R)` | +| Name | UniMod ID | Example | +| ----------- | ------------ | ------------------------------------- | +| Phospho | `UniMod:21` | `Phospho (S),Phospho (T),Phospho (Y)` | +| GlyGly | `UniMod:121` | `GlyGly (K)` | +| Acetyl | `UniMod:1` | `Acetyl (Protein N-term)` | +| Oxidation | `UniMod:35` | `Oxidation (M)` | +| Deamidated | `UniMod:7` | `Deamidated (N),Deamidated (Q)` | +| Methylation | `UniMod:34` | `Methylation (K),Methylation (R)` | ## Optional outputs @@ -269,6 +271,7 @@ nextflow run main.nf \ ``` This config (`conf/tests/test_dia_local.config`) overrides: + - `SDRF_PARSING` → `local/sdrf-pipelines:dev` - `SAMPLESHEET_CHECK` → `local/quantms-utils:dev` - `DIANN_MSSTATS` → `local/quantms-utils:dev` diff --git a/modules/local/diann/diann_msstats/main.nf b/modules/local/diann/diann_msstats/main.nf index b2e96ff..470309d 100644 --- a/modules/local/diann/diann_msstats/main.nf +++ b/modules/local/diann/diann_msstats/main.nf @@ -3,8 +3,8 @@ process DIANN_MSSTATS { label 'process_medium' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/quantms-utils:0.0.27--pyh106432d_0' : - 'biocontainers/quantms-utils:0.0.27--pyh106432d_0' }" + 'https://depot.galaxyproject.org/singularity/quantms-utils:0.0.28--pyh106432d_0' : + 'biocontainers/quantms-utils:0.0.28--pyh106432d_0' }" input: path(report) diff --git a/modules/local/diann/generate_cfg/main.nf b/modules/local/diann/generate_cfg/main.nf index 7acf515..8377030 100644 --- a/modules/local/diann/generate_cfg/main.nf +++ b/modules/local/diann/generate_cfg/main.nf @@ -3,8 +3,8 @@ process GENERATE_CFG { label 'process_tiny' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/quantms-utils:0.0.27--pyh106432d_0' : - 'biocontainers/quantms-utils:0.0.27--pyh106432d_0' }" + 'https://depot.galaxyproject.org/singularity/quantms-utils:0.0.28--pyh106432d_0' : + 'biocontainers/quantms-utils:0.0.28--pyh106432d_0' }" input: val(meta) diff --git a/modules/local/diann/individual_analysis/main.nf b/modules/local/diann/individual_analysis/main.nf index 0df29dd..36502b5 100644 --- a/modules/local/diann/individual_analysis/main.nf +++ b/modules/local/diann/individual_analysis/main.nf @@ -49,12 +49,12 @@ process INDIVIDUAL_ANALYSIS { mass_acc_ms2 = meta.mass_acc_ms2 mass_acc_ms1 = meta.mass_acc_ms1 scan_window = meta.scan_window - } + } else if (meta['fragmentmasstolerance']) { mass_acc_ms2 = meta['fragmentmasstolerance'] mass_acc_ms1 = meta['precursormasstolerance'] scan_window = params.scan_window - } + } else { mass_acc_ms2 = params.mass_acc_ms2 mass_acc_ms1 = params.mass_acc_ms1 diff --git a/modules/local/pmultiqc/main.nf b/modules/local/pmultiqc/main.nf index af25267..f9d1964 100644 --- a/modules/local/pmultiqc/main.nf +++ b/modules/local/pmultiqc/main.nf @@ -2,8 +2,8 @@ process PMULTIQC { label 'process_high' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/pmultiqc:0.0.42--pyhdfd78af_0' : - 'biocontainers/pmultiqc:0.0.42--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/pmultiqc:0.0.43--pyhdfd78af_0' : + 'biocontainers/pmultiqc:0.0.43--pyhdfd78af_0' }" input: path 'results/*' diff --git a/modules/local/samplesheet_check/main.nf b/modules/local/samplesheet_check/main.nf index 09a1303..f2b7112 100644 --- a/modules/local/samplesheet_check/main.nf +++ b/modules/local/samplesheet_check/main.nf @@ -4,8 +4,8 @@ process SAMPLESHEET_CHECK { label 'process_tiny' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/quantms-utils:0.0.27--pyh106432d_0' : - 'biocontainers/quantms-utils:0.0.27--pyh106432d_0' }" + 'https://depot.galaxyproject.org/singularity/quantms-utils:0.0.28--pyh106432d_0' : + 'biocontainers/quantms-utils:0.0.28--pyh106432d_0' }" input: path input_file diff --git a/modules/local/utils/mzml_statistics/main.nf b/modules/local/utils/mzml_statistics/main.nf index 86bd694..f6a96d4 100644 --- a/modules/local/utils/mzml_statistics/main.nf +++ b/modules/local/utils/mzml_statistics/main.nf @@ -4,8 +4,8 @@ process MZML_STATISTICS { label 'process_single' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/quantms-utils:0.0.27--pyh106432d_0' : - 'biocontainers/quantms-utils:0.0.27--pyh106432d_0' }" + 'https://depot.galaxyproject.org/singularity/quantms-utils:0.0.28--pyh106432d_0' : + 'biocontainers/quantms-utils:0.0.28--pyh106432d_0' }" input: tuple val(meta), path(ms_file) diff --git a/nextflow_schema.json b/nextflow_schema.json index c992d3e..383909c 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -302,6 +302,21 @@ "description": "Settings for DIA-NN - a universal software for data-independent acquisition (DIA) proteomics data processing.", "default": "", "properties": { + "diann_version": { + "type": "string", + "description": "Specify the DIA-NN version to be used in the workflow.", + "fa_icon": "fas fa-tag" + }, + "enable_mod_localization": { + "type": "boolean", + "description": "Enable or disable modification localization scoring in DIA-NN.", + "fa_icon": "fas fa-map-marker-alt" + }, + "mod_localization": { + "type": "string", + "description": "Specify the modification localization parameters for DIA-NN.", + "fa_icon": "fas fa-cogs" + }, "mass_acc_automatic": { "type": "boolean", "default": true, diff --git a/workflows/dia.nf b/workflows/dia.nf index c6c7ad6..5b66ee8 100644 --- a/workflows/dia.nf +++ b/workflows/dia.nf @@ -62,7 +62,7 @@ workflow DIA { def log_file = params.empirical_assembly_log ? file(params.empirical_assembly_log) : null def parsed_m2 = "0" def parsed_m1 = "0" - def parsed_w = "0" + def parsed_w = "0" if (log_file && log_file.exists()) { def matcher = log_file.text =~ /Mass accuracy = ([0-9.]+)ppm, MS1 accuracy = ([0-9.]+)ppm, Scan window = ([0-9.]+)/ if (matcher) { @@ -70,7 +70,7 @@ workflow DIA { parsed_m1 = matcher[0][2] parsed_w = matcher[0][3] } - } + } indiv_fin_analysis_in = ch_file_preparation_results .combine(ch_searchdb) .combine(speclib) From 4e4f0c9491a05b7360251596e5608c626a0e632f Mon Sep 17 00:00:00 2001 From: yueqixuan Date: Tue, 31 Mar 2026 21:07:32 +0800 Subject: [PATCH 09/16] fix --- .../diann/assemble_empirical_library/main.nf | 4 +- .../diann/assemble_empirical_library/meta.yml | 8 ++-- .../local/diann/individual_analysis/main.nf | 19 ++++----- modules/local/parse_empirical_log/main.nf | 18 ++++++++ modules/local/parse_empirical_log/meta.yml | 21 ++++++++++ modules/local/samplesheet_check/meta.yml | 3 -- modules/local/sdrf_parsing/main.nf | 1 - modules/local/sdrf_parsing/meta.yml | 6 +-- nextflow.config | 4 -- nextflow_schema.json | 20 --------- .../local/create_input_channel/main.nf | 3 +- workflows/dia.nf | 41 +++++++++++-------- 12 files changed, 79 insertions(+), 69 deletions(-) create mode 100644 modules/local/parse_empirical_log/main.nf create mode 100644 modules/local/parse_empirical_log/meta.yml diff --git a/modules/local/diann/assemble_empirical_library/main.nf b/modules/local/diann/assemble_empirical_library/main.nf index 2bb3e37..809c46f 100644 --- a/modules/local/diann/assemble_empirical_library/main.nf +++ b/modules/local/diann/assemble_empirical_library/main.nf @@ -19,7 +19,7 @@ process ASSEMBLE_EMPIRICAL_LIBRARY { path "empirical_library.*", emit: empirical_library path "assemble_empirical_library.log", emit: log path "versions.yml", emit: versions - path "diann_calibrated_params.csv", emit: calibrated_params + env CALIBRATED_PARAMS_VAL, emit: calibrated_params_val when: task.ext.when == null || task.ext.when @@ -90,7 +90,7 @@ process ASSEMBLE_EMPIRICAL_LIBRARY { if [ -z "\$val_mass_acc_ms2" ]; then val_mass_acc_ms2="0"; fi if [ -z "\$val_mass_acc_ms1" ]; then val_mass_acc_ms1="0"; fi if [ -z "\$val_scan_window" ]; then val_scan_window="0"; fi - echo "\${val_mass_acc_ms2},\${val_mass_acc_ms1},\${val_scan_window}" > diann_calibrated_params.csv + export CALIBRATED_PARAMS_VAL="\${val_mass_acc_ms2},\${val_mass_acc_ms1},\${val_scan_window}" cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/diann/assemble_empirical_library/meta.yml b/modules/local/diann/assemble_empirical_library/meta.yml index c6ad7ab..f4a22bc 100644 --- a/modules/local/diann/assemble_empirical_library/meta.yml +++ b/modules/local/diann/assemble_empirical_library/meta.yml @@ -35,9 +35,9 @@ output: type: file description: File containing software version pattern: "versions.yml" - - calibrated_params: - type: file - description: A file containing mass_acc_ms2, mass_acc_ms1, and scan_window extracted from the DIA-NN log. - pattern: "diann_calibrated_params.csv" + - calibrated_params_val: + type: string + description: A comma-separated string containing mass_acc_ms2, mass_acc_ms1, and scan_window extracted from the DIA-NN log. + pattern: "*,*,*" authors: - "@daichengxin" diff --git a/modules/local/diann/individual_analysis/main.nf b/modules/local/diann/individual_analysis/main.nf index 36502b5..28cb3b5 100644 --- a/modules/local/diann/individual_analysis/main.nf +++ b/modules/local/diann/individual_analysis/main.nf @@ -50,7 +50,7 @@ process INDIVIDUAL_ANALYSIS { mass_acc_ms1 = meta.mass_acc_ms1 scan_window = meta.scan_window } - else if (meta['fragmentmasstolerance']) { + else if (meta['precursormasstoleranceunit']?.toLowerCase()?.endsWith('ppm') && meta['fragmentmasstoleranceunit']?.toLowerCase()?.endsWith('ppm')) { mass_acc_ms2 = meta['fragmentmasstolerance'] mass_acc_ms1 = meta['precursormasstolerance'] scan_window = params.scan_window @@ -60,19 +60,18 @@ process INDIVIDUAL_ANALYSIS { mass_acc_ms1 = params.mass_acc_ms1 scan_window = params.scan_window } - } else if (meta['precursormasstoleranceunit']?.toLowerCase()?.endsWith('ppm') && meta['fragmentmasstoleranceunit']?.toLowerCase()?.endsWith('ppm')) { - mass_acc_ms1 = meta["precursormasstolerance"] - mass_acc_ms2 = meta["fragmentmasstolerance"] } else { - if (meta.mass_acc_ms2 != "0" && meta.mass_acc_ms2 != null) { + if (meta['precursormasstoleranceunit']?.toLowerCase()?.endsWith('ppm') && meta['fragmentmasstoleranceunit']?.toLowerCase()?.endsWith('ppm')) { + mass_acc_ms1 = meta["precursormasstolerance"] + mass_acc_ms2 = meta["fragmentmasstolerance"] + scan_window = params.scan_window + } + else if (meta.mass_acc_ms2 != "0" && meta.mass_acc_ms2 != null) { mass_acc_ms2 = meta.mass_acc_ms2 mass_acc_ms1 = meta.mass_acc_ms1 scan_window = meta.scan_window - } else if (meta['fragmentmasstolerance']) { - mass_acc_ms2 = meta['fragmentmasstolerance'] - mass_acc_ms1 = meta['precursormasstolerance'] - scan_window = params.scan_window - } else { + } + else { mass_acc_ms2 = params.mass_acc_ms2 mass_acc_ms1 = params.mass_acc_ms1 scan_window = params.scan_window diff --git a/modules/local/parse_empirical_log/main.nf b/modules/local/parse_empirical_log/main.nf new file mode 100644 index 0000000..4ff2e78 --- /dev/null +++ b/modules/local/parse_empirical_log/main.nf @@ -0,0 +1,18 @@ +process PARSE_EMPIRICAL_LOG { + label 'process_single' + + input: + path log_file + + output: + env PARSED_VALS, emit: parsed_vals + + script: + """ + parsed=\$(perl -ne 'if (/Mass accuracy = ([0-9.]+)ppm, MS1 accuracy = ([0-9.]+)ppm, Scan window = ([0-9.]+)/) { print "\$1,\$2,\$3"; exit; }' ${log_file}) + if [ -z "\$parsed" ]; then + parsed="0,0,0" + fi + export PARSED_VALS="\$parsed" + """ +} \ No newline at end of file diff --git a/modules/local/parse_empirical_log/meta.yml b/modules/local/parse_empirical_log/meta.yml new file mode 100644 index 0000000..faf17fe --- /dev/null +++ b/modules/local/parse_empirical_log/meta.yml @@ -0,0 +1,21 @@ +name: "parse_empirical_log" +description: Parses the empirical assembly log file (e.g., from DIA-NN) to extract calibrated mass accuracies and scan window parameters for downstream analysis. +keywords: + - quantmsdiann +tools: + - "perl": + description: "Larry Wall's Practical Extraction and Report Language, used here for regex parsing." + homepage: "https://www.perl.org/" + documentation: "https://perldoc.perl.org/" +input: + - log_file: + type: file + description: The log file generated by the empirical library assembly step (e.g., DIA-NN stdout/stderr log) containing the Averaged recommended settings. + pattern: "*.log" +output: + - parsed_vals: + type: string + description: A comma-separated string containing extracted mass_acc_ms2, mass_acc_ms1, and scan_window values (e.g., "15.0,20.0,3"). Defaults to "0,0,0" if no match is found. + pattern: "*,*,*" +authors: + - "@Qi-Xuan Yue" \ No newline at end of file diff --git a/modules/local/samplesheet_check/meta.yml b/modules/local/samplesheet_check/meta.yml index 28ed5e4..be51717 100644 --- a/modules/local/samplesheet_check/meta.yml +++ b/modules/local/samplesheet_check/meta.yml @@ -12,9 +12,6 @@ input: type: file description: Input samplesheet or experimental design file pattern: "*.{tsv,csv,sdrf}" - - meta: validate_ontologies - type: boolean - description: Whether to validate ontologies output: - meta: log type: file diff --git a/modules/local/sdrf_parsing/main.nf b/modules/local/sdrf_parsing/main.nf index ef9367b..a89bc61 100644 --- a/modules/local/sdrf_parsing/main.nf +++ b/modules/local/sdrf_parsing/main.nf @@ -10,7 +10,6 @@ process SDRF_PARSING { path sdrf output: - path "diann_design.tsv" , emit: ch_sdrf_config_file path "diann_design.tsv" , emit: ch_expdesign path "diann_config.cfg" , emit: ch_diann_cfg path "*.log" , emit: log diff --git a/modules/local/sdrf_parsing/meta.yml b/modules/local/sdrf_parsing/meta.yml index 860f3f1..7c311f4 100644 --- a/modules/local/sdrf_parsing/meta.yml +++ b/modules/local/sdrf_parsing/meta.yml @@ -19,11 +19,7 @@ output: - ch_expdesign: type: file description: experimental design file in OpenMS format - pattern: "*openms_design.tsv" - - ch_sdrf_config_file: - type: file - description: config file with search engine parameters in OpenMS nomenclature - pattern: "*_config.tsv" + pattern: "*_design.tsv" - mqpar: type: file description: maxquant configuration file diff --git a/nextflow.config b/nextflow.config index 636562b..855456b 100644 --- a/nextflow.config +++ b/nextflow.config @@ -16,10 +16,6 @@ params { // Input options and validation of sdrf files input = null - validate_ontologies = true // Enable to validate ontology terms in the SDRF - skip_ms_validation = false // Skip the validation of the MS metadata in the SDRF - skip_factor_validation = true // Skip factor values validation, factor values are important for downstream analysis - skip_experimental_design_validation = false // Skip the validation of the experimental design in the SDRF (replicates, etc) use_ols_cache_only = true // Use only the OLS cache for ontology validation (no network requests) // Tools flags diff --git a/nextflow_schema.json b/nextflow_schema.json index 383909c..68f262b 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -62,26 +62,6 @@ "description": "Settings for validating the input SDRF file.", "default": "", "properties": { - "validate_ontologies": { - "type": "boolean", - "description": "Check that ontology terms in an input SDRF file exist.", - "fa_icon": "far fa-check-square", - "help_text": "If false, only a basic readability check is performed on an input SDRF file. This option is useful when ontology providers are inaccessible.", - "default": true - }, - "skip_ms_validation": { - "type": "boolean", - "description": "Skip validation of mass spectrometry files.", - "fa_icon": "far fa-check-square", - "help_text": "Skip validation of mass spectrometry metadata, including PTMs, tolerances or enzymes. Only useful if your metadata is correct but the terms are not in ontologies." - }, - "skip_factor_validation": { - "type": "boolean", - "description": "Skip validation of factor columns.", - "fa_icon": "far fa-check-square", - "help_text": "Skip validation of factor columns in the SDRF. Only useful if your factor values are correct but the sdrf-validation library does not recognize them.", - "default": true - }, "skip_experimental_design_validation": { "type": "boolean", "description": "Skip validation of experimental design.", diff --git a/subworkflows/local/create_input_channel/main.nf b/subworkflows/local/create_input_channel/main.nf index d44e51c..5465021 100644 --- a/subworkflows/local/create_input_channel/main.nf +++ b/subworkflows/local/create_input_channel/main.nf @@ -15,7 +15,6 @@ workflow CREATE_INPUT_CHANNEL { // Always parse as SDRF using DIA-NN converter SDRF_PARSING(ch_sdrf) ch_versions = ch_versions.mix(SDRF_PARSING.out.versions) - ch_config = SDRF_PARSING.out.ch_sdrf_config_file ch_expdesign = SDRF_PARSING.out.ch_expdesign ch_diann_cfg = SDRF_PARSING.out.ch_diann_cfg @@ -27,7 +26,7 @@ workflow CREATE_INPUT_CHANNEL { experiment_id: file(ch_sdrf.toString()).baseName, ] - ch_config + ch_expdesign .splitCsv(header: true, sep: '\t') .map { row -> create_meta_channel(row, enzymes, files, wrapper) } .set { ch_meta_config_dia } diff --git a/workflows/dia.nf b/workflows/dia.nf index 5b66ee8..0b75b9a 100644 --- a/workflows/dia.nf +++ b/workflows/dia.nf @@ -7,8 +7,9 @@ // // MODULES: Local to the pipeline // -include { DIANN_MSSTATS } from '../modules/local/diann/diann_msstats/main' +include { DIANN_MSSTATS } from '../modules/local/diann/diann_msstats/main' include { PRELIMINARY_ANALYSIS } from '../modules/local/diann/preliminary_analysis/main' +include { PARSE_EMPIRICAL_LOG } from '../modules/local/parse_empirical_log/main' include { ASSEMBLE_EMPIRICAL_LIBRARY } from '../modules/local/diann/assemble_empirical_library/main' include { INSILICO_LIBRARY_GENERATION } from '../modules/local/diann/insilico_library_generation/main' include { INDIVIDUAL_ANALYSIS } from '../modules/local/diann/individual_analysis/main' @@ -59,26 +60,30 @@ workflow DIA { } if (params.skip_preliminary_analysis) { - def log_file = params.empirical_assembly_log ? file(params.empirical_assembly_log) : null - def parsed_m2 = "0" - def parsed_m1 = "0" - def parsed_w = "0" - if (log_file && log_file.exists()) { - def matcher = log_file.text =~ /Mass accuracy = ([0-9.]+)ppm, MS1 accuracy = ([0-9.]+)ppm, Scan window = ([0-9.]+)/ - if (matcher) { - parsed_m2 = matcher[0][1] - parsed_m1 = matcher[0][2] - parsed_w = matcher[0][3] + if (params.empirical_assembly_log) { + ch_log_file = Channel.fromPath(params.empirical_assembly_log, checkIfExists: true) + PARSE_EMPIRICAL_LOG(ch_log_file) + ch_parsed_vals = PARSE_EMPIRICAL_LOG.out.parsed_vals.map { parsed_str -> + def clean_str = parsed_str.trim() + if (clean_str == "0,0,0") { + return "${params.mass_acc_ms2},${params.mass_acc_ms1},${params.scan_window}" + } else { + return clean_str + } } + } else { + ch_parsed_vals = Channel.value("${params.mass_acc_ms2},${params.mass_acc_ms1},${params.scan_window}") } indiv_fin_analysis_in = ch_file_preparation_results .combine(ch_searchdb) .combine(speclib) - .map { meta_map, ms_file, fasta, library -> + .combine(ch_parsed_vals) + .map { meta_map, ms_file, fasta, library, param_string -> + def values = param_string.split(',') def new_meta = meta_map + [ - mass_acc_ms2 : parsed_m2, - mass_acc_ms1 : parsed_m1, - scan_window : parsed_w + mass_acc_ms2 : values[0], + mass_acc_ms1 : values[1], + scan_window : values[2] ] return [ new_meta, ms_file, fasta, library ] } @@ -121,9 +126,9 @@ workflow DIA { indiv_fin_analysis_in = ch_file_preparation_results .combine(ch_searchdb) .combine(ASSEMBLE_EMPIRICAL_LIBRARY.out.empirical_library) - .combine(ASSEMBLE_EMPIRICAL_LIBRARY.out.calibrated_params) - .map { meta_map, ms_file, fasta, library, param_file -> - def values = param_file.text.trim().split(',') + .combine(ASSEMBLE_EMPIRICAL_LIBRARY.out.calibrated_params_val) + .map { meta_map, ms_file, fasta, library, param_string -> + def values = param_string.trim().split(',') def new_meta = meta_map + [ mass_acc_ms2 : values[0], mass_acc_ms1 : values[1], From 79ce89fa86040fb75971c815e6e1bfb76eb5e050 Mon Sep 17 00:00:00 2001 From: yueqixuan Date: Tue, 31 Mar 2026 21:11:58 +0800 Subject: [PATCH 10/16] update --- modules/local/parse_empirical_log/main.nf | 2 +- modules/local/parse_empirical_log/meta.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/local/parse_empirical_log/main.nf b/modules/local/parse_empirical_log/main.nf index 4ff2e78..88680ab 100644 --- a/modules/local/parse_empirical_log/main.nf +++ b/modules/local/parse_empirical_log/main.nf @@ -15,4 +15,4 @@ process PARSE_EMPIRICAL_LOG { fi export PARSED_VALS="\$parsed" """ -} \ No newline at end of file +} diff --git a/modules/local/parse_empirical_log/meta.yml b/modules/local/parse_empirical_log/meta.yml index faf17fe..4388aa0 100644 --- a/modules/local/parse_empirical_log/meta.yml +++ b/modules/local/parse_empirical_log/meta.yml @@ -18,4 +18,4 @@ output: description: A comma-separated string containing extracted mass_acc_ms2, mass_acc_ms1, and scan_window values (e.g., "15.0,20.0,3"). Defaults to "0,0,0" if no match is found. pattern: "*,*,*" authors: - - "@Qi-Xuan Yue" \ No newline at end of file + - "@Qi-Xuan Yue" From c7cf820bef7a1486f32bda35a5a5782e06e553b1 Mon Sep 17 00:00:00 2001 From: yueqixuan Date: Tue, 31 Mar 2026 21:15:59 +0800 Subject: [PATCH 11/16] update --- nextflow_schema.json | 6 ------ 1 file changed, 6 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 68f262b..c507318 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -62,12 +62,6 @@ "description": "Settings for validating the input SDRF file.", "default": "", "properties": { - "skip_experimental_design_validation": { - "type": "boolean", - "description": "Skip validation of experimental design.", - "fa_icon": "far fa-check-square", - "help_text": "Skip validation of experimental design in the SDRF. Only useful if your experimental design is correct but the sdrf-validation library does not recognize it." - }, "use_ols_cache_only": { "type": "boolean", "description": "Use cached version of the Ontology Lookup Service (OLS).", From be2d488c2b23cdb1de19b9fabe957e7045fd713b Mon Sep 17 00:00:00 2001 From: yueqixuan Date: Tue, 31 Mar 2026 21:23:25 +0800 Subject: [PATCH 12/16] update --- modules/local/diann/assemble_empirical_library/main.nf | 1 + modules/local/parse_empirical_log/main.nf | 1 + 2 files changed, 2 insertions(+) diff --git a/modules/local/diann/assemble_empirical_library/main.nf b/modules/local/diann/assemble_empirical_library/main.nf index 809c46f..9a6622f 100644 --- a/modules/local/diann/assemble_empirical_library/main.nf +++ b/modules/local/diann/assemble_empirical_library/main.nf @@ -84,6 +84,7 @@ process ASSEMBLE_EMPIRICAL_LIBRARY { cp report.log.txt assemble_empirical_library.log + CALIBRATED_PARAMS_VAL="0,0,0" val_mass_acc_ms2=\$(grep "Averaged recommended settings" assemble_empirical_library.log | cut -d ' ' -f 11 | tr -cd "[0-9.]") val_mass_acc_ms1=\$(grep "Averaged recommended settings" assemble_empirical_library.log | cut -d ' ' -f 15 | tr -cd "[0-9.]") val_scan_window=\$(grep "Averaged recommended settings" assemble_empirical_library.log | cut -d ' ' -f 19 | tr -cd "[0-9.]") diff --git a/modules/local/parse_empirical_log/main.nf b/modules/local/parse_empirical_log/main.nf index 88680ab..7927d46 100644 --- a/modules/local/parse_empirical_log/main.nf +++ b/modules/local/parse_empirical_log/main.nf @@ -9,6 +9,7 @@ process PARSE_EMPIRICAL_LOG { script: """ + PARSED_VALS="0,0,0" parsed=\$(perl -ne 'if (/Mass accuracy = ([0-9.]+)ppm, MS1 accuracy = ([0-9.]+)ppm, Scan window = ([0-9.]+)/) { print "\$1,\$2,\$3"; exit; }' ${log_file}) if [ -z "\$parsed" ]; then parsed="0,0,0" From fb9eb34104982437bd25fd1194f4a31c130ca973 Mon Sep 17 00:00:00 2001 From: yueqixuan Date: Tue, 31 Mar 2026 21:48:40 +0800 Subject: [PATCH 13/16] fix --- modules/local/diann/assemble_empirical_library/main.nf | 2 +- modules/local/parse_empirical_log/main.nf | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/local/diann/assemble_empirical_library/main.nf b/modules/local/diann/assemble_empirical_library/main.nf index 9a6622f..0dfad72 100644 --- a/modules/local/diann/assemble_empirical_library/main.nf +++ b/modules/local/diann/assemble_empirical_library/main.nf @@ -19,7 +19,7 @@ process ASSEMBLE_EMPIRICAL_LIBRARY { path "empirical_library.*", emit: empirical_library path "assemble_empirical_library.log", emit: log path "versions.yml", emit: versions - env CALIBRATED_PARAMS_VAL, emit: calibrated_params_val + env "CALIBRATED_PARAMS_VAL", emit: calibrated_params_val when: task.ext.when == null || task.ext.when diff --git a/modules/local/parse_empirical_log/main.nf b/modules/local/parse_empirical_log/main.nf index 7927d46..0951ae0 100644 --- a/modules/local/parse_empirical_log/main.nf +++ b/modules/local/parse_empirical_log/main.nf @@ -5,7 +5,7 @@ process PARSE_EMPIRICAL_LOG { path log_file output: - env PARSED_VALS, emit: parsed_vals + env "PARSED_VALS", emit: parsed_vals script: """ From 0110a3de5c60d8e3af35fa697cee6850c50bd6a5 Mon Sep 17 00:00:00 2001 From: yueqixuan Date: Wed, 1 Apr 2026 10:05:43 +0800 Subject: [PATCH 14/16] update --- .../diann/assemble_empirical_library/main.nf | 10 -------- .../diann/assemble_empirical_library/meta.yml | 4 --- modules/local/parse_empirical_log/main.nf | 19 -------------- modules/local/parse_empirical_log/meta.yml | 21 ---------------- .../local/parse_empirical_log_task/main.nf | 24 ++++++++++++++++++ .../local/parse_empirical_log_task/meta.yml | 25 +++++++++++++++++++ .../local/parse_empirical_log/main.nf | 16 ++++++++++++ .../local/parse_empirical_log/meta.yml | 25 +++++++++++++++++++ workflows/dia.nf | 21 ++++++---------- 9 files changed, 98 insertions(+), 67 deletions(-) delete mode 100644 modules/local/parse_empirical_log/main.nf delete mode 100644 modules/local/parse_empirical_log/meta.yml create mode 100644 modules/local/parse_empirical_log_task/main.nf create mode 100644 modules/local/parse_empirical_log_task/meta.yml create mode 100644 subworkflows/local/parse_empirical_log/main.nf create mode 100644 subworkflows/local/parse_empirical_log/meta.yml diff --git a/modules/local/diann/assemble_empirical_library/main.nf b/modules/local/diann/assemble_empirical_library/main.nf index 0dfad72..034b95e 100644 --- a/modules/local/diann/assemble_empirical_library/main.nf +++ b/modules/local/diann/assemble_empirical_library/main.nf @@ -19,7 +19,6 @@ process ASSEMBLE_EMPIRICAL_LIBRARY { path "empirical_library.*", emit: empirical_library path "assemble_empirical_library.log", emit: log path "versions.yml", emit: versions - env "CALIBRATED_PARAMS_VAL", emit: calibrated_params_val when: task.ext.when == null || task.ext.when @@ -84,15 +83,6 @@ process ASSEMBLE_EMPIRICAL_LIBRARY { cp report.log.txt assemble_empirical_library.log - CALIBRATED_PARAMS_VAL="0,0,0" - val_mass_acc_ms2=\$(grep "Averaged recommended settings" assemble_empirical_library.log | cut -d ' ' -f 11 | tr -cd "[0-9.]") - val_mass_acc_ms1=\$(grep "Averaged recommended settings" assemble_empirical_library.log | cut -d ' ' -f 15 | tr -cd "[0-9.]") - val_scan_window=\$(grep "Averaged recommended settings" assemble_empirical_library.log | cut -d ' ' -f 19 | tr -cd "[0-9.]") - if [ -z "\$val_mass_acc_ms2" ]; then val_mass_acc_ms2="0"; fi - if [ -z "\$val_mass_acc_ms1" ]; then val_mass_acc_ms1="0"; fi - if [ -z "\$val_scan_window" ]; then val_scan_window="0"; fi - export CALIBRATED_PARAMS_VAL="\${val_mass_acc_ms2},\${val_mass_acc_ms1},\${val_scan_window}" - cat <<-END_VERSIONS > versions.yml "${task.process}": DIA-NN: \$(diann 2>&1 | grep "DIA-NN" | grep -oP "\\d+\\.\\d+(\\.\\w+)*(\\.[\\d]+)?") diff --git a/modules/local/diann/assemble_empirical_library/meta.yml b/modules/local/diann/assemble_empirical_library/meta.yml index f4a22bc..0d1f5b7 100644 --- a/modules/local/diann/assemble_empirical_library/meta.yml +++ b/modules/local/diann/assemble_empirical_library/meta.yml @@ -35,9 +35,5 @@ output: type: file description: File containing software version pattern: "versions.yml" - - calibrated_params_val: - type: string - description: A comma-separated string containing mass_acc_ms2, mass_acc_ms1, and scan_window extracted from the DIA-NN log. - pattern: "*,*,*" authors: - "@daichengxin" diff --git a/modules/local/parse_empirical_log/main.nf b/modules/local/parse_empirical_log/main.nf deleted file mode 100644 index 0951ae0..0000000 --- a/modules/local/parse_empirical_log/main.nf +++ /dev/null @@ -1,19 +0,0 @@ -process PARSE_EMPIRICAL_LOG { - label 'process_single' - - input: - path log_file - - output: - env "PARSED_VALS", emit: parsed_vals - - script: - """ - PARSED_VALS="0,0,0" - parsed=\$(perl -ne 'if (/Mass accuracy = ([0-9.]+)ppm, MS1 accuracy = ([0-9.]+)ppm, Scan window = ([0-9.]+)/) { print "\$1,\$2,\$3"; exit; }' ${log_file}) - if [ -z "\$parsed" ]; then - parsed="0,0,0" - fi - export PARSED_VALS="\$parsed" - """ -} diff --git a/modules/local/parse_empirical_log/meta.yml b/modules/local/parse_empirical_log/meta.yml deleted file mode 100644 index 4388aa0..0000000 --- a/modules/local/parse_empirical_log/meta.yml +++ /dev/null @@ -1,21 +0,0 @@ -name: "parse_empirical_log" -description: Parses the empirical assembly log file (e.g., from DIA-NN) to extract calibrated mass accuracies and scan window parameters for downstream analysis. -keywords: - - quantmsdiann -tools: - - "perl": - description: "Larry Wall's Practical Extraction and Report Language, used here for regex parsing." - homepage: "https://www.perl.org/" - documentation: "https://perldoc.perl.org/" -input: - - log_file: - type: file - description: The log file generated by the empirical library assembly step (e.g., DIA-NN stdout/stderr log) containing the Averaged recommended settings. - pattern: "*.log" -output: - - parsed_vals: - type: string - description: A comma-separated string containing extracted mass_acc_ms2, mass_acc_ms1, and scan_window values (e.g., "15.0,20.0,3"). Defaults to "0,0,0" if no match is found. - pattern: "*,*,*" -authors: - - "@Qi-Xuan Yue" diff --git a/modules/local/parse_empirical_log_task/main.nf b/modules/local/parse_empirical_log_task/main.nf new file mode 100644 index 0000000..8a698de --- /dev/null +++ b/modules/local/parse_empirical_log_task/main.nf @@ -0,0 +1,24 @@ +process PARSE_EMPIRICAL_LOG_TASK { + label 'process_single' + + input: + path log_file + + output: + stdout emit: parsed_vals + + script: + """ + val_mass_acc_ms2=\$(grep "Averaged recommended settings" ${log_file} | cut -d ' ' -f 11 | tr -cd "[0-9.]") + val_mass_acc_ms1=\$(grep "Averaged recommended settings" ${log_file} | cut -d ' ' -f 15 | tr -cd "[0-9.]") + val_scan_window=\$(grep "Averaged recommended settings" ${log_file} | cut -d ' ' -f 19 | tr -cd "[0-9.]") + + if [ -z "\$val_mass_acc_ms2" ]; then val_mass_acc_ms2=${params.mass_acc_ms2}; fi + if [ -z "\$val_mass_acc_ms1" ]; then val_mass_acc_ms1=${params.mass_acc_ms1}; fi + if [ -z "\$val_scan_window" ]; then val_scan_window=${params.scan_window}; fi + + CALIBRATED_PARAMS_VAL="\${val_mass_acc_ms2},\${val_mass_acc_ms1},\${val_scan_window}" + + echo -n "\$CALIBRATED_PARAMS_VAL" + """ +} diff --git a/modules/local/parse_empirical_log_task/meta.yml b/modules/local/parse_empirical_log_task/meta.yml new file mode 100644 index 0000000..8bdc6b7 --- /dev/null +++ b/modules/local/parse_empirical_log_task/meta.yml @@ -0,0 +1,25 @@ +name: "parse_empirical_log_task" +description: "Parses the empirical assembly log file (from DIA-NN) to extract calibrated mass accuracies and scan window parameters for downstream analysis." +keywords: + - quantmsdiann + - diann + - log + - parse + - proteomics + - mass_accuracy +tools: + - "coreutils": + description: "Standard GNU core utilities (grep, cut, tr, echo) used for text processing and log parsing." + homepage: "https://www.gnu.org/software/coreutils/" + documentation: "https://www.gnu.org/software/coreutils/manual/" +input: + - log_file: + type: file + description: "The log file generated by the empirical library assembly step (DIA-NN stdout/stderr log) containing the Averaged recommended settings." + pattern: "*.log" +output: + - parsed_vals: + type: string + description: "A comma-separated string containing extracted mass_acc_ms2, mass_acc_ms1, and scan_window values (e.g., '15,20,8'). +authors: + - "@bigbio" diff --git a/subworkflows/local/parse_empirical_log/main.nf b/subworkflows/local/parse_empirical_log/main.nf new file mode 100644 index 0000000..be86eef --- /dev/null +++ b/subworkflows/local/parse_empirical_log/main.nf @@ -0,0 +1,16 @@ + +include { PARSE_EMPIRICAL_LOG_TASK } from '../../../modules/local/parse_empirical_log_task' + +workflow PARSE_EMPIRICAL_LOG { + take: + ch_log_file + + main: + PARSE_EMPIRICAL_LOG_TASK(ch_log_file) + + ch_parsed_vals = PARSE_EMPIRICAL_LOG_TASK.out.parsed_vals + .ifEmpty("${params.mass_acc_ms2},${params.mass_acc_ms1},${params.scan_window}") + + emit: + parsed_vals = ch_parsed_vals +} \ No newline at end of file diff --git a/subworkflows/local/parse_empirical_log/meta.yml b/subworkflows/local/parse_empirical_log/meta.yml new file mode 100644 index 0000000..f6baabb --- /dev/null +++ b/subworkflows/local/parse_empirical_log/meta.yml @@ -0,0 +1,25 @@ +name: "parse_empirical_log" +description: "Subworkflow for parsing the empirical assembly log file (from DIA-NN) to extract calibrated parameters." +keywords: + - parse + - log + - diann + - proteomics + - parameters + - mass_accuracy +components: + - parse_empirical_log_task +input: + - ch_log_file: + type: file + description: | + The log file generated by the empirical library assembly step. Can be an empty channel if the user did not provide a log file. +output: + - parsed_vals: + type: string + description: | + A value channel containing a comma-separated string of the extracted parameters (mass_acc_ms2, mass_acc_ms1, scan_window). Falls back to default pipeline parameters if the log is empty or invalid. +authors: + - "@bigbio" +maintainers: + - "@bigbio" diff --git a/workflows/dia.nf b/workflows/dia.nf index 0b75b9a..67e6e1e 100644 --- a/workflows/dia.nf +++ b/workflows/dia.nf @@ -9,7 +9,7 @@ // include { DIANN_MSSTATS } from '../modules/local/diann/diann_msstats/main' include { PRELIMINARY_ANALYSIS } from '../modules/local/diann/preliminary_analysis/main' -include { PARSE_EMPIRICAL_LOG } from '../modules/local/parse_empirical_log/main' +include { PARSE_EMPIRICAL_LOG } from '../subworkflows/local/parse_empirical_log/main' include { ASSEMBLE_EMPIRICAL_LIBRARY } from '../modules/local/diann/assemble_empirical_library/main' include { INSILICO_LIBRARY_GENERATION } from '../modules/local/diann/insilico_library_generation/main' include { INDIVIDUAL_ANALYSIS } from '../modules/local/diann/individual_analysis/main' @@ -61,19 +61,12 @@ workflow DIA { if (params.skip_preliminary_analysis) { if (params.empirical_assembly_log) { - ch_log_file = Channel.fromPath(params.empirical_assembly_log, checkIfExists: true) - PARSE_EMPIRICAL_LOG(ch_log_file) - ch_parsed_vals = PARSE_EMPIRICAL_LOG.out.parsed_vals.map { parsed_str -> - def clean_str = parsed_str.trim() - if (clean_str == "0,0,0") { - return "${params.mass_acc_ms2},${params.mass_acc_ms1},${params.scan_window}" - } else { - return clean_str - } - } + ch_empirical_log = Channel.fromPath(params.empirical_assembly_log, checkIfExists: true) } else { - ch_parsed_vals = Channel.value("${params.mass_acc_ms2},${params.mass_acc_ms1},${params.scan_window}") + ch_empirical_log = Channel.empty() } + PARSE_EMPIRICAL_LOG(ch_empirical_log) + ch_parsed_vals = PARSE_EMPIRICAL_LOG.out.parsed_vals indiv_fin_analysis_in = ch_file_preparation_results .combine(ch_searchdb) .combine(speclib) @@ -123,10 +116,12 @@ workflow DIA { ) ch_software_versions = ch_software_versions .mix(ASSEMBLE_EMPIRICAL_LIBRARY.out.versions) + PARSE_EMPIRICAL_LOG(ASSEMBLE_EMPIRICAL_LIBRARY.out.log) + ch_parsed_vals = PARSE_EMPIRICAL_LOG.out.parsed_vals indiv_fin_analysis_in = ch_file_preparation_results .combine(ch_searchdb) .combine(ASSEMBLE_EMPIRICAL_LIBRARY.out.empirical_library) - .combine(ASSEMBLE_EMPIRICAL_LIBRARY.out.calibrated_params_val) + .combine(ch_parsed_vals) .map { meta_map, ms_file, fasta, library, param_string -> def values = param_string.trim().split(',') def new_meta = meta_map + [ From a7db13bc206108abac3a1b0cacfdd3b77185de5e Mon Sep 17 00:00:00 2001 From: yueqixuan Date: Wed, 1 Apr 2026 10:07:24 +0800 Subject: [PATCH 15/16] update --- modules/local/parse_empirical_log_task/main.nf | 4 ++-- subworkflows/local/parse_empirical_log/main.nf | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/local/parse_empirical_log_task/main.nf b/modules/local/parse_empirical_log_task/main.nf index 8a698de..dcee2c8 100644 --- a/modules/local/parse_empirical_log_task/main.nf +++ b/modules/local/parse_empirical_log_task/main.nf @@ -12,11 +12,11 @@ process PARSE_EMPIRICAL_LOG_TASK { val_mass_acc_ms2=\$(grep "Averaged recommended settings" ${log_file} | cut -d ' ' -f 11 | tr -cd "[0-9.]") val_mass_acc_ms1=\$(grep "Averaged recommended settings" ${log_file} | cut -d ' ' -f 15 | tr -cd "[0-9.]") val_scan_window=\$(grep "Averaged recommended settings" ${log_file} | cut -d ' ' -f 19 | tr -cd "[0-9.]") - + if [ -z "\$val_mass_acc_ms2" ]; then val_mass_acc_ms2=${params.mass_acc_ms2}; fi if [ -z "\$val_mass_acc_ms1" ]; then val_mass_acc_ms1=${params.mass_acc_ms1}; fi if [ -z "\$val_scan_window" ]; then val_scan_window=${params.scan_window}; fi - + CALIBRATED_PARAMS_VAL="\${val_mass_acc_ms2},\${val_mass_acc_ms1},\${val_scan_window}" echo -n "\$CALIBRATED_PARAMS_VAL" diff --git a/subworkflows/local/parse_empirical_log/main.nf b/subworkflows/local/parse_empirical_log/main.nf index be86eef..0d1322d 100644 --- a/subworkflows/local/parse_empirical_log/main.nf +++ b/subworkflows/local/parse_empirical_log/main.nf @@ -13,4 +13,4 @@ workflow PARSE_EMPIRICAL_LOG { emit: parsed_vals = ch_parsed_vals -} \ No newline at end of file +} From bfde7a99359ae5123730432a57ab9999dc47dd84 Mon Sep 17 00:00:00 2001 From: yueqixuan Date: Wed, 1 Apr 2026 10:10:47 +0800 Subject: [PATCH 16/16] fix --- modules/local/parse_empirical_log_task/meta.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modules/local/parse_empirical_log_task/meta.yml b/modules/local/parse_empirical_log_task/meta.yml index 8bdc6b7..df72533 100644 --- a/modules/local/parse_empirical_log_task/meta.yml +++ b/modules/local/parse_empirical_log_task/meta.yml @@ -20,6 +20,7 @@ input: output: - parsed_vals: type: string - description: "A comma-separated string containing extracted mass_acc_ms2, mass_acc_ms1, and scan_window values (e.g., '15,20,8'). + description: "A comma-separated string containing extracted mass_acc_ms2, mass_acc_ms1, and scan_window values (e.g., '15,20,8')." + pattern: "*,*,*" authors: - "@bigbio"