From bd2765390797395642975ebee573446411739010 Mon Sep 17 00:00:00 2001
From: Yasset Perez-Riverol <ypriverol@gmail.com>
Date: Sun, 22 Mar 2026 11:00:26 +0000
Subject: [PATCH 01/16] improvements to use DIANN convert

---
 .gitignore                                    |  1 +
 conf/tests/test_dia_local.config              | 15 ++++
 docs/usage.md                                 | 85 +++++++++++++++++++
 .../diann/assemble_empirical_library/main.nf  |  5 +-
 .../local/diann/final_quantification/main.nf  |  7 +-
 .../local/diann/individual_analysis/main.nf   | 18 +++-
 .../diann/insilico_library_generation/main.nf |  2 +-
 .../local/diann/preliminary_analysis/main.nf  | 16 +++-
 modules/local/samplesheet_check/main.nf       | 11 +--
 modules/local/sdrf_parsing/main.nf            | 27 ++----
 nextflow.config                               |  6 ++
 .../local/create_input_channel/main.nf        | 42 ++++-----
 workflows/dia.nf                              | 25 +++---
 workflows/quantmsdiann.nf                     |  1 +
 14 files changed, 191 insertions(+), 70 deletions(-)
 create mode 100644 conf/tests/test_dia_local.config

diff --git a/.gitignore b/.gitignore
index 99acd2c..114452d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -17,3 +17,4 @@ null/
 .cursor/rules/codacy.mdc
 .codacy/
 .github/instructions/codacy.instructions.md
+docs/superpowers/
\ No newline at end of file
diff --git a/conf/tests/test_dia_local.config b/conf/tests/test_dia_local.config
new file mode 100644
index 0000000..1dba6fc
--- /dev/null
+++ b/conf/tests/test_dia_local.config
@@ -0,0 +1,15 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Local container overrides for testing with dev builds of sdrf-pipelines and quantms-utils.
+    Uses docker.io/ prefix to prevent quay.io registry from being prepended.
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+process {
+    withName: 'SDRF_PARSING' {
+        container = 'docker.io/local/sdrf-pipelines:dev'
+    }
+    withName: 'DIANN_MSSTATS' {
+        container = 'docker.io/local/quantms-utils:dev'
+    }
+}
diff --git a/docs/usage.md b/docs/usage.md
index 4464cac..23fcc4b 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -88,6 +88,91 @@ nextflow run . -profile test_dia_dotd,docker --outdir results
 nextflow run . -profile test_latest_dia,docker --outdir results
 ```
 
+## DIA-NN parameters
+
+The pipeline passes parameters to DIA-NN at different steps. Some parameters come from the SDRF metadata (per-file), some from `nextflow.config` defaults, and some from the command line. The tables below document each parameter, its source, and the pipeline steps that use it.
+
+### Parameter sources
+
+Parameters are resolved in this priority order:
+1. **SDRF metadata** (per-file, from `convert-diann` design file) — highest priority
+2. **Pipeline parameters** (`--param_name` on command line or params file)
+3. **Nextflow defaults** (`nextflow.config`) — lowest priority
+
+### Pipeline steps
+
+| Step | Description |
+|------|-------------|
+| **INSILICO_LIBRARY_GENERATION** | Predicts a spectral library from FASTA using DIA-NN's deep learning |
+| **PRELIMINARY_ANALYSIS** | Per-file calibration and mass accuracy estimation (first pass) |
+| **ASSEMBLE_EMPIRICAL_LIBRARY** | Builds consensus empirical library from preliminary results |
+| **INDIVIDUAL_ANALYSIS** | Per-file quantification with the empirical library (second pass) |
+| **FINAL_QUANTIFICATION** | Aggregates all files into protein/peptide matrices |
+
+### Per-file parameters from SDRF
+
+These parameters are extracted per-file from the SDRF via `convert-diann` and stored in `diann_design.tsv`:
+
+| DIA-NN flag | SDRF column | Design column | Steps | Notes |
+|---|---|---|---|---|
+| `--mass-acc-ms1` | `comment[precursor mass tolerance]` | `PrecursorMassTolerance` | PRELIMINARY, INDIVIDUAL | Falls back to auto-detect if missing or not ppm |
+| `--mass-acc` | `comment[fragment mass tolerance]` | `FragmentMassTolerance` | PRELIMINARY, INDIVIDUAL | Falls back to auto-detect if missing or not ppm |
+| `--min-pr-mz` | `comment[ms1 scan range]` or `comment[ms min mz]` | `MS1MinMz` | PRELIMINARY, INDIVIDUAL | Per-file for gas-phase fractionation (GPF); global broadest for INSILICO |
+| `--max-pr-mz` | `comment[ms1 scan range]` or `comment[ms max mz]` | `MS1MaxMz` | PRELIMINARY, INDIVIDUAL | Per-file for GPF; global broadest for INSILICO |
+| `--min-fr-mz` | `comment[ms2 scan range]` or `comment[ms2 min mz]` | `MS2MinMz` | PRELIMINARY, INDIVIDUAL | Per-file for GPF; global broadest for INSILICO |
+| `--max-fr-mz` | `comment[ms2 scan range]` or `comment[ms2 max mz]` | `MS2MaxMz` | PRELIMINARY, INDIVIDUAL | Per-file for GPF; global broadest for INSILICO |
+
+### Global parameters from config
+
+These parameters apply globally across all files. They are set in `diann_config.cfg` (from SDRF) or as pipeline parameters:
+
+| DIA-NN flag | Pipeline parameter | Default | Steps | Notes |
+|---|---|---|---|---|
+| `--cut` | (from SDRF enzyme) | — | ALL | Enzyme cut rule, derived from `comment[cleavage agent details]` |
+| `--fixed-mod` | (from SDRF) | — | ALL | Fixed modifications from `comment[modification parameters]` |
+| `--var-mod` | (from SDRF) | — | ALL | Variable modifications from `comment[modification parameters]` |
+| `--monitor-mod` | `--enable_mod_localization` + `--mod_localization` | `false` / `Phospho (S),Phospho (T),Phospho (Y)` | INDIVIDUAL, FINAL | PTM site localization scoring |
+| `--window` | `--scan_window` | `8` | PRELIMINARY, ASSEMBLE, INDIVIDUAL | Scan window; auto-detected when `--scan_window_automatic=true` |
+| `--quick-mass-acc` | `--quick_mass_acc` | `true` | PRELIMINARY | Fast mass accuracy calibration |
+| `--min-corr 2 --corr-diff 1 --time-corr-only` | `--performance_mode` | `true` | PRELIMINARY | High-speed, low-RAM mode |
+| `--pg-level` | `--pg_level` | `2` | INDIVIDUAL, FINAL | Protein grouping level |
+| `--species-genes` | `--species_genes` | `false` | FINAL | Use species-specific gene names |
+| `--no-norm` | `--diann_normalize` | `true` | FINAL | Passed to DIA-NN (disabling normalization) when `--diann_normalize` is `false` |
+
+### PTM site localization (`--monitor-mod`)
+
+DIA-NN supports PTM site localization scoring via `--monitor-mod`. When enabled, DIA-NN reports `PTM.Site.Confidence` and `PTM.Q.Value` columns for the specified modifications.
+
+**Important**: `--monitor-mod` is only applied to **INDIVIDUAL_ANALYSIS** and **FINAL_QUANTIFICATION**. It is intentionally excluded from earlier steps because:
+
+- **INSILICO_LIBRARY_GENERATION**: Library generation needs all peptides (modified + unmodified). `--monitor-mod` would filter to only modified peptides.
+- **PRELIMINARY_ANALYSIS**: Calibration needs all peptides for robust mass accuracy estimation.
+- **ASSEMBLE_EMPIRICAL_LIBRARY**: Library assembly needs broad peptide coverage.
+
+To enable PTM site localization:
+
+```bash
+nextflow run bigbio/quantmsdiann \
+    --enable_mod_localization \
+    --mod_localization 'Phospho (S),Phospho (T),Phospho (Y)' \
+    ...
+```
+
+The parameter accepts two formats:
+- **Modification names** (quantms-compatible): `Phospho (S),Phospho (T),Phospho (Y)` — the site info in parentheses is stripped and the base name is mapped to its UniMod accession
+- **UniMod accessions** (direct): `UniMod:21,UniMod:1`
+
+Supported modification name mappings:
+
+| Name | UniMod ID | Example |
+|---|---|---|
+| Phospho | `UniMod:21` | `Phospho (S),Phospho (T),Phospho (Y)` |
+| GlyGly | `UniMod:121` | `GlyGly (K)` |
+| Acetyl | `UniMod:1` | `Acetyl (Protein N-term)` |
+| Oxidation | `UniMod:35` | `Oxidation (M)` |
+| Deamidated | `UniMod:7` | `Deamidated (N),Deamidated (Q)` |
+| Methylation | `UniMod:34` | `Methylation (K),Methylation (R)` |
+
 ## Optional outputs
 
 By default, only final result files are published. Intermediate files can be exported using `save_*` parameters or via `ext.*` properties in a custom Nextflow config.
diff --git a/modules/local/diann/assemble_empirical_library/main.nf b/modules/local/diann/assemble_empirical_library/main.nf
index 93596d2..8d266d3 100644
--- a/modules/local/diann/assemble_empirical_library/main.nf
+++ b/modules/local/diann/assemble_empirical_library/main.nf
@@ -30,7 +30,8 @@ process ASSEMBLE_EMPIRICAL_LIBRARY {
          '--temp', '--threads', '--verbose', '--lib', '--f', '--fasta',
          '--mass-acc', '--mass-acc-ms1', '--window',
          '--individual-mass-acc', '--individual-windows',
-         '--out-lib', '--use-quant', '--gen-spec-lib', '--rt-profiling']
+         '--out-lib', '--use-quant', '--gen-spec-lib', '--rt-profiling',
+         '--monitor-mod', '--var-mod', '--fixed-mod']
     // Sort by length descending so longer flags (e.g. --mass-acc-ms1) are matched before shorter prefixes (--mass-acc)
     blocked.sort { a -> -a.length() }.each { flag ->
         def flagPattern = '(?<=^|\\s)' + java.util.regex.Pattern.quote(flag) + '(?=\\s|\$)(\\s+(?!-{1,2}[a-zA-Z])\\S+)*'
@@ -58,7 +59,7 @@ process ASSEMBLE_EMPIRICAL_LIBRARY {
 
     ls -lcth
 
-    # Extract --var-mod and --fixed-mod flags from diann_config.cfg (DIA-NN best practice)
+    # Extract --var-mod and --fixed-mod flags from diann_config.cfg (no --monitor-mod: library assembly needs all peptides)
     mod_flags=\$(cat ${diann_config} | grep -oP '(--var-mod\\s+\\S+|--fixed-mod\\s+\\S+)' | tr '\\n' ' ')
 
     diann   --f ${(ms_files as List).join(' --f ')} \\
diff --git a/modules/local/diann/final_quantification/main.nf b/modules/local/diann/final_quantification/main.nf
index 9a13e98..57ad3de 100644
--- a/modules/local/diann/final_quantification/main.nf
+++ b/modules/local/diann/final_quantification/main.nf
@@ -46,7 +46,8 @@ process FINAL_QUANTIFICATION {
          '--temp', '--threads', '--verbose', '--lib', '--f', '--fasta',
          '--use-quant', '--matrices', '--out', '--relaxed-prot-inf', '--pg-level',
          '--qvalue', '--window', '--individual-windows',
-         '--species-genes', '--report-decoys', '--xic', '--no-norm']
+         '--species-genes', '--report-decoys', '--xic', '--no-norm',
+         '--monitor-mod', '--var-mod', '--fixed-mod']
     // Sort by length descending so longer flags (e.g. --individual-windows) are matched before shorter prefixes (--window)
     blocked.sort { a -> -a.length() }.each { flag ->
         def flagPattern = '(?<=^|\\s)' + java.util.regex.Pattern.quote(flag) + '(?=\\s|\$)(\\s+(?!-{1,2}[a-zA-Z])\\S+)*'
@@ -71,8 +72,8 @@ process FINAL_QUANTIFICATION {
     # Notes: if .quant files are passed, mzml/.d files are not accessed, so the name needs to be passed but files
     # do not need to pe present.
 
-    # Extract --var-mod and --fixed-mod flags from diann_config.cfg (DIA-NN best practice)
-    mod_flags=\$(cat ${diann_config} | grep -oP '(--var-mod\\s+\\S+|--fixed-mod\\s+\\S+)' | tr '\\n' ' ')
+    # Extract --var-mod, --fixed-mod, and --monitor-mod flags from diann_config.cfg
+    mod_flags=\$(cat ${diann_config} | grep -oP '(--var-mod\\s+\\S+|--fixed-mod\\s+\\S+|--monitor-mod\\s+\\S+)' | tr '\\n' ' ')
 
     diann   --lib ${empirical_library} \\
             --fasta ${fasta} \\
diff --git a/modules/local/diann/individual_analysis/main.nf b/modules/local/diann/individual_analysis/main.nf
index c4dea88..8e5c781 100644
--- a/modules/local/diann/individual_analysis/main.nf
+++ b/modules/local/diann/individual_analysis/main.nf
@@ -25,7 +25,9 @@ process INDIVIDUAL_ANALYSIS {
     def blocked = ['--use-quant', '--gen-spec-lib', '--out-lib', '--matrices', '--out', '--rt-profiling',
          '--temp', '--threads', '--verbose', '--lib', '--f', '--fasta',
          '--mass-acc', '--mass-acc-ms1', '--window',
-         '--no-ifs-removal', '--no-main-report', '--relaxed-prot-inf', '--pg-level']
+         '--no-ifs-removal', '--no-main-report', '--relaxed-prot-inf', '--pg-level',
+         '--min-pr-mz', '--max-pr-mz', '--min-fr-mz', '--max-fr-mz',
+         '--monitor-mod', '--var-mod', '--fixed-mod']
     // Sort by length descending so longer flags (e.g. --mass-acc-ms1) are matched before shorter prefixes (--mass-acc)
     blocked.sort { a -> -a.length() }.each { flag ->
         def flagPattern = '(?<=^|\\s)' + java.util.regex.Pattern.quote(flag) + '(?=\\s|\$)(\\s+(?!-{1,2}[a-zA-Z])\\S+)*'
@@ -59,9 +61,15 @@ process INDIVIDUAL_ANALYSIS {
 
     diann_no_peptidoforms = params.diann_no_peptidoforms ? "--no-peptidoforms" : ""
 
+    // Per-file scan ranges from SDRF (empty = no flag, DIA-NN auto-detects)
+    min_pr_mz = meta['ms1minmz'] ? "--min-pr-mz ${meta['ms1minmz']}" : ""
+    max_pr_mz = meta['ms1maxmz'] ? "--max-pr-mz ${meta['ms1maxmz']}" : ""
+    min_fr_mz = meta['ms2minmz'] ? "--min-fr-mz ${meta['ms2minmz']}" : ""
+    max_fr_mz = meta['ms2maxmz'] ? "--max-fr-mz ${meta['ms2maxmz']}" : ""
+
     """
-    # Extract --var-mod and --fixed-mod flags from diann_config.cfg (DIA-NN best practice)
-    mod_flags=\$(cat ${diann_config} | grep -oP '(--var-mod\\s+\\S+|--fixed-mod\\s+\\S+)' | tr '\\n' ' ')
+    # Extract --var-mod, --fixed-mod, and --monitor-mod flags from diann_config.cfg
+    mod_flags=\$(cat ${diann_config} | grep -oP '(--var-mod\\s+\\S+|--fixed-mod\\s+\\S+|--monitor-mod\\s+\\S+)' | tr '\\n' ' ')
 
     diann   --lib ${library} \\
             --f ${ms_file} \\
@@ -76,6 +84,10 @@ process INDIVIDUAL_ANALYSIS {
             --no-main-report \\
             --relaxed-prot-inf \\
             --pg-level $params.pg_level \\
+            ${min_pr_mz} \\
+            ${max_pr_mz} \\
+            ${min_fr_mz} \\
+            ${max_fr_mz} \\
             ${diann_no_peptidoforms} \\
             \${mod_flags} \\
             $args
diff --git a/modules/local/diann/insilico_library_generation/main.nf b/modules/local/diann/insilico_library_generation/main.nf
index d61fc63..d76d79a 100644
--- a/modules/local/diann/insilico_library_generation/main.nf
+++ b/modules/local/diann/insilico_library_generation/main.nf
@@ -29,7 +29,7 @@ process INSILICO_LIBRARY_GENERATION {
          '--missed-cleavages', '--min-pep-len', '--max-pep-len',
          '--min-pr-charge', '--max-pr-charge', '--var-mods',
          '--min-pr-mz', '--max-pr-mz', '--min-fr-mz', '--max-fr-mz',
-         '--met-excision']
+         '--met-excision', '--monitor-mod']
     // Sort by length descending so longer flags (e.g. --fasta-search) are matched before shorter prefixes (--fasta, --f)
     blocked.sort { a -> -a.length() }.each { flag ->
         def flagPattern = '(?<=^|\\s)' + java.util.regex.Pattern.quote(flag) + '(?=\\s|\$)(\\s+(?!-{1,2}[a-zA-Z])\\S+)*'
diff --git a/modules/local/diann/preliminary_analysis/main.nf b/modules/local/diann/preliminary_analysis/main.nf
index f085343..81ea1db 100644
--- a/modules/local/diann/preliminary_analysis/main.nf
+++ b/modules/local/diann/preliminary_analysis/main.nf
@@ -25,7 +25,9 @@ process PRELIMINARY_ANALYSIS {
     def blocked = ['--use-quant', '--gen-spec-lib', '--out-lib', '--matrices', '--out',
          '--temp', '--threads', '--verbose', '--lib', '--f', '--fasta',
          '--mass-acc', '--mass-acc-ms1', '--window',
-         '--quick-mass-acc', '--min-corr', '--corr-diff', '--time-corr-only']
+         '--quick-mass-acc', '--min-corr', '--corr-diff', '--time-corr-only',
+         '--min-pr-mz', '--max-pr-mz', '--min-fr-mz', '--max-fr-mz',
+         '--monitor-mod', '--var-mod', '--fixed-mod']
     // Sort by length descending so longer flags (e.g. --mass-acc-ms1) are matched before shorter prefixes (--mass-acc)
     blocked.sort { a -> -a.length() }.each { flag ->
         def flagPattern = '(?<=^|\\s)' + java.util.regex.Pattern.quote(flag) + '(?=\\s|\$)(\\s+(?!-{1,2}[a-zA-Z])\\S+)*'
@@ -55,6 +57,12 @@ process PRELIMINARY_ANALYSIS {
     // Notes: Use double quotes for params, so that it is escaped in the shell.
     scan_window = params.scan_window_automatic ? '' : "--window $params.scan_window"
 
+    // Per-file scan ranges from SDRF (empty = no flag, DIA-NN auto-detects)
+    min_pr_mz = meta['ms1minmz'] ? "--min-pr-mz ${meta['ms1minmz']}" : ""
+    max_pr_mz = meta['ms1maxmz'] ? "--max-pr-mz ${meta['ms1maxmz']}" : ""
+    min_fr_mz = meta['ms2minmz'] ? "--min-fr-mz ${meta['ms2minmz']}" : ""
+    max_fr_mz = meta['ms2maxmz'] ? "--max-fr-mz ${meta['ms2maxmz']}" : ""
+
     """
     # Precursor Tolerance value was: ${meta['precursormasstolerance']}
     # Fragment Tolerance value was: ${meta['fragmentmasstolerance']}
@@ -63,7 +71,7 @@ process PRELIMINARY_ANALYSIS {
 
     # Final mass accuracy is '${mass_acc}'
 
-    # Extract --var-mod and --fixed-mod flags from diann_config.cfg (DIA-NN best practice)
+    # Extract --var-mod and --fixed-mod flags from diann_config.cfg (no --monitor-mod here: calibration needs all peptides)
     mod_flags=\$(cat ${diann_config} | grep -oP '(--var-mod\\s+\\S+|--fixed-mod\\s+\\S+)' | tr '\\n' ' ')
 
     diann   --lib ${predict_library} \\
@@ -75,6 +83,10 @@ process PRELIMINARY_ANALYSIS {
             ${mass_acc} \\
             ${quick_mass_acc} \\
             ${performance_flags} \\
+            ${min_pr_mz} \\
+            ${max_pr_mz} \\
+            ${min_fr_mz} \\
+            ${max_fr_mz} \\
             ${diann_no_peptidoforms} \\
             \${mod_flags} \\
             $args
diff --git a/modules/local/samplesheet_check/main.nf b/modules/local/samplesheet_check/main.nf
index ecb6e23..1c1c9d4 100644
--- a/modules/local/samplesheet_check/main.nf
+++ b/modules/local/samplesheet_check/main.nf
@@ -20,10 +20,6 @@ process SAMPLESHEET_CHECK {
 
     script:
     def args = task.ext.args ?: ''
-    def string_skip_sdrf_validation = params.validate_ontologies == false ? "--skip_sdrf_validation" : ""
-    def string_skip_ms_validation = params.skip_ms_validation == true ? "--skip_ms_validation" : ""
-    def string_skip_factor_validation = params.skip_factor_validation == true ? "--skip_factor_validation" : ""
-    def string_skip_experimental_design_validation = params.skip_experimental_design_validation == true ? "--skip_experimental_design_validation" : ""
     def string_use_ols_cache_only = params.use_ols_cache_only == true ? "--use_ols_cache_only" : ""
 
     """
@@ -40,11 +36,8 @@ process SAMPLESHEET_CHECK {
         cp "${input_file}" "\$OUTPUT_FILE"
     fi
 
-    quantmsutilsc checksamplesheet --exp_design "\$OUTPUT_FILE" --is_sdrf \\
-    ${string_skip_sdrf_validation} \\
-    ${string_skip_ms_validation} \\
-    ${string_skip_factor_validation} \\
-    ${string_skip_experimental_design_validation} \\
+    quantmsutilsc checksamplesheet --exp_design "\$OUTPUT_FILE" \\
+    --minimal \\
     ${string_use_ols_cache_only} \\
     $args \\
     2>&1 | tee input_check.log
diff --git a/modules/local/sdrf_parsing/main.nf b/modules/local/sdrf_parsing/main.nf
index e379fac..d150321 100644
--- a/modules/local/sdrf_parsing/main.nf
+++ b/modules/local/sdrf_parsing/main.nf
@@ -10,33 +10,24 @@ process SDRF_PARSING {
     path sdrf
 
     output:
-    path "${sdrf.baseName}_openms_design.tsv", emit: ch_expdesign
-    path "${sdrf.baseName}_config.tsv"       , emit: ch_sdrf_config_file
-    path "*.log"                             , emit: log
-    path "versions.yml"                      , emit: versions
+    path "diann_design.tsv"  , emit: ch_sdrf_config_file
+    path "diann_design.tsv"  , emit: ch_expdesign
+    path "diann_config.cfg"  , emit: ch_diann_cfg
+    path "*.log"             , emit: log
+    path "versions.yml"      , emit: versions
 
     script:
     def args = task.ext.args ?: ''
-    if (params.convert_dotd) {
-        extensionconversions = ",.d.gz:.mzML,.d.tar.gz:.mzML,d.tar:.mzML,.d.zip:.mzML,.d:.mzML"
-    } else {
-        extensionconversions = ",.gz:,.tar.gz:,.tar:,.zip:"
-    }
+    def mod_loc_flag = (params.enable_mod_localization && params.mod_localization) ?
+        "--mod_localization '${params.mod_localization}'" : ''
 
     """
-    ## -t2 since the one-table format parser is broken in OpenMS2.5
-    ## -l for legacy behavior to always add sample columns
-
-    parse_sdrf convert-openms \\
-        -t2 -l \\
-        --extension_convert raw:mzML$extensionconversions \\
+    parse_sdrf convert-diann \\
         -s ${sdrf} \\
+        ${mod_loc_flag} \\
         $args \\
         2>&1 | tee ${sdrf.baseName}_parsing.log
 
-    mv openms.tsv ${sdrf.baseName}_config.tsv
-    mv experimental_design.tsv ${sdrf.baseName}_openms_design.tsv
-
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
         sdrf-pipelines: \$(parse_sdrf --version 2>/dev/null | awk -F ' ' '{print \$2}')
diff --git a/nextflow.config b/nextflow.config
index 982e37d..bebc264 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -62,6 +62,12 @@ params {
     // Optional outputs — control which intermediate files are published
     save_speclib_tsv        = false  // Save the TSV spectral library from in-silico generation
 
+    // DIA-NN: PTM site localization (--monitor-mod)
+    enable_mod_localization  = false
+    // Comma-separated modification names, e.g. 'Phospho (S),Phospho (T),Phospho (Y)'
+    // or UniMod accessions, e.g. 'UniMod:21,UniMod:1'
+    mod_localization         = 'Phospho (S),Phospho (T),Phospho (Y)'
+
     // DIA-NN: PRELIMINARY_ANALYSIS — calibration & mass accuracy
     scan_window             = 8
     scan_window_automatic   = true
diff --git a/subworkflows/local/create_input_channel/main.nf b/subworkflows/local/create_input_channel/main.nf
index 4f5503a..d44e51c 100644
--- a/subworkflows/local/create_input_channel/main.nf
+++ b/subworkflows/local/create_input_channel/main.nf
@@ -12,11 +12,12 @@ workflow CREATE_INPUT_CHANNEL {
     main:
     ch_versions = channel.empty()
 
-    // Always parse as SDRF (OpenMS experimental design format deprecated)
+    // Always parse as SDRF using DIA-NN converter
     SDRF_PARSING(ch_sdrf)
     ch_versions = ch_versions.mix(SDRF_PARSING.out.versions)
     ch_config = SDRF_PARSING.out.ch_sdrf_config_file
     ch_expdesign = SDRF_PARSING.out.ch_expdesign
+    ch_diann_cfg = SDRF_PARSING.out.ch_diann_cfg
 
     def Set enzymes = []
     def Set files = []
@@ -34,6 +35,7 @@ workflow CREATE_INPUT_CHANNEL {
     emit:
     ch_meta_config_dia // [meta, spectra_file]
     ch_expdesign
+    ch_diann_cfg
     versions = ch_versions
 }
 
@@ -44,7 +46,7 @@ def create_meta_channel(LinkedHashMap row, enzymes, files, wrapper) {
 
     // Always use SDRF format
     if (!params.root_folder) {
-        filestr = row.URI.toString()
+        filestr = row.URI?.toString()?.trim() ? row.URI.toString() : row.Filename.toString()
     }
     else {
         filestr = row.Filename.toString()
@@ -67,30 +69,22 @@ def create_meta_channel(LinkedHashMap row, enzymes, files, wrapper) {
     }
 
     // Validate acquisition method is DIA
-    if (row["Proteomics Data Acquisition Method"].toString().toLowerCase().contains("data-independent acquisition")) {
+    // AcquisitionMethod is already extracted by convert-diann (e.g. "Data-Independent Acquisition")
+    def acqMethod = row.AcquisitionMethod?.toString()?.trim() ?: ""
+    if (acqMethod.toLowerCase().contains("data-independent acquisition") || acqMethod.toLowerCase().contains("dia")) {
+        meta.acquisition_method = "dia"
+    }
+    else if (acqMethod.isEmpty()) {
+        // If no acquisition method column in SDRF, assume DIA (this is a DIA-only pipeline)
         meta.acquisition_method = "dia"
     }
     else {
-        log.error("This pipeline only supports Data-Independent Acquisition (DIA). Found: '${row["Proteomics Data Acquisition Method"]}'. Use the quantms pipeline for DDA workflows.")
+        log.error("This pipeline only supports Data-Independent Acquisition (DIA). Found: '${acqMethod}'. Use the quantms pipeline for DDA workflows.")
         exit(1)
     }
 
-    // dissociation method conversion
-    if (row.DissociationMethod == "COLLISION-INDUCED DISSOCIATION") {
-        meta.dissociationmethod = "CID"
-    }
-    else if (row.DissociationMethod == "HIGHER ENERGY BEAM-TYPE COLLISION-INDUCED DISSOCIATION") {
-        meta.dissociationmethod = "HCD"
-    }
-    else if (row.DissociationMethod == "ELECTRON TRANSFER DISSOCIATION") {
-        meta.dissociationmethod = "ETD"
-    }
-    else if (row.DissociationMethod == "ELECTRON CAPTURE DISSOCIATION") {
-        meta.dissociationmethod = "ECD"
-    }
-    else {
-        meta.dissociationmethod = row.DissociationMethod
-    }
+    // DissociationMethod is already normalized by convert-diann (HCD, CID, ETD, ECD)
+    meta.dissociationmethod = row.DissociationMethod?.toString()?.trim() ?: ""
 
     wrapper.acquisition_method = meta.acquisition_method
 
@@ -131,6 +125,7 @@ def create_meta_channel(LinkedHashMap row, enzymes, files, wrapper) {
             exit(1)
         }
     } else {
+        log.warn("No precursor mass tolerance in SDRF for '${filestr}'. Using default: ${params.precursor_mass_tolerance} ${params.precursor_mass_tolerance_unit}")
         meta.precursormasstolerance = params.precursor_mass_tolerance
     }
 
@@ -154,6 +149,7 @@ def create_meta_channel(LinkedHashMap row, enzymes, files, wrapper) {
             exit(1)
         }
     } else {
+        log.warn("No fragment mass tolerance in SDRF for '${filestr}'. Using default: ${params.fragment_mass_tolerance} ${params.fragment_mass_tolerance_unit}")
         meta.fragmentmasstolerance = params.fragment_mass_tolerance
     }
 
@@ -175,6 +171,12 @@ def create_meta_channel(LinkedHashMap row, enzymes, files, wrapper) {
         meta.variablemodifications = params.variable_mods
     }
 
+    // Per-file scan ranges (empty string = no flags passed, DIA-NN auto-detects)
+    meta.ms1minmz = row.MS1MinMz?.toString()?.trim() ?: ""
+    meta.ms1maxmz = row.MS1MaxMz?.toString()?.trim() ?: ""
+    meta.ms2minmz = row.MS2MinMz?.toString()?.trim() ?: ""
+    meta.ms2maxmz = row.MS2MaxMz?.toString()?.trim() ?: ""
+
     enzymes += row.Enzyme
     if (enzymes.size() > 1) {
         log.error("Currently only one enzyme is supported for the whole experiment. Specified was '${enzymes}'. Check or split your SDRF.")
diff --git a/workflows/dia.nf b/workflows/dia.nf
index 712f6db..e67cb54 100644
--- a/workflows/dia.nf
+++ b/workflows/dia.nf
@@ -7,7 +7,6 @@
 //
 // MODULES: Local to the pipeline
 //
-include { GENERATE_CFG                } from '../modules/local/diann/generate_cfg/main'
 include { DIANN_MSSTATS              } from '../modules/local/diann/diann_msstats/main'
 include { PRELIMINARY_ANALYSIS        } from '../modules/local/diann/preliminary_analysis/main'
 include { ASSEMBLE_EMPIRICAL_LIBRARY  } from '../modules/local/diann/assemble_empirical_library/main'
@@ -30,6 +29,7 @@ workflow DIA {
     take:
     ch_file_preparation_results
     ch_expdesign
+    ch_diann_cfg
 
     main:
 
@@ -44,12 +44,9 @@ workflow DIA {
 
     meta = ch_result.meta.unique { m -> m.experiment_id }
 
-    GENERATE_CFG(meta)
-    ch_software_versions = ch_software_versions
-        .mix(GENERATE_CFG.out.versions)
-
+    // diann_config.cfg comes directly from SDRF_PARSING (convert-diann)
     // Convert to value channel so it can be consumed by all per-file processes
-    ch_diann_cfg = GENERATE_CFG.out.diann_cfg.first()
+    ch_diann_cfg_val = ch_diann_cfg.first()
 
     //
     // MODULE: SILICOLIBRARYGENERATION
@@ -57,7 +54,7 @@ workflow DIA {
     if (params.diann_speclib != null && params.diann_speclib.toString() != "") {
         speclib = channel.from(file(params.diann_speclib, checkIfExists: true))
     } else {
-        INSILICO_LIBRARY_GENERATION(ch_searchdb, ch_diann_cfg)
+        INSILICO_LIBRARY_GENERATION(ch_searchdb, ch_diann_cfg_val)
         speclib = INSILICO_LIBRARY_GENERATION.out.predict_speclib
     }
 
@@ -80,12 +77,12 @@ workflow DIA {
             empirical_lib_files = preanalysis_subset
                 .map { result -> result[1] }
                 .collect( sort: { a, b -> file(a).getName() <=> file(b).getName() } )
-            PRELIMINARY_ANALYSIS(preanalysis_subset.combine(speclib), ch_diann_cfg)
+            PRELIMINARY_ANALYSIS(preanalysis_subset.combine(speclib), ch_diann_cfg_val)
         } else {
             empirical_lib_files = ch_file_preparation_results
                 .map { result -> result[1] }
                 .collect( sort: { a, b -> file(a).getName() <=> file(b).getName() } )
-            PRELIMINARY_ANALYSIS(ch_file_preparation_results.combine(speclib), ch_diann_cfg)
+            PRELIMINARY_ANALYSIS(ch_file_preparation_results.combine(speclib), ch_diann_cfg_val)
         }
         ch_software_versions = ch_software_versions
             .mix(PRELIMINARY_ANALYSIS.out.versions)
@@ -99,7 +96,7 @@ workflow DIA {
             meta,
             PRELIMINARY_ANALYSIS.out.diann_quant.collect(),
             speclib,
-            ch_diann_cfg
+            ch_diann_cfg_val
         )
         ch_software_versions = ch_software_versions
             .mix(ASSEMBLE_EMPIRICAL_LIBRARY.out.versions)
@@ -114,7 +111,7 @@ workflow DIA {
     //
     // MODULE: INDIVIDUAL_ANALYSIS
     //
-    INDIVIDUAL_ANALYSIS(indiv_fin_analysis_in, ch_diann_cfg)
+    INDIVIDUAL_ANALYSIS(indiv_fin_analysis_in, ch_diann_cfg_val)
     ch_software_versions = ch_software_versions
         .mix(INDIVIDUAL_ANALYSIS.out.versions)
 
@@ -137,7 +134,7 @@ workflow DIA {
         empirical_lib,
         INDIVIDUAL_ANALYSIS.out.diann_quant.collect(),
         ch_searchdb,
-        ch_diann_cfg)
+        ch_diann_cfg_val)
 
     ch_software_versions = ch_software_versions.mix(
         FINAL_QUANTIFICATION.out.versions
@@ -179,6 +176,10 @@ def preprocessed_meta(LinkedHashMap meta) {
     parameters['fragmentmasstolerance']         = meta.fragmentmasstolerance
     parameters['fragmentmasstoleranceunit']     = meta.fragmentmasstoleranceunit
     parameters['enzyme']                        = meta.enzyme
+    parameters['ms1minmz']                      = meta.ms1minmz
+    parameters['ms1maxmz']                      = meta.ms1maxmz
+    parameters['ms2minmz']                      = meta.ms2minmz
+    parameters['ms2maxmz']                      = meta.ms2maxmz
 
     return parameters
 }
diff --git a/workflows/quantmsdiann.nf b/workflows/quantmsdiann.nf
index a5e4f4c..911c39e 100644
--- a/workflows/quantmsdiann.nf
+++ b/workflows/quantmsdiann.nf
@@ -86,6 +86,7 @@ workflow QUANTMSDIANN {
     DIA(
         ch_fileprep_result.dia,
         CREATE_INPUT_CHANNEL.out.ch_expdesign,
+        CREATE_INPUT_CHANNEL.out.ch_diann_cfg,
     )
     ch_pipeline_results = ch_pipeline_results.mix(DIA.out.diann_report)
     ch_msstats_in = ch_msstats_in.mix(DIA.out.msstats_in)

From 0d3f787f90080c4e491bd055dce9ae087f25c17e Mon Sep 17 00:00:00 2001
From: Yasset Perez-Riverol <ypriverol@gmail.com>
Date: Mon, 23 Mar 2026 15:40:40 +0100
Subject: [PATCH 02/16] minor changes var_mod vs monitor-mod

---
 conf/tests/test_dia_local.config                     |  3 +++
 docs/usage.md                                        | 12 +++++++-----
 .../local/diann/assemble_empirical_library/main.nf   |  4 ++--
 modules/local/diann/preliminary_analysis/main.nf     |  4 ++--
 4 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/conf/tests/test_dia_local.config b/conf/tests/test_dia_local.config
index 1dba6fc..8d523c5 100644
--- a/conf/tests/test_dia_local.config
+++ b/conf/tests/test_dia_local.config
@@ -9,6 +9,9 @@ process {
     withName: 'SDRF_PARSING' {
         container = 'docker.io/local/sdrf-pipelines:dev'
     }
+    withName: 'SAMPLESHEET_CHECK' {
+        container = 'docker.io/local/quantms-utils:dev'
+    }
     withName: 'DIANN_MSSTATS' {
         container = 'docker.io/local/quantms-utils:dev'
     }
diff --git a/docs/usage.md b/docs/usage.md
index 23fcc4b..eeef769 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -131,7 +131,7 @@ These parameters apply globally across all files. They are set in `diann_config.
 | `--cut` | (from SDRF enzyme) | — | ALL | Enzyme cut rule, derived from `comment[cleavage agent details]` |
 | `--fixed-mod` | (from SDRF) | — | ALL | Fixed modifications from `comment[modification parameters]` |
 | `--var-mod` | (from SDRF) | — | ALL | Variable modifications from `comment[modification parameters]` |
-| `--monitor-mod` | `--enable_mod_localization` + `--mod_localization` | `false` / `Phospho (S),Phospho (T),Phospho (Y)` | INDIVIDUAL, FINAL | PTM site localization scoring |
+| `--monitor-mod` | `--enable_mod_localization` + `--mod_localization` | `false` / `Phospho (S),Phospho (T),Phospho (Y)` | PRELIMINARY, ASSEMBLE, INDIVIDUAL, FINAL | PTM site localization scoring (DIA-NN 1.8.x only) |
 | `--window` | `--scan_window` | `8` | PRELIMINARY, ASSEMBLE, INDIVIDUAL | Scan window; auto-detected when `--scan_window_automatic=true` |
 | `--quick-mass-acc` | `--quick_mass_acc` | `true` | PRELIMINARY | Fast mass accuracy calibration |
 | `--min-corr 2 --corr-diff 1 --time-corr-only` | `--performance_mode` | `true` | PRELIMINARY | High-speed, low-RAM mode |
@@ -143,11 +143,13 @@ These parameters apply globally across all files. They are set in `diann_config.
 
 DIA-NN supports PTM site localization scoring via `--monitor-mod`. When enabled, DIA-NN reports `PTM.Site.Confidence` and `PTM.Q.Value` columns for the specified modifications.
 
-**Important**: `--monitor-mod` is only applied to **INDIVIDUAL_ANALYSIS** and **FINAL_QUANTIFICATION**. It is intentionally excluded from earlier steps because:
+**Important**: `--monitor-mod` is applied to all DIA-NN steps **except INSILICO_LIBRARY_GENERATION** (where it has no effect). It is particularly important for:
 
-- **INSILICO_LIBRARY_GENERATION**: Library generation needs all peptides (modified + unmodified). `--monitor-mod` would filter to only modified peptides.
-- **PRELIMINARY_ANALYSIS**: Calibration needs all peptides for robust mass accuracy estimation.
-- **ASSEMBLE_EMPIRICAL_LIBRARY**: Library assembly needs broad peptide coverage.
+- **PRELIMINARY_ANALYSIS**: Affects PTM-aware scoring during calibration.
+- **ASSEMBLE_EMPIRICAL_LIBRARY**: Strongly affects empirical library generation for PTM peptides.
+- **INDIVIDUAL_ANALYSIS** and **FINAL_QUANTIFICATION**: Enables PTM site confidence scoring.
+
+Note: For DIA-NN 2.0+, `--monitor-mod` is no longer needed — PTM localization is handled automatically by `--var-mod`. The flag is only used for DIA-NN 1.8.x.
 
 To enable PTM site localization:
 
diff --git a/modules/local/diann/assemble_empirical_library/main.nf b/modules/local/diann/assemble_empirical_library/main.nf
index 8d266d3..4d0c42f 100644
--- a/modules/local/diann/assemble_empirical_library/main.nf
+++ b/modules/local/diann/assemble_empirical_library/main.nf
@@ -59,8 +59,8 @@ process ASSEMBLE_EMPIRICAL_LIBRARY {
 
     ls -lcth
 
-    # Extract --var-mod and --fixed-mod flags from diann_config.cfg (no --monitor-mod: library assembly needs all peptides)
-    mod_flags=\$(cat ${diann_config} | grep -oP '(--var-mod\\s+\\S+|--fixed-mod\\s+\\S+)' | tr '\\n' ' ')
+    # Extract --var-mod, --fixed-mod, and --monitor-mod flags from diann_config.cfg
+    mod_flags=\$(cat ${diann_config} | grep -oP '(--var-mod\\s+\\S+|--fixed-mod\\s+\\S+|--monitor-mod\\s+\\S+)' | tr '\\n' ' ')
 
     diann   --f ${(ms_files as List).join(' --f ')} \\
             --lib ${lib} \\
diff --git a/modules/local/diann/preliminary_analysis/main.nf b/modules/local/diann/preliminary_analysis/main.nf
index 81ea1db..8a57241 100644
--- a/modules/local/diann/preliminary_analysis/main.nf
+++ b/modules/local/diann/preliminary_analysis/main.nf
@@ -71,8 +71,8 @@ process PRELIMINARY_ANALYSIS {
 
     # Final mass accuracy is '${mass_acc}'
 
-    # Extract --var-mod and --fixed-mod flags from diann_config.cfg (no --monitor-mod here: calibration needs all peptides)
-    mod_flags=\$(cat ${diann_config} | grep -oP '(--var-mod\\s+\\S+|--fixed-mod\\s+\\S+)' | tr '\\n' ' ')
+    # Extract --var-mod, --fixed-mod, and --monitor-mod flags from diann_config.cfg
+    mod_flags=\$(cat ${diann_config} | grep -oP '(--var-mod\\s+\\S+|--fixed-mod\\s+\\S+|--monitor-mod\\s+\\S+)' | tr '\\n' ' ')
 
     diann   --lib ${predict_library} \\
             --f ${ms_file} \\

From 95c7dda2024d4600f10875b1efe2a8b43165d62a Mon Sep 17 00:00:00 2001
From: Yasset Perez-Riverol <ypriverol@gmail.com>
Date: Tue, 24 Mar 2026 07:25:36 +0100
Subject: [PATCH 03/16] Minor changes to capture DIANN version

---
 conf/diann_versions/v2_1_0.config  | 2 ++
 conf/diann_versions/v2_2_0.config  | 2 ++
 modules/local/sdrf_parsing/main.nf | 2 ++
 nextflow.config                    | 1 +
 4 files changed, 7 insertions(+)

diff --git a/conf/diann_versions/v2_1_0.config b/conf/diann_versions/v2_1_0.config
index bedfe95..9915726 100644
--- a/conf/diann_versions/v2_1_0.config
+++ b/conf/diann_versions/v2_1_0.config
@@ -2,6 +2,8 @@
  * DIA-NN 2.1.0 container override (private ghcr.io)
  * Used by merge_ci.yml for version × feature matrix testing.
  */
+params.diann_version = '2.1.0'
+
 process {
     withLabel: diann {
         container = 'ghcr.io/bigbio/diann:2.1.0'
diff --git a/conf/diann_versions/v2_2_0.config b/conf/diann_versions/v2_2_0.config
index 1e79ea3..93ea4ee 100644
--- a/conf/diann_versions/v2_2_0.config
+++ b/conf/diann_versions/v2_2_0.config
@@ -2,6 +2,8 @@
  * DIA-NN 2.2.0 container override (private ghcr.io)
  * Used by merge_ci.yml for version × feature matrix testing.
  */
+params.diann_version = '2.2.0'
+
 process {
     withLabel: diann {
         container = 'ghcr.io/bigbio/diann:2.2.0'
diff --git a/modules/local/sdrf_parsing/main.nf b/modules/local/sdrf_parsing/main.nf
index d150321..56b402e 100644
--- a/modules/local/sdrf_parsing/main.nf
+++ b/modules/local/sdrf_parsing/main.nf
@@ -20,11 +20,13 @@ process SDRF_PARSING {
     def args = task.ext.args ?: ''
     def mod_loc_flag = (params.enable_mod_localization && params.mod_localization) ?
         "--mod_localization '${params.mod_localization}'" : ''
+    def diann_version_flag = params.diann_version ? "--diann_version '${params.diann_version}'" : ''
 
     """
     parse_sdrf convert-diann \\
         -s ${sdrf} \\
         ${mod_loc_flag} \\
+        ${diann_version_flag} \\
         $args \\
         2>&1 | tee ${sdrf.baseName}_parsing.log
 
diff --git a/nextflow.config b/nextflow.config
index e710a44..79d8ec7 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -55,6 +55,7 @@ params {
     convert_dotd            = false
 
     // DIA-NN: General
+    diann_version           = '1.8.1'   // Used to control version-dependent flags (e.g. --monitor-mod for 1.8.x)
     diann_debug             = 3
     diann_speclib           = null
     diann_extra_args        = null

From 64ccaf4199b2c7a89977627f810704d7954d972c Mon Sep 17 00:00:00 2001
From: Yasset Perez-Riverol <ypriverol@gmail.com>
Date: Tue, 24 Mar 2026 07:25:57 +0100
Subject: [PATCH 04/16] specify version for 1.8.1

---
 conf/diann_versions/v1_8_1.config | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/conf/diann_versions/v1_8_1.config b/conf/diann_versions/v1_8_1.config
index 2821ee2..5bfb7ef 100644
--- a/conf/diann_versions/v1_8_1.config
+++ b/conf/diann_versions/v1_8_1.config
@@ -2,6 +2,8 @@
  * DIA-NN 1.8.1 container override (public biocontainers)
  * Used by merge_ci.yml for version × feature matrix testing.
  */
+params.diann_version = '1.8.1'
+
 process {
     withLabel: diann {
         container = 'docker.io/biocontainers/diann:v1.8.1_cv1'

From 4e72dbec7c721744756261e6d98ac5fde636afec Mon Sep 17 00:00:00 2001
From: Yasset Perez-Riverol <ypriverol@gmail.com>
Date: Tue, 24 Mar 2026 10:19:25 +0100
Subject: [PATCH 05/16] minor changes

---
 docs/usage.md | 52 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 52 insertions(+)

diff --git a/docs/usage.md b/docs/usage.md
index eeef769..8eb8a7d 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -241,6 +241,58 @@ Use `screen`, `tmux`, or the Nextflow `-bg` flag to run the pipeline in the back
 nextflow run bigbio/quantmsdiann -profile docker --input sdrf.tsv --database db.fasta --outdir results -bg
 ```
 
+## Developer testing with local containers
+
+When developing changes to `sdrf-pipelines` or `quantms-utils`, you can build local Docker containers and test them with the pipeline without publishing to a registry.
+
+### 1. Build local dev containers
+
+```bash
+# From sdrf-pipelines repo
+cd /path/to/sdrf-pipelines
+docker build -f Dockerfile.dev -t local/sdrf-pipelines:dev .
+
+# From quantms-utils repo
+cd /path/to/quantms-utils
+docker build -f Dockerfile.dev -t local/quantms-utils:dev .
+```
+
+### 2. Run the pipeline with local containers
+
+Use the `test_dia_local.config` to override container references:
+
+```bash
+nextflow run main.nf \
+    -profile test_dia,docker \
+    -c conf/tests/test_dia_local.config \
+    --outdir results
+```
+
+This config (`conf/tests/test_dia_local.config`) overrides:
+- `SDRF_PARSING` → `local/sdrf-pipelines:dev`
+- `SAMPLESHEET_CHECK` → `local/quantms-utils:dev`
+- `DIANN_MSSTATS` → `local/quantms-utils:dev`
+
+### 3. Using pre-converted mzML files
+
+To skip ThermoRawFileParser (useful on macOS/ARM where Mono crashes):
+
+```bash
+# Convert raw files with ThermoRawFileParser v2.0+
+docker run --rm --platform=linux/amd64 \
+    -v /path/to/raw:/data -v /path/to/mzml:/out \
+    quay.io/biocontainers/thermorawfileparser:2.0.0.dev--h9ee0642_0 \
+    ThermoRawFileParser -d /data -o /out -f 2
+
+# Run pipeline with pre-converted files
+nextflow run main.nf \
+    -profile test_dia,docker \
+    -c conf/tests/test_dia_local.config \
+    --root_folder /path/to/mzml \
+    --local_input_type mzML \
+    --outdir results
+```
+
 ## Nextflow memory requirements
 
 Add the following to your environment to limit Java memory:

From 3dfa38c107ff760de8148a85b0461b0949f22347 Mon Sep 17 00:00:00 2001
From: yueqixuan <yueqx@foxmail.com>
Date: Sun, 29 Mar 2026 18:06:16 +0800
Subject: [PATCH 06/16] update

---
 modules/local/diann/diann_msstats/main.nf   |  4 ++--
 modules/local/diann/generate_cfg/main.nf    |  4 ++--
 modules/local/pmultiqc/main.nf              | 12 ++++++------
 modules/local/samplesheet_check/main.nf     |  4 ++--
 modules/local/sdrf_parsing/main.nf          |  4 ++--
 modules/local/utils/mzml_statistics/main.nf |  4 ++--
 workflows/dia.nf                            |  1 +
 workflows/quantmsdiann.nf                   |  1 +
 8 files changed, 18 insertions(+), 16 deletions(-)

diff --git a/modules/local/diann/diann_msstats/main.nf b/modules/local/diann/diann_msstats/main.nf
index a767910..b2e96ff 100644
--- a/modules/local/diann/diann_msstats/main.nf
+++ b/modules/local/diann/diann_msstats/main.nf
@@ -3,8 +3,8 @@ process DIANN_MSSTATS {
     label 'process_medium'
 
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/quantms-utils:0.0.25--pyh106432d_0' :
-        'biocontainers/quantms-utils:0.0.25--pyh106432d_0' }"
+        'https://depot.galaxyproject.org/singularity/quantms-utils:0.0.27--pyh106432d_0' :
+        'biocontainers/quantms-utils:0.0.27--pyh106432d_0' }"
 
     input:
     path(report)
diff --git a/modules/local/diann/generate_cfg/main.nf b/modules/local/diann/generate_cfg/main.nf
index 9a4adef..7acf515 100644
--- a/modules/local/diann/generate_cfg/main.nf
+++ b/modules/local/diann/generate_cfg/main.nf
@@ -3,8 +3,8 @@ process GENERATE_CFG {
     label 'process_tiny'
 
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/quantms-utils:0.0.25--pyh106432d_0' :
-        'biocontainers/quantms-utils:0.0.25--pyh106432d_0' }"
+        'https://depot.galaxyproject.org/singularity/quantms-utils:0.0.27--pyh106432d_0' :
+        'biocontainers/quantms-utils:0.0.27--pyh106432d_0' }"
 
     input:
     val(meta)
diff --git a/modules/local/pmultiqc/main.nf b/modules/local/pmultiqc/main.nf
index 844fe24..af25267 100644
--- a/modules/local/pmultiqc/main.nf
+++ b/modules/local/pmultiqc/main.nf
@@ -2,8 +2,8 @@ process PMULTIQC {
     label 'process_high'
 
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/pmultiqc:0.0.39--pyhdfd78af_0' :
-        'biocontainers/pmultiqc:0.0.39--pyhdfd78af_0' }"
+        'https://depot.galaxyproject.org/singularity/pmultiqc:0.0.42--pyhdfd78af_0' :
+        'biocontainers/pmultiqc:0.0.42--pyhdfd78af_0' }"
 
     input:
     path 'results/*'
@@ -17,10 +17,10 @@ process PMULTIQC {
 
     script:
     def args = task.ext.args ?: ''
-    def disable_pmultiqc = (params.enable_pmultiqc) ? "--quantms_plugin" : ""
-    def disable_table_plots = (params.enable_pmultiqc) && (params.skip_table_plots) ? "--disable_table" : ""
-    def disable_idxml_index = (params.enable_pmultiqc) && (params.pmultiqc_idxml_skip) ? "--ignored_idxml" : ""
-    def contaminant_affix = params.contaminant_string ? "--contaminant_affix ${params.contaminant_string}" : ""
+    def disable_pmultiqc = (params.enable_pmultiqc) ? "--quantms-plugin" : ""
+    def disable_table_plots = (params.enable_pmultiqc) && (params.skip_table_plots) ? "--disable-table" : ""
+    def disable_idxml_index = (params.enable_pmultiqc) && (params.pmultiqc_idxml_skip) ? "--ignored-idxml" : ""
+    def contaminant_affix = params.contaminant_string ? "--contaminant-affix ${params.contaminant_string}" : ""
 
     """
     set -x
diff --git a/modules/local/samplesheet_check/main.nf b/modules/local/samplesheet_check/main.nf
index 1c1c9d4..09a1303 100644
--- a/modules/local/samplesheet_check/main.nf
+++ b/modules/local/samplesheet_check/main.nf
@@ -4,8 +4,8 @@ process SAMPLESHEET_CHECK {
     label 'process_tiny'
 
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/quantms-utils:0.0.25--pyh106432d_0' :
-        'biocontainers/quantms-utils:0.0.25--pyh106432d_0' }"
+        'https://depot.galaxyproject.org/singularity/quantms-utils:0.0.27--pyh106432d_0' :
+        'biocontainers/quantms-utils:0.0.27--pyh106432d_0' }"
 
     input:
     path input_file
diff --git a/modules/local/sdrf_parsing/main.nf b/modules/local/sdrf_parsing/main.nf
index 56b402e..ef9367b 100644
--- a/modules/local/sdrf_parsing/main.nf
+++ b/modules/local/sdrf_parsing/main.nf
@@ -3,8 +3,8 @@ process SDRF_PARSING {
     label 'process_tiny'
 
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/sdrf-pipelines:0.0.33--pyhdfd78af_0' :
-        'biocontainers/sdrf-pipelines:0.0.33--pyhdfd78af_0' }"
+        'https://depot.galaxyproject.org/singularity/sdrf-pipelines:0.1.2--pyhdfd78af_0' :
+        'biocontainers/sdrf-pipelines:0.1.2--pyhdfd78af_0' }"
 
     input:
     path sdrf
diff --git a/modules/local/utils/mzml_statistics/main.nf b/modules/local/utils/mzml_statistics/main.nf
index cfa2c2b..86bd694 100644
--- a/modules/local/utils/mzml_statistics/main.nf
+++ b/modules/local/utils/mzml_statistics/main.nf
@@ -4,8 +4,8 @@ process MZML_STATISTICS {
     label 'process_single'
 
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/quantms-utils:0.0.25--pyh106432d_0' :
-        'biocontainers/quantms-utils:0.0.25--pyh106432d_0' }"
+        'https://depot.galaxyproject.org/singularity/quantms-utils:0.0.27--pyh106432d_0' :
+        'biocontainers/quantms-utils:0.0.27--pyh106432d_0' }"
 
     input:
     tuple val(meta), path(ms_file)
diff --git a/workflows/dia.nf b/workflows/dia.nf
index e67cb54..c35c7a3 100644
--- a/workflows/dia.nf
+++ b/workflows/dia.nf
@@ -159,6 +159,7 @@ workflow DIA {
     emit:
     versions                = ch_software_versions
     diann_report            = diann_main_report
+    diann_log               = FINAL_QUANTIFICATION.out.log
     msstats_in              = DIANN_MSSTATS.out.out_msstats
 }
 
diff --git a/workflows/quantmsdiann.nf b/workflows/quantmsdiann.nf
index 911c39e..9d869ac 100644
--- a/workflows/quantmsdiann.nf
+++ b/workflows/quantmsdiann.nf
@@ -116,6 +116,7 @@ workflow QUANTMSDIANN {
     ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_config)
     ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml'))
     ch_multiqc_files = ch_multiqc_files.mix(FILE_PREPARATION.out.statistics)
+    ch_multiqc_files = ch_multiqc_files.mix(DIA.out.diann_log)
     ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions)
     ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml', sort: false))
     ch_multiqc_quantms_logo = file("${projectDir}/assets/nf-core-quantmsdiann_logo_light.png")

From c46b7dcbdb874e0bf7fe1b41f8f0b5516f31d249 Mon Sep 17 00:00:00 2001
From: yueqixuan <yueqx@foxmail.com>
Date: Tue, 31 Mar 2026 12:31:19 +0800
Subject: [PATCH 07/16] extract calibration params to meta

---
 .../diann/assemble_empirical_library/main.nf  |  9 ++++
 .../diann/assemble_empirical_library/meta.yml |  4 ++
 .../local/diann/individual_analysis/main.nf   | 42 ++++++++++++++-----
 .../local/diann/individual_analysis/meta.yml  |  4 --
 nextflow.config                               |  4 ++
 nextflow_schema.json                          | 14 +++++++
 workflows/dia.nf                              | 42 +++++++++++++++----
 7 files changed, 96 insertions(+), 23 deletions(-)

diff --git a/modules/local/diann/assemble_empirical_library/main.nf b/modules/local/diann/assemble_empirical_library/main.nf
index 034b95e..2bb3e37 100644
--- a/modules/local/diann/assemble_empirical_library/main.nf
+++ b/modules/local/diann/assemble_empirical_library/main.nf
@@ -19,6 +19,7 @@ process ASSEMBLE_EMPIRICAL_LIBRARY {
     path "empirical_library.*", emit: empirical_library
     path "assemble_empirical_library.log", emit: log
     path "versions.yml", emit: versions
+    path "diann_calibrated_params.csv", emit: calibrated_params
 
     when:
     task.ext.when == null || task.ext.when
@@ -83,6 +84,14 @@ process ASSEMBLE_EMPIRICAL_LIBRARY {
 
     cp report.log.txt assemble_empirical_library.log
 
+    # Parse the "Averaged recommended settings" line once; '|| true' keeps a missing line from aborting the task when the shell runs with errexit/pipefail.
+    rec_settings=\$(grep -m 1 "Averaged recommended settings" assemble_empirical_library.log || true)
+    val_mass_acc_ms2=\$(echo "\$rec_settings" | cut -d ' ' -f 11 | tr -cd '0-9.')
+    val_mass_acc_ms1=\$(echo "\$rec_settings" | cut -d ' ' -f 15 | tr -cd '0-9.')
+    val_scan_window=\$(echo "\$rec_settings" | cut -d ' ' -f 19 | tr -cd '0-9.')
+    # Write "ms2,ms1,window"; a value that could not be parsed is written as 0 so consumers can detect it.
+    echo "\${val_mass_acc_ms2:-0},\${val_mass_acc_ms1:-0},\${val_scan_window:-0}" > diann_calibrated_params.csv
+
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
         DIA-NN: \$(diann 2>&1 | grep "DIA-NN" | grep -oP "\\d+\\.\\d+(\\.\\w+)*(\\.[\\d]+)?")
diff --git a/modules/local/diann/assemble_empirical_library/meta.yml b/modules/local/diann/assemble_empirical_library/meta.yml
index 0d1f5b7..c6ad7ab 100644
--- a/modules/local/diann/assemble_empirical_library/meta.yml
+++ b/modules/local/diann/assemble_empirical_library/meta.yml
@@ -35,5 +35,9 @@ output:
       type: file
       description: File containing software version
       pattern: "versions.yml"
+  - calibrated_params:
+      type: file
+      description: Single-line CSV with mass_acc_ms2, mass_acc_ms1 and scan_window (in that order) parsed from the DIA-NN log; a value is 0 when it could not be parsed.
+      pattern: "diann_calibrated_params.csv"
 authors:
   - "@daichengxin"
diff --git a/modules/local/diann/individual_analysis/main.nf b/modules/local/diann/individual_analysis/main.nf
index 0ba54e8..0df29dd 100644
--- a/modules/local/diann/individual_analysis/main.nf
+++ b/modules/local/diann/individual_analysis/main.nf
@@ -8,7 +8,7 @@ process INDIVIDUAL_ANALYSIS {
         'docker.io/biocontainers/diann:v1.8.1_cv1' }"
 
     input:
-    tuple val(meta), path(ms_file), path(fasta), path(diann_log), path(library)
+    tuple val(meta), path(ms_file), path(fasta), path(library)
     path(diann_config)
 
     output:
@@ -44,19 +44,42 @@ process INDIVIDUAL_ANALYSIS {
         }
     }
 
+    // Default scan window. The calibrated/fallback branches below overwrite it; the
+    // SDRF-ppm branch sets only the mass accuracies, so it relies on this default.
     scan_window = params.scan_window
-
-    if (params.mass_acc_automatic | params.scan_window_automatic) {
-        mass_acc_ms2 = "\$(cat ${diann_log} | grep \"Averaged recommended settings\" | cut -d ' ' -f 11 | tr -cd \"[0-9]\")"
-        scan_window = "\$(cat ${diann_log} | grep \"Averaged recommended settings\" | cut -d ' ' -f 19 | tr -cd \"[0-9]\")"
-        mass_acc_ms1 = "\$(cat ${diann_log} | grep \"Averaged recommended settings\" | cut -d ' ' -f 15 | tr -cd \"[0-9]\")"
-    } else if (meta['precursormasstoleranceunit'].toLowerCase().endsWith('ppm') && meta['fragmentmasstoleranceunit'].toLowerCase().endsWith('ppm')) {
+    if (params.mass_acc_automatic || params.scan_window_automatic) {
+        if (meta.mass_acc_ms2 != "0" && meta.mass_acc_ms2 != null) {
+            mass_acc_ms2 = meta.mass_acc_ms2
+            mass_acc_ms1 = meta.mass_acc_ms1
+            scan_window  = meta.scan_window
+        } 
+        else if (meta['fragmentmasstolerance']) {
+            mass_acc_ms2 = meta['fragmentmasstolerance']
+            mass_acc_ms1 = meta['precursormasstolerance']
+            scan_window  = params.scan_window
+        } 
+        else {
+            mass_acc_ms2 = params.mass_acc_ms2
+            mass_acc_ms1 = params.mass_acc_ms1
+            scan_window  = params.scan_window
+        }
+    } else if (meta['precursormasstoleranceunit']?.toLowerCase()?.endsWith('ppm') && meta['fragmentmasstoleranceunit']?.toLowerCase()?.endsWith('ppm')) {
         mass_acc_ms1 = meta["precursormasstolerance"]
         mass_acc_ms2 = meta["fragmentmasstolerance"]
     } else {
-        mass_acc_ms2 = "\$(cat ${diann_log} | grep \"Averaged recommended settings\" | cut -d ' ' -f 11 | tr -cd \"[0-9]\")"
-        scan_window = "\$(cat ${diann_log} | grep \"Averaged recommended settings\" | cut -d ' ' -f 19 | tr -cd \"[0-9]\")"
-        mass_acc_ms1 = "\$(cat ${diann_log} | grep \"Averaged recommended settings\" | cut -d ' ' -f 15 | tr -cd \"[0-9]\")"
+        if (meta.mass_acc_ms2 != "0" && meta.mass_acc_ms2 != null) {
+            mass_acc_ms2 = meta.mass_acc_ms2
+            mass_acc_ms1 = meta.mass_acc_ms1
+            scan_window  = meta.scan_window
+        } else if (meta['fragmentmasstolerance']) {
+            mass_acc_ms2 = meta['fragmentmasstolerance']
+            mass_acc_ms1 = meta['precursormasstolerance']
+            scan_window  = params.scan_window
+        } else {
+            mass_acc_ms2 = params.mass_acc_ms2
+            mass_acc_ms1 = params.mass_acc_ms1
+            scan_window  = params.scan_window
+        }
     }
 
     diann_no_peptidoforms = params.diann_no_peptidoforms ? "--no-peptidoforms" : ""
diff --git a/modules/local/diann/individual_analysis/meta.yml b/modules/local/diann/individual_analysis/meta.yml
index 655a16f..f7ffe35 100644
--- a/modules/local/diann/individual_analysis/meta.yml
+++ b/modules/local/diann/individual_analysis/meta.yml
@@ -10,10 +10,6 @@ tools:
       homepage: https://github.com/vdemichev/DiaNN
       documentation: https://github.com/vdemichev/DiaNN
 input:
-  - diann_log:
-      type: file
-      description: DIA-NN log file
-      pattern: "assemble_empirical_library.log"
   - empirical_library:
       type: file
       description: An empirical spectral library from the .quant files.
diff --git a/nextflow.config b/nextflow.config
index 79d8ec7..636562b 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -85,6 +85,10 @@ params {
     random_preanalysis_seed      = 42
     empirical_assembly_ms_n      = 200
 
+    // DIA-NN: INDIVIDUAL_ANALYSIS
+    mass_acc_ms2    = 15
+    mass_acc_ms1    = 15
+
     // DIA-NN: FINAL_QUANTIFICATION — summarization & output
     pg_level                = 2
     species_genes           = false
diff --git a/nextflow_schema.json b/nextflow_schema.json
index d1215b4..c992d3e 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -321,6 +321,20 @@
                     "help_text": " Ideally, should be approximately equal to the average number of data points per peak",
                     "default": 8
                 },
+                "mass_acc_ms2": {
+                    "type": "number",
+                    "description": "Set the MS2 mass accuracy (tolerance) to a specific value in ppm.",
+                    "fa_icon": "fas fa-bullseye",
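+                    "help_text": "In INDIVIDUAL_ANALYSIS this is a fallback, used when neither a calibrated value from the empirical library log nor an SDRF fragment mass tolerance is available. Corresponds to the --mass-acc parameter in DIA-NN.",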
+                    "default": 15
+                },
+                "mass_acc_ms1": {
+                    "type": "number",
+                    "description": "Set the MS1 mass accuracy (tolerance) to a specific value in ppm.",
+                    "fa_icon": "fas fa-bullseye",
+                    "help_text": "In INDIVIDUAL_ANALYSIS this is a fallback, used when neither a calibrated value from the empirical library log nor an SDRF mass tolerance is available. Corresponds to the --mass-acc-ms1 parameter in DIA-NN.",
+                    "default": 15
+                },
                 "performance_mode": {
                     "type": "boolean",
                     "description": "Set Low RAM & High Speed Mode for DIANN, including min-corr, corr-diff, and time-corr-only three parameters",
diff --git a/workflows/dia.nf b/workflows/dia.nf
index c35c7a3..c6c7ad6 100644
--- a/workflows/dia.nf
+++ b/workflows/dia.nf
@@ -59,12 +59,30 @@ workflow DIA {
     }
 
     if (params.skip_preliminary_analysis) {
-        assembly_log = channel.fromPath(params.empirical_assembly_log)
-        empirical_library = channel.fromPath(params.diann_speclib)
-        indiv_fin_analysis_in = ch_file_preparation_results.combine(ch_searchdb)
-            .combine(assembly_log)
-            .combine(empirical_library)
-        empirical_lib = empirical_library
+        def log_file = params.empirical_assembly_log ? file(params.empirical_assembly_log) : null
+        def parsed_m2 = "0"
+        def parsed_m1 = "0"
+        def parsed_w  = "0"        
+        if (log_file && log_file.exists()) {
+            def matcher = log_file.text =~ /Mass accuracy = ([0-9.]+)ppm, MS1 accuracy = ([0-9.]+)ppm, Scan window = ([0-9.]+)/
+            if (matcher) {
+                parsed_m2 = matcher[0][1]
+                parsed_m1 = matcher[0][2]
+                parsed_w  = matcher[0][3]
+            }
+        }        
+        indiv_fin_analysis_in = ch_file_preparation_results
+            .combine(ch_searchdb)
+            .combine(speclib)
+            .map { meta_map, ms_file, fasta, library ->
+                def new_meta = meta_map + [
+                    mass_acc_ms2 : parsed_m2,
+                    mass_acc_ms1 : parsed_m1,
+                    scan_window  : parsed_w
+                ]
+                return [ new_meta, ms_file, fasta, library ]
+            }
+        empirical_lib = speclib
     } else {
         //
         // MODULE: PRELIMINARY_ANALYSIS
@@ -102,9 +120,17 @@ workflow DIA {
             .mix(ASSEMBLE_EMPIRICAL_LIBRARY.out.versions)
         indiv_fin_analysis_in = ch_file_preparation_results
             .combine(ch_searchdb)
-            .combine(ASSEMBLE_EMPIRICAL_LIBRARY.out.log)
             .combine(ASSEMBLE_EMPIRICAL_LIBRARY.out.empirical_library)
-
+            .combine(ASSEMBLE_EMPIRICAL_LIBRARY.out.calibrated_params)
+            .map { meta_map, ms_file, fasta, library, param_file ->
+                def values = param_file.text.trim().split(',')
+                def new_meta = meta_map + [
+                    mass_acc_ms2 : values[0],
+                    mass_acc_ms1 : values[1],
+                    scan_window  : values[2]
+                ]
+                return [ new_meta, ms_file, fasta, library ]
+            }
         empirical_lib = ASSEMBLE_EMPIRICAL_LIBRARY.out.empirical_library
     }
 

From 5061ed18541ab712f0f60f60b429f416e3887609 Mon Sep 17 00:00:00 2001
From: yueqixuan <yueqx@foxmail.com>
Date: Tue, 31 Mar 2026 13:24:52 +0800
Subject: [PATCH 08/16] update

---
 .gitignore                                    |  2 +-
 docs/usage.md                                 | 71 ++++++++++---------
 modules/local/diann/diann_msstats/main.nf     |  4 +-
 modules/local/diann/generate_cfg/main.nf      |  4 +-
 .../local/diann/individual_analysis/main.nf   |  4 +-
 modules/local/pmultiqc/main.nf                |  4 +-
 modules/local/samplesheet_check/main.nf       |  4 +-
 modules/local/utils/mzml_statistics/main.nf   |  4 +-
 nextflow_schema.json                          | 15 ++++
 workflows/dia.nf                              |  4 +-
 10 files changed, 67 insertions(+), 49 deletions(-)

diff --git a/.gitignore b/.gitignore
index 114452d..10bcc4e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -17,4 +17,4 @@ null/
 .cursor/rules/codacy.mdc
 .codacy/
 .github/instructions/codacy.instructions.md
-docs/superpowers/
\ No newline at end of file
+docs/superpowers/
diff --git a/docs/usage.md b/docs/usage.md
index 8eb8a7d..a9b652a 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -95,49 +95,50 @@ The pipeline passes parameters to DIA-NN at different steps. Some parameters com
 ### Parameter sources
 
 Parameters are resolved in this priority order:
+
 1. **SDRF metadata** (per-file, from `convert-diann` design file) — highest priority
 2. **Pipeline parameters** (`--param_name` on command line or params file)
 3. **Nextflow defaults** (`nextflow.config`) — lowest priority
 
 ### Pipeline steps
 
-| Step | Description |
-|------|-------------|
+| Step                            | Description                                                         |
+| ------------------------------- | ------------------------------------------------------------------- |
 | **INSILICO_LIBRARY_GENERATION** | Predicts a spectral library from FASTA using DIA-NN's deep learning |
-| **PRELIMINARY_ANALYSIS** | Per-file calibration and mass accuracy estimation (first pass) |
-| **ASSEMBLE_EMPIRICAL_LIBRARY** | Builds consensus empirical library from preliminary results |
-| **INDIVIDUAL_ANALYSIS** | Per-file quantification with the empirical library (second pass) |
-| **FINAL_QUANTIFICATION** | Aggregates all files into protein/peptide matrices |
+| **PRELIMINARY_ANALYSIS**        | Per-file calibration and mass accuracy estimation (first pass)      |
+| **ASSEMBLE_EMPIRICAL_LIBRARY**  | Builds consensus empirical library from preliminary results         |
+| **INDIVIDUAL_ANALYSIS**         | Per-file quantification with the empirical library (second pass)    |
+| **FINAL_QUANTIFICATION**        | Aggregates all files into protein/peptide matrices                  |
 
 ### Per-file parameters from SDRF
 
 These parameters are extracted per-file from the SDRF via `convert-diann` and stored in `diann_design.tsv`:
 
-| DIA-NN flag | SDRF column | Design column | Steps | Notes |
-|---|---|---|---|---|
-| `--mass-acc-ms1` | `comment[precursor mass tolerance]` | `PrecursorMassTolerance` | PRELIMINARY, INDIVIDUAL | Falls back to auto-detect if missing or not ppm |
-| `--mass-acc` | `comment[fragment mass tolerance]` | `FragmentMassTolerance` | PRELIMINARY, INDIVIDUAL | Falls back to auto-detect if missing or not ppm |
-| `--min-pr-mz` | `comment[ms1 scan range]` or `comment[ms min mz]` | `MS1MinMz` | PRELIMINARY, INDIVIDUAL | Per-file for GPF; global broadest for INSILICO |
-| `--max-pr-mz` | `comment[ms1 scan range]` or `comment[ms max mz]` | `MS1MaxMz` | PRELIMINARY, INDIVIDUAL | Per-file for GPF; global broadest for INSILICO |
-| `--min-fr-mz` | `comment[ms2 scan range]` or `comment[ms2 min mz]` | `MS2MinMz` | PRELIMINARY, INDIVIDUAL | Per-file for GPF; global broadest for INSILICO |
-| `--max-fr-mz` | `comment[ms2 scan range]` or `comment[ms2 max mz]` | `MS2MaxMz` | PRELIMINARY, INDIVIDUAL | Per-file for GPF; global broadest for INSILICO |
+| DIA-NN flag      | SDRF column                                        | Design column            | Steps                   | Notes                                           |
+| ---------------- | -------------------------------------------------- | ------------------------ | ----------------------- | ----------------------------------------------- |
+| `--mass-acc-ms1` | `comment[precursor mass tolerance]`                | `PrecursorMassTolerance` | PRELIMINARY, INDIVIDUAL | Falls back to auto-detect if missing or not ppm |
+| `--mass-acc`     | `comment[fragment mass tolerance]`                 | `FragmentMassTolerance`  | PRELIMINARY, INDIVIDUAL | Falls back to auto-detect if missing or not ppm |
+| `--min-pr-mz`    | `comment[ms1 scan range]` or `comment[ms min mz]`  | `MS1MinMz`               | PRELIMINARY, INDIVIDUAL | Per-file for GPF; global broadest for INSILICO  |
+| `--max-pr-mz`    | `comment[ms1 scan range]` or `comment[ms max mz]`  | `MS1MaxMz`               | PRELIMINARY, INDIVIDUAL | Per-file for GPF; global broadest for INSILICO  |
+| `--min-fr-mz`    | `comment[ms2 scan range]` or `comment[ms2 min mz]` | `MS2MinMz`               | PRELIMINARY, INDIVIDUAL | Per-file for GPF; global broadest for INSILICO  |
+| `--max-fr-mz`    | `comment[ms2 scan range]` or `comment[ms2 max mz]` | `MS2MaxMz`               | PRELIMINARY, INDIVIDUAL | Per-file for GPF; global broadest for INSILICO  |
 
 ### Global parameters from config
 
 These parameters apply globally across all files. They are set in `diann_config.cfg` (from SDRF) or as pipeline parameters:
 
-| DIA-NN flag | Pipeline parameter | Default | Steps | Notes |
-|---|---|---|---|---|
-| `--cut` | (from SDRF enzyme) | — | ALL | Enzyme cut rule, derived from `comment[cleavage agent details]` |
-| `--fixed-mod` | (from SDRF) | — | ALL | Fixed modifications from `comment[modification parameters]` |
-| `--var-mod` | (from SDRF) | — | ALL | Variable modifications from `comment[modification parameters]` |
-| `--monitor-mod` | `--enable_mod_localization` + `--mod_localization` | `false` / `Phospho (S),Phospho (T),Phospho (Y)` | PRELIMINARY, ASSEMBLE, INDIVIDUAL, FINAL | PTM site localization scoring (DIA-NN 1.8.x only) |
-| `--window` | `--scan_window` | `8` | PRELIMINARY, ASSEMBLE, INDIVIDUAL | Scan window; auto-detected when `--scan_window_automatic=true` |
-| `--quick-mass-acc` | `--quick_mass_acc` | `true` | PRELIMINARY | Fast mass accuracy calibration |
-| `--min-corr 2 --corr-diff 1 --time-corr-only` | `--performance_mode` | `true` | PRELIMINARY | High-speed, low-RAM mode |
-| `--pg-level` | `--pg_level` | `2` | INDIVIDUAL, FINAL | Protein grouping level |
-| `--species-genes` | `--species_genes` | `false` | FINAL | Use species-specific gene names |
-| `--no-norm` | `--diann_normalize` | `true` | FINAL | Disable normalization when `false` |
+| DIA-NN flag                                   | Pipeline parameter                                 | Default                                         | Steps                                    | Notes                                                           |
+| --------------------------------------------- | -------------------------------------------------- | ----------------------------------------------- | ---------------------------------------- | --------------------------------------------------------------- |
+| `--cut`                                       | (from SDRF enzyme)                                 | —                                               | ALL                                      | Enzyme cut rule, derived from `comment[cleavage agent details]` |
+| `--fixed-mod`                                 | (from SDRF)                                        | —                                               | ALL                                      | Fixed modifications from `comment[modification parameters]`     |
+| `--var-mod`                                   | (from SDRF)                                        | —                                               | ALL                                      | Variable modifications from `comment[modification parameters]`  |
+| `--monitor-mod`                               | `--enable_mod_localization` + `--mod_localization` | `false` / `Phospho (S),Phospho (T),Phospho (Y)` | PRELIMINARY, ASSEMBLE, INDIVIDUAL, FINAL | PTM site localization scoring (DIA-NN 1.8.x only)               |
+| `--window`                                    | `--scan_window`                                    | `8`                                             | PRELIMINARY, ASSEMBLE, INDIVIDUAL        | Scan window; auto-detected when `--scan_window_automatic=true`  |
+| `--quick-mass-acc`                            | `--quick_mass_acc`                                 | `true`                                          | PRELIMINARY                              | Fast mass accuracy calibration                                  |
+| `--min-corr 2 --corr-diff 1 --time-corr-only` | `--performance_mode`                               | `true`                                          | PRELIMINARY                              | High-speed, low-RAM mode                                        |
+| `--pg-level`                                  | `--pg_level`                                       | `2`                                             | INDIVIDUAL, FINAL                        | Protein grouping level                                          |
+| `--species-genes`                             | `--species_genes`                                  | `false`                                         | FINAL                                    | Use species-specific gene names                                 |
+| `--no-norm`                                   | `--diann_normalize`                                | `true`                                          | FINAL                                    | Disable normalization when `false`                              |
 
 ### PTM site localization (`--monitor-mod`)
 
@@ -161,19 +162,20 @@ nextflow run bigbio/quantmsdiann \
 ```
 
 The parameter accepts two formats:
+
 - **Modification names** (quantms-compatible): `Phospho (S),Phospho (T),Phospho (Y)` — site info in parentheses is stripped, the base name is mapped to UniMod
 - **UniMod accessions** (direct): `UniMod:21,UniMod:1`
 
 Supported modification name mappings:
 
-| Name | UniMod ID | Example |
-|---|---|---|
-| Phospho | `UniMod:21` | `Phospho (S),Phospho (T),Phospho (Y)` |
-| GlyGly | `UniMod:121` | `GlyGly (K)` |
-| Acetyl | `UniMod:1` | `Acetyl (Protein N-term)` |
-| Oxidation | `UniMod:35` | `Oxidation (M)` |
-| Deamidated | `UniMod:7` | `Deamidated (N),Deamidated (Q)` |
-| Methylation | `UniMod:34` | `Methylation (K),Methylation (R)` |
+| Name        | UniMod ID    | Example                               |
+| ----------- | ------------ | ------------------------------------- |
+| Phospho     | `UniMod:21`  | `Phospho (S),Phospho (T),Phospho (Y)` |
+| GlyGly      | `UniMod:121` | `GlyGly (K)`                          |
+| Acetyl      | `UniMod:1`   | `Acetyl (Protein N-term)`             |
+| Oxidation   | `UniMod:35`  | `Oxidation (M)`                       |
+| Deamidated  | `UniMod:7`   | `Deamidated (N),Deamidated (Q)`       |
+| Methylation | `UniMod:34`  | `Methylation (K),Methylation (R)`     |
 
 ## Optional outputs
 
@@ -269,6 +271,7 @@ nextflow run main.nf \
 ```
 
 This config (`conf/tests/test_dia_local.config`) overrides:
+
 - `SDRF_PARSING` → `local/sdrf-pipelines:dev`
 - `SAMPLESHEET_CHECK` → `local/quantms-utils:dev`
 - `DIANN_MSSTATS` → `local/quantms-utils:dev`
diff --git a/modules/local/diann/diann_msstats/main.nf b/modules/local/diann/diann_msstats/main.nf
index b2e96ff..470309d 100644
--- a/modules/local/diann/diann_msstats/main.nf
+++ b/modules/local/diann/diann_msstats/main.nf
@@ -3,8 +3,8 @@ process DIANN_MSSTATS {
     label 'process_medium'
 
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/quantms-utils:0.0.27--pyh106432d_0' :
-        'biocontainers/quantms-utils:0.0.27--pyh106432d_0' }"
+        'https://depot.galaxyproject.org/singularity/quantms-utils:0.0.28--pyh106432d_0' :
+        'biocontainers/quantms-utils:0.0.28--pyh106432d_0' }"
 
     input:
     path(report)
diff --git a/modules/local/diann/generate_cfg/main.nf b/modules/local/diann/generate_cfg/main.nf
index 7acf515..8377030 100644
--- a/modules/local/diann/generate_cfg/main.nf
+++ b/modules/local/diann/generate_cfg/main.nf
@@ -3,8 +3,8 @@ process GENERATE_CFG {
     label 'process_tiny'
 
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/quantms-utils:0.0.27--pyh106432d_0' :
-        'biocontainers/quantms-utils:0.0.27--pyh106432d_0' }"
+        'https://depot.galaxyproject.org/singularity/quantms-utils:0.0.28--pyh106432d_0' :
+        'biocontainers/quantms-utils:0.0.28--pyh106432d_0' }"
 
     input:
     val(meta)
diff --git a/modules/local/diann/individual_analysis/main.nf b/modules/local/diann/individual_analysis/main.nf
index 0df29dd..36502b5 100644
--- a/modules/local/diann/individual_analysis/main.nf
+++ b/modules/local/diann/individual_analysis/main.nf
@@ -49,12 +49,12 @@ process INDIVIDUAL_ANALYSIS {
             mass_acc_ms2 = meta.mass_acc_ms2
             mass_acc_ms1 = meta.mass_acc_ms1
             scan_window  = meta.scan_window
-        } 
+        }
         else if (meta['fragmentmasstolerance']) {
             mass_acc_ms2 = meta['fragmentmasstolerance']
             mass_acc_ms1 = meta['precursormasstolerance']
             scan_window  = params.scan_window
-        } 
+        }
         else {
             mass_acc_ms2 = params.mass_acc_ms2
             mass_acc_ms1 = params.mass_acc_ms1
diff --git a/modules/local/pmultiqc/main.nf b/modules/local/pmultiqc/main.nf
index af25267..f9d1964 100644
--- a/modules/local/pmultiqc/main.nf
+++ b/modules/local/pmultiqc/main.nf
@@ -2,8 +2,8 @@ process PMULTIQC {
     label 'process_high'
 
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/pmultiqc:0.0.42--pyhdfd78af_0' :
-        'biocontainers/pmultiqc:0.0.42--pyhdfd78af_0' }"
+        'https://depot.galaxyproject.org/singularity/pmultiqc:0.0.43--pyhdfd78af_0' :
+        'biocontainers/pmultiqc:0.0.43--pyhdfd78af_0' }"
 
     input:
     path 'results/*'
diff --git a/modules/local/samplesheet_check/main.nf b/modules/local/samplesheet_check/main.nf
index 09a1303..f2b7112 100644
--- a/modules/local/samplesheet_check/main.nf
+++ b/modules/local/samplesheet_check/main.nf
@@ -4,8 +4,8 @@ process SAMPLESHEET_CHECK {
     label 'process_tiny'
 
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/quantms-utils:0.0.27--pyh106432d_0' :
-        'biocontainers/quantms-utils:0.0.27--pyh106432d_0' }"
+        'https://depot.galaxyproject.org/singularity/quantms-utils:0.0.28--pyh106432d_0' :
+        'biocontainers/quantms-utils:0.0.28--pyh106432d_0' }"
 
     input:
     path input_file
diff --git a/modules/local/utils/mzml_statistics/main.nf b/modules/local/utils/mzml_statistics/main.nf
index 86bd694..f6a96d4 100644
--- a/modules/local/utils/mzml_statistics/main.nf
+++ b/modules/local/utils/mzml_statistics/main.nf
@@ -4,8 +4,8 @@ process MZML_STATISTICS {
     label 'process_single'
 
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/quantms-utils:0.0.27--pyh106432d_0' :
-        'biocontainers/quantms-utils:0.0.27--pyh106432d_0' }"
+        'https://depot.galaxyproject.org/singularity/quantms-utils:0.0.28--pyh106432d_0' :
+        'biocontainers/quantms-utils:0.0.28--pyh106432d_0' }"
 
     input:
     tuple val(meta), path(ms_file)
diff --git a/nextflow_schema.json b/nextflow_schema.json
index c992d3e..383909c 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -302,6 +302,21 @@
             "description": "Settings for DIA-NN - a universal software for data-independent acquisition (DIA) proteomics data processing.",
             "default": "",
             "properties": {
+                "diann_version": {
+                    "type": "string",
+                    "description": "Specify the DIA-NN version to be used in the workflow.",
+                    "fa_icon": "fas fa-tag"
+                },
+                "enable_mod_localization": {
+                    "type": "boolean",
+                    "description": "Enable or disable modification localization scoring in DIA-NN.",
+                    "fa_icon": "fas fa-map-marker-alt"
+                },
+                "mod_localization": {
+                    "type": "string",
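+                    "description": "Comma-separated modification names (e.g. 'Phospho (S),Phospho (T),Phospho (Y)') or UniMod accessions (e.g. 'UniMod:21,UniMod:1') to monitor for PTM site localization (DIA-NN --monitor-mod).",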
+                    "fa_icon": "fas fa-cogs"
+                },
                 "mass_acc_automatic": {
                     "type": "boolean",
                     "default": true,
diff --git a/workflows/dia.nf b/workflows/dia.nf
index c6c7ad6..5b66ee8 100644
--- a/workflows/dia.nf
+++ b/workflows/dia.nf
@@ -62,7 +62,7 @@ workflow DIA {
         def log_file = params.empirical_assembly_log ? file(params.empirical_assembly_log) : null
         def parsed_m2 = "0"
         def parsed_m1 = "0"
-        def parsed_w  = "0"        
+        def parsed_w  = "0"
         if (log_file && log_file.exists()) {
             def matcher = log_file.text =~ /Mass accuracy = ([0-9.]+)ppm, MS1 accuracy = ([0-9.]+)ppm, Scan window = ([0-9.]+)/
             if (matcher) {
@@ -70,7 +70,7 @@ workflow DIA {
                 parsed_m1 = matcher[0][2]
                 parsed_w  = matcher[0][3]
             }
-        }        
+        }
         indiv_fin_analysis_in = ch_file_preparation_results
             .combine(ch_searchdb)
             .combine(speclib)

From 4e4f0c9491a05b7360251596e5608c626a0e632f Mon Sep 17 00:00:00 2001
From: yueqixuan <yueqx@foxmail.com>
Date: Tue, 31 Mar 2026 21:07:32 +0800
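Subject: [PATCH 09/16] emit calibrated DIA-NN params as env output, add PARSE_EMPIRICAL_LOG, drop unused SDRF validation params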

---
 .../diann/assemble_empirical_library/main.nf  |  4 +-
 .../diann/assemble_empirical_library/meta.yml |  8 ++--
 .../local/diann/individual_analysis/main.nf   | 19 ++++-----
 modules/local/parse_empirical_log/main.nf     | 18 ++++++++
 modules/local/parse_empirical_log/meta.yml    | 21 ++++++++++
 modules/local/samplesheet_check/meta.yml      |  3 --
 modules/local/sdrf_parsing/main.nf            |  1 -
 modules/local/sdrf_parsing/meta.yml           |  6 +--
 nextflow.config                               |  4 --
 nextflow_schema.json                          | 20 ---------
 .../local/create_input_channel/main.nf        |  3 +-
 workflows/dia.nf                              | 41 +++++++++++--------
 12 files changed, 79 insertions(+), 69 deletions(-)
 create mode 100644 modules/local/parse_empirical_log/main.nf
 create mode 100644 modules/local/parse_empirical_log/meta.yml

diff --git a/modules/local/diann/assemble_empirical_library/main.nf b/modules/local/diann/assemble_empirical_library/main.nf
index 2bb3e37..809c46f 100644
--- a/modules/local/diann/assemble_empirical_library/main.nf
+++ b/modules/local/diann/assemble_empirical_library/main.nf
@@ -19,7 +19,7 @@ process ASSEMBLE_EMPIRICAL_LIBRARY {
     path "empirical_library.*", emit: empirical_library
     path "assemble_empirical_library.log", emit: log
     path "versions.yml", emit: versions
-    path "diann_calibrated_params.csv", emit: calibrated_params
+    env CALIBRATED_PARAMS_VAL, emit: calibrated_params_val
 
     when:
     task.ext.when == null || task.ext.when
@@ -90,7 +90,7 @@ process ASSEMBLE_EMPIRICAL_LIBRARY {
     if [ -z "\$val_mass_acc_ms2" ]; then val_mass_acc_ms2="0"; fi
     if [ -z "\$val_mass_acc_ms1" ]; then val_mass_acc_ms1="0"; fi
     if [ -z "\$val_scan_window" ]; then val_scan_window="0"; fi
-    echo "\${val_mass_acc_ms2},\${val_mass_acc_ms1},\${val_scan_window}" > diann_calibrated_params.csv
+    export CALIBRATED_PARAMS_VAL="\${val_mass_acc_ms2},\${val_mass_acc_ms1},\${val_scan_window}"
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
diff --git a/modules/local/diann/assemble_empirical_library/meta.yml b/modules/local/diann/assemble_empirical_library/meta.yml
index c6ad7ab..f4a22bc 100644
--- a/modules/local/diann/assemble_empirical_library/meta.yml
+++ b/modules/local/diann/assemble_empirical_library/meta.yml
@@ -35,9 +35,9 @@ output:
       type: file
       description: File containing software version
       pattern: "versions.yml"
-  - calibrated_params:
-      type: file
-      description: A file containing mass_acc_ms2, mass_acc_ms1, and scan_window extracted from the DIA-NN log.
-      pattern: "diann_calibrated_params.csv"
+  - calibrated_params_val:
+      type: string
+      description: A comma-separated string containing mass_acc_ms2, mass_acc_ms1, and scan_window extracted from the DIA-NN log.
+      pattern: "*,*,*"
 authors:
   - "@daichengxin"
diff --git a/modules/local/diann/individual_analysis/main.nf b/modules/local/diann/individual_analysis/main.nf
index 36502b5..28cb3b5 100644
--- a/modules/local/diann/individual_analysis/main.nf
+++ b/modules/local/diann/individual_analysis/main.nf
@@ -50,7 +50,7 @@ process INDIVIDUAL_ANALYSIS {
             mass_acc_ms1 = meta.mass_acc_ms1
             scan_window  = meta.scan_window
         }
-        else if (meta['fragmentmasstolerance']) {
+        else if (meta['precursormasstoleranceunit']?.toLowerCase()?.endsWith('ppm') && meta['fragmentmasstoleranceunit']?.toLowerCase()?.endsWith('ppm')) {
             mass_acc_ms2 = meta['fragmentmasstolerance']
             mass_acc_ms1 = meta['precursormasstolerance']
             scan_window  = params.scan_window
@@ -60,19 +60,18 @@ process INDIVIDUAL_ANALYSIS {
             mass_acc_ms1 = params.mass_acc_ms1
             scan_window  = params.scan_window
         }
-    } else if (meta['precursormasstoleranceunit']?.toLowerCase()?.endsWith('ppm') && meta['fragmentmasstoleranceunit']?.toLowerCase()?.endsWith('ppm')) {
-        mass_acc_ms1 = meta["precursormasstolerance"]
-        mass_acc_ms2 = meta["fragmentmasstolerance"]
     } else {
-        if (meta.mass_acc_ms2 != "0" && meta.mass_acc_ms2 != null) {
+        if (meta['precursormasstoleranceunit']?.toLowerCase()?.endsWith('ppm') && meta['fragmentmasstoleranceunit']?.toLowerCase()?.endsWith('ppm')) {
+            mass_acc_ms1 = meta["precursormasstolerance"]
+            mass_acc_ms2 = meta["fragmentmasstolerance"]
+            scan_window  = params.scan_window
+        }
+        else if (meta.mass_acc_ms2 != "0" && meta.mass_acc_ms2 != null) {
             mass_acc_ms2 = meta.mass_acc_ms2
             mass_acc_ms1 = meta.mass_acc_ms1
             scan_window  = meta.scan_window
-        } else if (meta['fragmentmasstolerance']) {
-            mass_acc_ms2 = meta['fragmentmasstolerance']
-            mass_acc_ms1 = meta['precursormasstolerance']
-            scan_window  = params.scan_window
-        } else {
+        }
+        else {
             mass_acc_ms2 = params.mass_acc_ms2
             mass_acc_ms1 = params.mass_acc_ms1
             scan_window  = params.scan_window
diff --git a/modules/local/parse_empirical_log/main.nf b/modules/local/parse_empirical_log/main.nf
new file mode 100644
index 0000000..4ff2e78
--- /dev/null
+++ b/modules/local/parse_empirical_log/main.nf
@@ -0,0 +1,18 @@
+process PARSE_EMPIRICAL_LOG { // Extracts "mass_acc_ms2,mass_acc_ms1,scan_window" from an empirical-assembly log
+    label 'process_single' // NOTE(review): no container directive on this process; perl must exist wherever the task runs -- confirm
+
+    input:
+    path log_file // log searched for a "Mass accuracy = ..ppm, MS1 accuracy = ..ppm, Scan window = .." line
+
+    output:
+    env PARSED_VALS, emit: parsed_vals // comma-separated string; "0,0,0" when no such line is found
+
+    script:
+    """
+    parsed=\$(perl -ne 'if (/Mass accuracy = ([0-9.]+)ppm, MS1 accuracy = ([0-9.]+)ppm, Scan window = ([0-9.]+)/) { print "\$1,\$2,\$3"; exit; }' "${log_file}")
+    if [ -z "\$parsed" ]; then
+        parsed="0,0,0"  # sentinel: the settings line was not found (only the first match is ever used)
+    fi
+    export PARSED_VALS="\$parsed"
+    """
+}
\ No newline at end of file
diff --git a/modules/local/parse_empirical_log/meta.yml b/modules/local/parse_empirical_log/meta.yml
new file mode 100644
index 0000000..faf17fe
--- /dev/null
+++ b/modules/local/parse_empirical_log/meta.yml
@@ -0,0 +1,21 @@
+name: "parse_empirical_log"
+description: Parses the empirical assembly log file (e.g., from DIA-NN) to extract calibrated mass accuracies and scan window parameters for downstream analysis.
+keywords:
+  - quantmsdiann
+tools:
+  - "perl":
+      description: "Larry Wall's Practical Extraction and Report Language, used here for regex parsing."
+      homepage: "https://www.perl.org/"
+      documentation: "https://perldoc.perl.org/"
+input:
+  - log_file:
+      type: file
+      description: The log file generated by the empirical library assembly step (e.g., DIA-NN stdout/stderr log) containing the Averaged recommended settings.
+      pattern: "*.log"
+output:
+  - parsed_vals:
+      type: string
+      description: A comma-separated string containing extracted mass_acc_ms2, mass_acc_ms1, and scan_window values (e.g., "15.0,20.0,3"). Defaults to "0,0,0" if no match is found.
+      pattern: "*,*,*"
+authors:
+  - "@Qi-Xuan Yue"
\ No newline at end of file
diff --git a/modules/local/samplesheet_check/meta.yml b/modules/local/samplesheet_check/meta.yml
index 28ed5e4..be51717 100644
--- a/modules/local/samplesheet_check/meta.yml
+++ b/modules/local/samplesheet_check/meta.yml
@@ -12,9 +12,6 @@ input:
     type: file
     description: Input samplesheet or experimental design file
     pattern: "*.{tsv,csv,sdrf}"
-  - meta: validate_ontologies
-    type: boolean
-    description: Whether to validate ontologies
 output:
   - meta: log
     type: file
diff --git a/modules/local/sdrf_parsing/main.nf b/modules/local/sdrf_parsing/main.nf
index ef9367b..a89bc61 100644
--- a/modules/local/sdrf_parsing/main.nf
+++ b/modules/local/sdrf_parsing/main.nf
@@ -10,7 +10,6 @@ process SDRF_PARSING {
     path sdrf
 
     output:
-    path "diann_design.tsv"  , emit: ch_sdrf_config_file
     path "diann_design.tsv"  , emit: ch_expdesign
     path "diann_config.cfg"  , emit: ch_diann_cfg
     path "*.log"             , emit: log
diff --git a/modules/local/sdrf_parsing/meta.yml b/modules/local/sdrf_parsing/meta.yml
index 860f3f1..7c311f4 100644
--- a/modules/local/sdrf_parsing/meta.yml
+++ b/modules/local/sdrf_parsing/meta.yml
@@ -19,11 +19,7 @@ output:
   - ch_expdesign:
       type: file
       description: experimental design file in OpenMS format
-      pattern: "*openms_design.tsv"
-  - ch_sdrf_config_file:
-      type: file
-      description: config file with search engine parameters in OpenMS nomenclature
-      pattern: "*_config.tsv"
+      pattern: "*_design.tsv"
   - mqpar:
       type: file
       description: maxquant configuration file
diff --git a/nextflow.config b/nextflow.config
index 636562b..855456b 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -16,10 +16,6 @@ params {
 
     // Input options and validation of sdrf files
     input                      = null
-    validate_ontologies        = true // Enable to validate ontology terms in the SDRF
-    skip_ms_validation         = false // Skip the validation of the MS metadata in the SDRF
-    skip_factor_validation     = true // Skip factor values validation, factor values are important for downstream analysis
-    skip_experimental_design_validation = false // Skip the validation of the experimental design in the SDRF (replicates, etc)
     use_ols_cache_only         = true // Use only the OLS cache for ontology validation (no network requests)
 
     // Tools flags
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 383909c..68f262b 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -62,26 +62,6 @@
             "description": "Settings for validating the input SDRF file.",
             "default": "",
             "properties": {
-                "validate_ontologies": {
-                    "type": "boolean",
-                    "description": "Check that ontology terms in an input SDRF file exist.",
-                    "fa_icon": "far fa-check-square",
-                    "help_text": "If false, only a basic readability check is performed on an input SDRF file. This option is useful when ontology providers are inaccessible.",
-                    "default": true
-                },
-                "skip_ms_validation": {
-                    "type": "boolean",
-                    "description": "Skip validation of mass spectrometry files.",
-                    "fa_icon": "far fa-check-square",
-                    "help_text": "Skip validation of mass spectrometry metadata, including PTMs, tolerances or enzymes. Only useful if your metadata is correct but the terms are not in ontologies."
-                },
-                "skip_factor_validation": {
-                    "type": "boolean",
-                    "description": "Skip validation of factor columns.",
-                    "fa_icon": "far fa-check-square",
-                    "help_text": "Skip validation of factor columns in the SDRF. Only useful if your factor values are correct but the sdrf-validation library does not recognize them.",
-                    "default": true
-                },
                 "skip_experimental_design_validation": {
                     "type": "boolean",
                     "description": "Skip validation of experimental design.",
diff --git a/subworkflows/local/create_input_channel/main.nf b/subworkflows/local/create_input_channel/main.nf
index d44e51c..5465021 100644
--- a/subworkflows/local/create_input_channel/main.nf
+++ b/subworkflows/local/create_input_channel/main.nf
@@ -15,7 +15,6 @@ workflow CREATE_INPUT_CHANNEL {
     // Always parse as SDRF using DIA-NN converter
     SDRF_PARSING(ch_sdrf)
     ch_versions = ch_versions.mix(SDRF_PARSING.out.versions)
-    ch_config = SDRF_PARSING.out.ch_sdrf_config_file
     ch_expdesign = SDRF_PARSING.out.ch_expdesign
     ch_diann_cfg = SDRF_PARSING.out.ch_diann_cfg
 
@@ -27,7 +26,7 @@ workflow CREATE_INPUT_CHANNEL {
         experiment_id: file(ch_sdrf.toString()).baseName,
     ]
 
-    ch_config
+    ch_expdesign
         .splitCsv(header: true, sep: '\t')
         .map { row -> create_meta_channel(row, enzymes, files, wrapper) }
         .set { ch_meta_config_dia }
diff --git a/workflows/dia.nf b/workflows/dia.nf
index 5b66ee8..0b75b9a 100644
--- a/workflows/dia.nf
+++ b/workflows/dia.nf
@@ -7,8 +7,9 @@
 //
 // MODULES: Local to the pipeline
 //
-include { DIANN_MSSTATS              } from '../modules/local/diann/diann_msstats/main'
+include { DIANN_MSSTATS               } from '../modules/local/diann/diann_msstats/main'
 include { PRELIMINARY_ANALYSIS        } from '../modules/local/diann/preliminary_analysis/main'
+include { PARSE_EMPIRICAL_LOG         } from '../modules/local/parse_empirical_log/main'
 include { ASSEMBLE_EMPIRICAL_LIBRARY  } from '../modules/local/diann/assemble_empirical_library/main'
 include { INSILICO_LIBRARY_GENERATION } from '../modules/local/diann/insilico_library_generation/main'
 include { INDIVIDUAL_ANALYSIS         } from '../modules/local/diann/individual_analysis/main'
@@ -59,26 +60,30 @@ workflow DIA {
     }
 
     if (params.skip_preliminary_analysis) {
-        def log_file = params.empirical_assembly_log ? file(params.empirical_assembly_log) : null
-        def parsed_m2 = "0"
-        def parsed_m1 = "0"
-        def parsed_w  = "0"
-        if (log_file && log_file.exists()) {
-            def matcher = log_file.text =~ /Mass accuracy = ([0-9.]+)ppm, MS1 accuracy = ([0-9.]+)ppm, Scan window = ([0-9.]+)/
-            if (matcher) {
-                parsed_m2 = matcher[0][1]
-                parsed_m1 = matcher[0][2]
-                parsed_w  = matcher[0][3]
+        if (params.empirical_assembly_log) {
+            ch_log_file = Channel.fromPath(params.empirical_assembly_log, checkIfExists: true)
+            PARSE_EMPIRICAL_LOG(ch_log_file)
+            ch_parsed_vals = PARSE_EMPIRICAL_LOG.out.parsed_vals.map { parsed_str ->
+                def clean_str = parsed_str.trim()
+                if (clean_str == "0,0,0") {
+                    return "${params.mass_acc_ms2},${params.mass_acc_ms1},${params.scan_window}"
+                } else {
+                    return clean_str
+                }
             }
+        } else {
+            ch_parsed_vals = Channel.value("${params.mass_acc_ms2},${params.mass_acc_ms1},${params.scan_window}")
         }
         indiv_fin_analysis_in = ch_file_preparation_results
             .combine(ch_searchdb)
             .combine(speclib)
-            .map { meta_map, ms_file, fasta, library ->
+            .combine(ch_parsed_vals)
+            .map { meta_map, ms_file, fasta, library, param_string ->
+                def values = param_string.split(',')
                 def new_meta = meta_map + [
-                    mass_acc_ms2 : parsed_m2,
-                    mass_acc_ms1 : parsed_m1,
-                    scan_window  : parsed_w
+                    mass_acc_ms2 : values[0],
+                    mass_acc_ms1 : values[1],
+                    scan_window  : values[2]
                 ]
                 return [ new_meta, ms_file, fasta, library ]
             }
@@ -121,9 +126,9 @@ workflow DIA {
         indiv_fin_analysis_in = ch_file_preparation_results
             .combine(ch_searchdb)
             .combine(ASSEMBLE_EMPIRICAL_LIBRARY.out.empirical_library)
-            .combine(ASSEMBLE_EMPIRICAL_LIBRARY.out.calibrated_params)
-            .map { meta_map, ms_file, fasta, library, param_file ->
-                def values = param_file.text.trim().split(',')
+            .combine(ASSEMBLE_EMPIRICAL_LIBRARY.out.calibrated_params_val)
+            .map { meta_map, ms_file, fasta, library, param_string ->
+                def values = param_string.trim().split(',')
                 def new_meta = meta_map + [
                     mass_acc_ms2 : values[0],
                     mass_acc_ms1 : values[1],

From 79ce89fa86040fb75971c815e6e1bfb76eb5e050 Mon Sep 17 00:00:00 2001
From: yueqixuan <yueqx@foxmail.com>
Date: Tue, 31 Mar 2026 21:11:58 +0800
Subject: [PATCH 10/16] update: add missing trailing newlines to parse_empirical_log module files

---
 modules/local/parse_empirical_log/main.nf  | 2 +-
 modules/local/parse_empirical_log/meta.yml | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/modules/local/parse_empirical_log/main.nf b/modules/local/parse_empirical_log/main.nf
index 4ff2e78..88680ab 100644
--- a/modules/local/parse_empirical_log/main.nf
+++ b/modules/local/parse_empirical_log/main.nf
@@ -15,4 +15,4 @@ process PARSE_EMPIRICAL_LOG {
     fi
     export PARSED_VALS="\$parsed"
     """
-}
\ No newline at end of file
+}
diff --git a/modules/local/parse_empirical_log/meta.yml b/modules/local/parse_empirical_log/meta.yml
index faf17fe..4388aa0 100644
--- a/modules/local/parse_empirical_log/meta.yml
+++ b/modules/local/parse_empirical_log/meta.yml
@@ -18,4 +18,4 @@ output:
       description: A comma-separated string containing extracted mass_acc_ms2, mass_acc_ms1, and scan_window values (e.g., "15.0,20.0,3"). Defaults to "0,0,0" if no match is found.
       pattern: "*,*,*"
 authors:
-  - "@Qi-Xuan Yue"
\ No newline at end of file
+  - "@Qi-Xuan Yue"

From c7cf820bef7a1486f32bda35a5a5782e06e553b1 Mon Sep 17 00:00:00 2001
From: yueqixuan <yueqx@foxmail.com>
Date: Tue, 31 Mar 2026 21:15:59 +0800
Subject: [PATCH 11/16] update: remove skip_experimental_design_validation from nextflow_schema.json

---
 nextflow_schema.json | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/nextflow_schema.json b/nextflow_schema.json
index 68f262b..c507318 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -62,12 +62,6 @@
             "description": "Settings for validating the input SDRF file.",
             "default": "",
             "properties": {
-                "skip_experimental_design_validation": {
-                    "type": "boolean",
-                    "description": "Skip validation of experimental design.",
-                    "fa_icon": "far fa-check-square",
-                    "help_text": "Skip validation of experimental design in the SDRF. Only useful if your experimental design is correct but the sdrf-validation library does not recognize it."
-                },
                 "use_ols_cache_only": {
                     "type": "boolean",
                     "description": "Use cached version of the Ontology Lookup Service (OLS).",

From be2d488c2b23cdb1de19b9fabe957e7045fd713b Mon Sep 17 00:00:00 2001
From: yueqixuan <yueqx@foxmail.com>
Date: Tue, 31 Mar 2026 21:23:25 +0800
Subject: [PATCH 12/16] update: initialise env output variables to "0,0,0" before log parsing

---
 modules/local/diann/assemble_empirical_library/main.nf | 1 +
 modules/local/parse_empirical_log/main.nf              | 1 +
 2 files changed, 2 insertions(+)

diff --git a/modules/local/diann/assemble_empirical_library/main.nf b/modules/local/diann/assemble_empirical_library/main.nf
index 809c46f..9a6622f 100644
--- a/modules/local/diann/assemble_empirical_library/main.nf
+++ b/modules/local/diann/assemble_empirical_library/main.nf
@@ -84,6 +84,7 @@ process ASSEMBLE_EMPIRICAL_LIBRARY {
 
     cp report.log.txt assemble_empirical_library.log
 
+    CALIBRATED_PARAMS_VAL="0,0,0"
     val_mass_acc_ms2=\$(grep "Averaged recommended settings" assemble_empirical_library.log | cut -d ' ' -f 11 | tr -cd "[0-9.]")
     val_mass_acc_ms1=\$(grep "Averaged recommended settings" assemble_empirical_library.log | cut -d ' ' -f 15 | tr -cd "[0-9.]")
     val_scan_window=\$(grep "Averaged recommended settings" assemble_empirical_library.log | cut -d ' ' -f 19 | tr -cd "[0-9.]")
diff --git a/modules/local/parse_empirical_log/main.nf b/modules/local/parse_empirical_log/main.nf
index 88680ab..7927d46 100644
--- a/modules/local/parse_empirical_log/main.nf
+++ b/modules/local/parse_empirical_log/main.nf
@@ -9,6 +9,7 @@ process PARSE_EMPIRICAL_LOG {
 
     script:
     """
+    PARSED_VALS="0,0,0"
     parsed=\$(perl -ne 'if (/Mass accuracy = ([0-9.]+)ppm, MS1 accuracy = ([0-9.]+)ppm, Scan window = ([0-9.]+)/) { print "\$1,\$2,\$3"; exit; }' ${log_file})
     if [ -z "\$parsed" ]; then
         parsed="0,0,0"

From fb9eb34104982437bd25fd1194f4a31c130ca973 Mon Sep 17 00:00:00 2001
From: yueqixuan <yueqx@foxmail.com>
Date: Tue, 31 Mar 2026 21:48:40 +0800
Subject: [PATCH 13/16] fix: quote env output variable names in process output declarations

---
 modules/local/diann/assemble_empirical_library/main.nf | 2 +-
 modules/local/parse_empirical_log/main.nf              | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/modules/local/diann/assemble_empirical_library/main.nf b/modules/local/diann/assemble_empirical_library/main.nf
index 9a6622f..0dfad72 100644
--- a/modules/local/diann/assemble_empirical_library/main.nf
+++ b/modules/local/diann/assemble_empirical_library/main.nf
@@ -19,7 +19,7 @@ process ASSEMBLE_EMPIRICAL_LIBRARY {
     path "empirical_library.*", emit: empirical_library
     path "assemble_empirical_library.log", emit: log
     path "versions.yml", emit: versions
-    env CALIBRATED_PARAMS_VAL, emit: calibrated_params_val
+    env "CALIBRATED_PARAMS_VAL", emit: calibrated_params_val
 
     when:
     task.ext.when == null || task.ext.when
diff --git a/modules/local/parse_empirical_log/main.nf b/modules/local/parse_empirical_log/main.nf
index 7927d46..0951ae0 100644
--- a/modules/local/parse_empirical_log/main.nf
+++ b/modules/local/parse_empirical_log/main.nf
@@ -5,7 +5,7 @@ process PARSE_EMPIRICAL_LOG {
     path log_file
 
     output:
-    env PARSED_VALS, emit: parsed_vals
+    env "PARSED_VALS", emit: parsed_vals
 
     script:
     """

From 0110a3de5c60d8e3af35fa697cee6850c50bd6a5 Mon Sep 17 00:00:00 2001
From: yueqixuan <yueqx@foxmail.com>
Date: Wed, 1 Apr 2026 10:05:43 +0800
Subject: [PATCH 14/16] update: move log parsing into PARSE_EMPIRICAL_LOG_TASK module and PARSE_EMPIRICAL_LOG subworkflow

---
 .../diann/assemble_empirical_library/main.nf  | 10 --------
 .../diann/assemble_empirical_library/meta.yml |  4 ---
 modules/local/parse_empirical_log/main.nf     | 19 --------------
 modules/local/parse_empirical_log/meta.yml    | 21 ----------------
 .../local/parse_empirical_log_task/main.nf    | 24 ++++++++++++++++++
 .../local/parse_empirical_log_task/meta.yml   | 25 +++++++++++++++++++
 .../local/parse_empirical_log/main.nf         | 16 ++++++++++++
 .../local/parse_empirical_log/meta.yml        | 25 +++++++++++++++++++
 workflows/dia.nf                              | 21 ++++++----------
 9 files changed, 98 insertions(+), 67 deletions(-)
 delete mode 100644 modules/local/parse_empirical_log/main.nf
 delete mode 100644 modules/local/parse_empirical_log/meta.yml
 create mode 100644 modules/local/parse_empirical_log_task/main.nf
 create mode 100644 modules/local/parse_empirical_log_task/meta.yml
 create mode 100644 subworkflows/local/parse_empirical_log/main.nf
 create mode 100644 subworkflows/local/parse_empirical_log/meta.yml

diff --git a/modules/local/diann/assemble_empirical_library/main.nf b/modules/local/diann/assemble_empirical_library/main.nf
index 0dfad72..034b95e 100644
--- a/modules/local/diann/assemble_empirical_library/main.nf
+++ b/modules/local/diann/assemble_empirical_library/main.nf
@@ -19,7 +19,6 @@ process ASSEMBLE_EMPIRICAL_LIBRARY {
     path "empirical_library.*", emit: empirical_library
     path "assemble_empirical_library.log", emit: log
     path "versions.yml", emit: versions
-    env "CALIBRATED_PARAMS_VAL", emit: calibrated_params_val
 
     when:
     task.ext.when == null || task.ext.when
@@ -84,15 +83,6 @@ process ASSEMBLE_EMPIRICAL_LIBRARY {
 
     cp report.log.txt assemble_empirical_library.log
 
-    CALIBRATED_PARAMS_VAL="0,0,0"
-    val_mass_acc_ms2=\$(grep "Averaged recommended settings" assemble_empirical_library.log | cut -d ' ' -f 11 | tr -cd "[0-9.]")
-    val_mass_acc_ms1=\$(grep "Averaged recommended settings" assemble_empirical_library.log | cut -d ' ' -f 15 | tr -cd "[0-9.]")
-    val_scan_window=\$(grep "Averaged recommended settings" assemble_empirical_library.log | cut -d ' ' -f 19 | tr -cd "[0-9.]")
-    if [ -z "\$val_mass_acc_ms2" ]; then val_mass_acc_ms2="0"; fi
-    if [ -z "\$val_mass_acc_ms1" ]; then val_mass_acc_ms1="0"; fi
-    if [ -z "\$val_scan_window" ]; then val_scan_window="0"; fi
-    export CALIBRATED_PARAMS_VAL="\${val_mass_acc_ms2},\${val_mass_acc_ms1},\${val_scan_window}"
-
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
         DIA-NN: \$(diann 2>&1 | grep "DIA-NN" | grep -oP "\\d+\\.\\d+(\\.\\w+)*(\\.[\\d]+)?")
diff --git a/modules/local/diann/assemble_empirical_library/meta.yml b/modules/local/diann/assemble_empirical_library/meta.yml
index f4a22bc..0d1f5b7 100644
--- a/modules/local/diann/assemble_empirical_library/meta.yml
+++ b/modules/local/diann/assemble_empirical_library/meta.yml
@@ -35,9 +35,5 @@ output:
       type: file
       description: File containing software version
       pattern: "versions.yml"
-  - calibrated_params_val:
-      type: string
-      description: A comma-separated string containing mass_acc_ms2, mass_acc_ms1, and scan_window extracted from the DIA-NN log.
-      pattern: "*,*,*"
 authors:
   - "@daichengxin"
diff --git a/modules/local/parse_empirical_log/main.nf b/modules/local/parse_empirical_log/main.nf
deleted file mode 100644
index 0951ae0..0000000
--- a/modules/local/parse_empirical_log/main.nf
+++ /dev/null
@@ -1,19 +0,0 @@
-process PARSE_EMPIRICAL_LOG {
-    label 'process_single'
-
-    input:
-    path log_file
-
-    output:
-    env "PARSED_VALS", emit: parsed_vals
-
-    script:
-    """
-    PARSED_VALS="0,0,0"
-    parsed=\$(perl -ne 'if (/Mass accuracy = ([0-9.]+)ppm, MS1 accuracy = ([0-9.]+)ppm, Scan window = ([0-9.]+)/) { print "\$1,\$2,\$3"; exit; }' ${log_file})
-    if [ -z "\$parsed" ]; then
-        parsed="0,0,0"
-    fi
-    export PARSED_VALS="\$parsed"
-    """
-}
diff --git a/modules/local/parse_empirical_log/meta.yml b/modules/local/parse_empirical_log/meta.yml
deleted file mode 100644
index 4388aa0..0000000
--- a/modules/local/parse_empirical_log/meta.yml
+++ /dev/null
@@ -1,21 +0,0 @@
-name: "parse_empirical_log"
-description: Parses the empirical assembly log file (e.g., from DIA-NN) to extract calibrated mass accuracies and scan window parameters for downstream analysis.
-keywords:
-  - quantmsdiann
-tools:
-  - "perl":
-      description: "Larry Wall's Practical Extraction and Report Language, used here for regex parsing."
-      homepage: "https://www.perl.org/"
-      documentation: "https://perldoc.perl.org/"
-input:
-  - log_file:
-      type: file
-      description: The log file generated by the empirical library assembly step (e.g., DIA-NN stdout/stderr log) containing the Averaged recommended settings.
-      pattern: "*.log"
-output:
-  - parsed_vals:
-      type: string
-      description: A comma-separated string containing extracted mass_acc_ms2, mass_acc_ms1, and scan_window values (e.g., "15.0,20.0,3"). Defaults to "0,0,0" if no match is found.
-      pattern: "*,*,*"
-authors:
-  - "@Qi-Xuan Yue"
diff --git a/modules/local/parse_empirical_log_task/main.nf b/modules/local/parse_empirical_log_task/main.nf
new file mode 100644
index 0000000..8a698de
--- /dev/null
+++ b/modules/local/parse_empirical_log_task/main.nf
@@ -0,0 +1,24 @@
+process PARSE_EMPIRICAL_LOG_TASK {
+    label 'process_single'
+
+    input:
+    path log_file
+
+    output:
+    stdout emit: parsed_vals
+
+    script:
+    """
+    val_mass_acc_ms2=\$(grep "Averaged recommended settings" ${log_file} | cut -d ' ' -f 11 | tr -cd "[0-9.]")
+    val_mass_acc_ms1=\$(grep "Averaged recommended settings" ${log_file} | cut -d ' ' -f 15 | tr -cd "[0-9.]")
+    val_scan_window=\$(grep "Averaged recommended settings" ${log_file} | cut -d ' ' -f 19 | tr -cd "[0-9.]")
+    
+    if [ -z "\$val_mass_acc_ms2" ]; then val_mass_acc_ms2=${params.mass_acc_ms2}; fi
+    if [ -z "\$val_mass_acc_ms1" ]; then val_mass_acc_ms1=${params.mass_acc_ms1}; fi
+    if [ -z "\$val_scan_window" ]; then val_scan_window=${params.scan_window}; fi
+    
+    CALIBRATED_PARAMS_VAL="\${val_mass_acc_ms2},\${val_mass_acc_ms1},\${val_scan_window}"
+
+    echo -n "\$CALIBRATED_PARAMS_VAL"
+    """
+}
diff --git a/modules/local/parse_empirical_log_task/meta.yml b/modules/local/parse_empirical_log_task/meta.yml
new file mode 100644
index 0000000..8bdc6b7
--- /dev/null
+++ b/modules/local/parse_empirical_log_task/meta.yml
@@ -0,0 +1,25 @@
+name: "parse_empirical_log_task"
+description: "Parses the empirical assembly log file (from DIA-NN) to extract calibrated mass accuracies and scan window parameters for downstream analysis."
+keywords:
+  - quantmsdiann
+  - diann
+  - log
+  - parse
+  - proteomics
+  - mass_accuracy
+tools:
+  - "coreutils":
+      description: "Standard GNU text utilities (cut, tr and echo from coreutils, plus GNU grep) used for text processing and log parsing."
+      homepage: "https://www.gnu.org/software/coreutils/"
+      documentation: "https://www.gnu.org/software/coreutils/manual/"
+input:
+  - log_file:
+      type: file
+      description: "The log file generated by the empirical library assembly step (DIA-NN stdout/stderr log) containing the Averaged recommended settings."
+      pattern: "*.log"
+output:
+  - parsed_vals:
+      type: string
+      description: "A comma-separated string containing extracted mass_acc_ms2, mass_acc_ms1, and scan_window values (e.g., '15,20,8').
+authors:
+  - "@bigbio"
diff --git a/subworkflows/local/parse_empirical_log/main.nf b/subworkflows/local/parse_empirical_log/main.nf
new file mode 100644
index 0000000..be86eef
--- /dev/null
+++ b/subworkflows/local/parse_empirical_log/main.nf
@@ -0,0 +1,16 @@
+
+include { PARSE_EMPIRICAL_LOG_TASK } from '../../../modules/local/parse_empirical_log_task'
+
+workflow PARSE_EMPIRICAL_LOG {
+    take:
+    ch_log_file
+
+    main:
+    PARSE_EMPIRICAL_LOG_TASK(ch_log_file)
+
+    ch_parsed_vals = PARSE_EMPIRICAL_LOG_TASK.out.parsed_vals
+        .ifEmpty("${params.mass_acc_ms2},${params.mass_acc_ms1},${params.scan_window}")
+
+    emit:
+    parsed_vals = ch_parsed_vals
+}
\ No newline at end of file
diff --git a/subworkflows/local/parse_empirical_log/meta.yml b/subworkflows/local/parse_empirical_log/meta.yml
new file mode 100644
index 0000000..f6baabb
--- /dev/null
+++ b/subworkflows/local/parse_empirical_log/meta.yml
@@ -0,0 +1,25 @@
+name: "parse_empirical_log"
+description: "Subworkflow for parsing the empirical assembly log file (from DIA-NN) to extract calibrated parameters."
+keywords:
+  - parse
+  - log
+  - diann
+  - proteomics
+  - parameters
+  - mass_accuracy
+components:
+  - parse_empirical_log_task
+input:
+  - ch_log_file:
+      type: file
+      description: |
+        The log file generated by the empirical library assembly step. Can be an empty channel if the user did not provide a log file.
+output:
+  - parsed_vals:
+      type: string
+      description: |
+        A channel emitting a comma-separated string of the extracted parameters (mass_acc_ms2, mass_acc_ms1, scan_window). Falls back to the default pipeline parameters when no log file is supplied or a value cannot be extracted from the log.
+authors:
+  - "@bigbio"
+maintainers:
+  - "@bigbio"
diff --git a/workflows/dia.nf b/workflows/dia.nf
index 0b75b9a..67e6e1e 100644
--- a/workflows/dia.nf
+++ b/workflows/dia.nf
@@ -9,7 +9,7 @@
 //
 include { DIANN_MSSTATS               } from '../modules/local/diann/diann_msstats/main'
 include { PRELIMINARY_ANALYSIS        } from '../modules/local/diann/preliminary_analysis/main'
-include { PARSE_EMPIRICAL_LOG         } from '../modules/local/parse_empirical_log/main'
+include { PARSE_EMPIRICAL_LOG         } from '../subworkflows/local/parse_empirical_log/main'
 include { ASSEMBLE_EMPIRICAL_LIBRARY  } from '../modules/local/diann/assemble_empirical_library/main'
 include { INSILICO_LIBRARY_GENERATION } from '../modules/local/diann/insilico_library_generation/main'
 include { INDIVIDUAL_ANALYSIS         } from '../modules/local/diann/individual_analysis/main'
@@ -61,19 +61,12 @@ workflow DIA {
 
     if (params.skip_preliminary_analysis) {
         if (params.empirical_assembly_log) {
-            ch_log_file = Channel.fromPath(params.empirical_assembly_log, checkIfExists: true)
-            PARSE_EMPIRICAL_LOG(ch_log_file)
-            ch_parsed_vals = PARSE_EMPIRICAL_LOG.out.parsed_vals.map { parsed_str ->
-                def clean_str = parsed_str.trim()
-                if (clean_str == "0,0,0") {
-                    return "${params.mass_acc_ms2},${params.mass_acc_ms1},${params.scan_window}"
-                } else {
-                    return clean_str
-                }
-            }
+            ch_empirical_log = Channel.fromPath(params.empirical_assembly_log, checkIfExists: true)
         } else {
-            ch_parsed_vals = Channel.value("${params.mass_acc_ms2},${params.mass_acc_ms1},${params.scan_window}")
+            ch_empirical_log = Channel.empty()
         }
+        PARSE_EMPIRICAL_LOG(ch_empirical_log)
+        ch_parsed_vals = PARSE_EMPIRICAL_LOG.out.parsed_vals
         indiv_fin_analysis_in = ch_file_preparation_results
             .combine(ch_searchdb)
             .combine(speclib)
@@ -123,10 +116,12 @@ workflow DIA {
         )
         ch_software_versions = ch_software_versions
             .mix(ASSEMBLE_EMPIRICAL_LIBRARY.out.versions)
+        PARSE_EMPIRICAL_LOG(ASSEMBLE_EMPIRICAL_LIBRARY.out.log)
+        ch_parsed_vals = PARSE_EMPIRICAL_LOG.out.parsed_vals
         indiv_fin_analysis_in = ch_file_preparation_results
             .combine(ch_searchdb)
             .combine(ASSEMBLE_EMPIRICAL_LIBRARY.out.empirical_library)
-            .combine(ASSEMBLE_EMPIRICAL_LIBRARY.out.calibrated_params_val)
+            .combine(ch_parsed_vals)
             .map { meta_map, ms_file, fasta, library, param_string ->
                 def values = param_string.trim().split(',')
                 def new_meta = meta_map + [

From a7db13bc206108abac3a1b0cacfdd3b77185de5e Mon Sep 17 00:00:00 2001
From: yueqixuan <yueqx@foxmail.com>
Date: Wed, 1 Apr 2026 10:07:24 +0800
Subject: [PATCH 15/16] update: strip trailing whitespace and add final newline in parse_empirical_log files

---
 modules/local/parse_empirical_log_task/main.nf | 4 ++--
 subworkflows/local/parse_empirical_log/main.nf | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/modules/local/parse_empirical_log_task/main.nf b/modules/local/parse_empirical_log_task/main.nf
index 8a698de..dcee2c8 100644
--- a/modules/local/parse_empirical_log_task/main.nf
+++ b/modules/local/parse_empirical_log_task/main.nf
@@ -12,11 +12,11 @@ process PARSE_EMPIRICAL_LOG_TASK {
     val_mass_acc_ms2=\$(grep "Averaged recommended settings" ${log_file} | cut -d ' ' -f 11 | tr -cd "[0-9.]")
     val_mass_acc_ms1=\$(grep "Averaged recommended settings" ${log_file} | cut -d ' ' -f 15 | tr -cd "[0-9.]")
     val_scan_window=\$(grep "Averaged recommended settings" ${log_file} | cut -d ' ' -f 19 | tr -cd "[0-9.]")
-    
+
     if [ -z "\$val_mass_acc_ms2" ]; then val_mass_acc_ms2=${params.mass_acc_ms2}; fi
     if [ -z "\$val_mass_acc_ms1" ]; then val_mass_acc_ms1=${params.mass_acc_ms1}; fi
     if [ -z "\$val_scan_window" ]; then val_scan_window=${params.scan_window}; fi
-    
+
     CALIBRATED_PARAMS_VAL="\${val_mass_acc_ms2},\${val_mass_acc_ms1},\${val_scan_window}"
 
     echo -n "\$CALIBRATED_PARAMS_VAL"
diff --git a/subworkflows/local/parse_empirical_log/main.nf b/subworkflows/local/parse_empirical_log/main.nf
index be86eef..0d1322d 100644
--- a/subworkflows/local/parse_empirical_log/main.nf
+++ b/subworkflows/local/parse_empirical_log/main.nf
@@ -13,4 +13,4 @@ workflow PARSE_EMPIRICAL_LOG {
 
     emit:
     parsed_vals = ch_parsed_vals
-}
\ No newline at end of file
+}

From bfde7a99359ae5123730432a57ab9999dc47dd84 Mon Sep 17 00:00:00 2001
From: yueqixuan <yueqx@foxmail.com>
Date: Wed, 1 Apr 2026 10:10:47 +0800
Subject: [PATCH 16/16] fix: close unterminated description string and add pattern in parse_empirical_log_task meta.yml

---
 modules/local/parse_empirical_log_task/meta.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/modules/local/parse_empirical_log_task/meta.yml b/modules/local/parse_empirical_log_task/meta.yml
index 8bdc6b7..df72533 100644
--- a/modules/local/parse_empirical_log_task/meta.yml
+++ b/modules/local/parse_empirical_log_task/meta.yml
@@ -20,6 +20,7 @@ input:
 output:
   - parsed_vals:
       type: string
-      description: "A comma-separated string containing extracted mass_acc_ms2, mass_acc_ms1, and scan_window values (e.g., '15,20,8').
+      description: "A comma-separated string containing extracted mass_acc_ms2, mass_acc_ms1, and scan_window values (e.g., '15,20,8')."
+      pattern: "*,*,*"
 authors:
   - "@bigbio"