bigbio · ypriverol · Mar 31, 2026 · Mar 31, 2026 · Mar 31, 2026 · Mar 31, 2026
diff --git a/modules/local/diann/assemble_empirical_library/main.nf b/modules/local/diann/assemble_empirical_library/main.nf
@@ -19,7 +19,7 @@ process ASSEMBLE_EMPIRICAL_LIBRARY {
     path "empirical_library.*", emit: empirical_library
     path "assemble_empirical_library.log", emit: log
     path "versions.yml", emit: versions
-    path "diann_calibrated_params.csv", emit: calibrated_params
+    env CALIBRATED_PARAMS_VAL, emit: calibrated_params_val
 
     when:
     task.ext.when == null || task.ext.when
@@ -84,13 +84,14 @@ process ASSEMBLE_EMPIRICAL_LIBRARY {
 
     cp report.log.txt assemble_empirical_library.log
 
+    CALIBRATED_PARAMS_VAL="0,0,0"
     val_mass_acc_ms2=\$(grep "Averaged recommended settings" assemble_empirical_library.log | cut -d ' ' -f 11 | tr -cd "[0-9.]")
     val_mass_acc_ms1=\$(grep "Averaged recommended settings" assemble_empirical_library.log | cut -d ' ' -f 15 | tr -cd "[0-9.]")
     val_scan_window=\$(grep "Averaged recommended settings" assemble_empirical_library.log | cut -d ' ' -f 19 | tr -cd "[0-9.]")
     if [ -z "\$val_mass_acc_ms2" ]; then val_mass_acc_ms2="0"; fi
     if [ -z "\$val_mass_acc_ms1" ]; then val_mass_acc_ms1="0"; fi
     if [ -z "\$val_scan_window" ]; then val_scan_window="0"; fi
-    echo "\${val_mass_acc_ms2},\${val_mass_acc_ms1},\${val_scan_window}" > diann_calibrated_params.csv
+    export CALIBRATED_PARAMS_VAL="\${val_mass_acc_ms2},\${val_mass_acc_ms1},\${val_scan_window}"
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":

diff --git a/modules/local/diann/assemble_empirical_library/meta.yml b/modules/local/diann/assemble_empirical_library/meta.yml
@@ -35,9 +35,9 @@ output:
       type: file
       description: File containing software version
       pattern: "versions.yml"
-  - calibrated_params:
-      type: file
-      description: A file containing mass_acc_ms2, mass_acc_ms1, and scan_window extracted from the DIA-NN log.
-      pattern: "diann_calibrated_params.csv"
+  - calibrated_params_val:
+      type: string
+      description: A comma-separated string containing mass_acc_ms2, mass_acc_ms1, and scan_window extracted from the DIA-NN log. Each value is "0" when it cannot be found in the log.
+      pattern: "*,*,*"
 authors:
   - "@daichengxin"
diff --git a/modules/local/diann/individual_analysis/main.nf b/modules/local/diann/individual_analysis/main.nf
@@ -50,7 +50,7 @@ process INDIVIDUAL_ANALYSIS {
             mass_acc_ms1 = meta.mass_acc_ms1
             scan_window  = meta.scan_window
         }
-        else if (meta['fragmentmasstolerance']) {
+        else if (meta['precursormasstoleranceunit']?.toLowerCase()?.endsWith('ppm') && meta['fragmentmasstoleranceunit']?.toLowerCase()?.endsWith('ppm')) {
             mass_acc_ms2 = meta['fragmentmasstolerance']
             mass_acc_ms1 = meta['precursormasstolerance']
             scan_window  = params.scan_window
@@ -60,19 +60,18 @@ process INDIVIDUAL_ANALYSIS {
             mass_acc_ms1 = params.mass_acc_ms1
             scan_window  = params.scan_window
         }
-    } else if (meta['precursormasstoleranceunit']?.toLowerCase()?.endsWith('ppm') && meta['fragmentmasstoleranceunit']?.toLowerCase()?.endsWith('ppm')) {
-        mass_acc_ms1 = meta["precursormasstolerance"]
-        mass_acc_ms2 = meta["fragmentmasstolerance"]
     } else {
-        if (meta.mass_acc_ms2 != "0" && meta.mass_acc_ms2 != null) {
+        if (meta['precursormasstoleranceunit']?.toLowerCase()?.endsWith('ppm') && meta['fragmentmasstoleranceunit']?.toLowerCase()?.endsWith('ppm')) {
+            mass_acc_ms1 = meta["precursormasstolerance"]
+            mass_acc_ms2 = meta["fragmentmasstolerance"]
+            scan_window  = params.scan_window
+        }
+        else if (meta.mass_acc_ms2 != "0" && meta.mass_acc_ms2 != null) {
             mass_acc_ms2 = meta.mass_acc_ms2
             mass_acc_ms1 = meta.mass_acc_ms1
             scan_window  = meta.scan_window
-        } else if (meta['fragmentmasstolerance']) {
-            mass_acc_ms2 = meta['fragmentmasstolerance']
-            mass_acc_ms1 = meta['precursormasstolerance']
-            scan_window  = params.scan_window
-        } else {
+        }
+        else {
             mass_acc_ms2 = params.mass_acc_ms2
             mass_acc_ms1 = params.mass_acc_ms1
             scan_window  = params.scan_window

diff --git a/modules/local/parse_empirical_log/main.nf b/modules/local/parse_empirical_log/main.nf
@@ -0,0 +1,19 @@
+process PARSE_EMPIRICAL_LOG {
+    // Pulls the "mass accuracy, MS1 accuracy, scan window" triple out of an empirical assembly log.
+    label 'process_single'
+
+    input:
+    path log_file
+
+    output:
+    env PARSED_VALS, emit: parsed_vals
+
+    script:
+    // Only the first matching line is used: perl prints its three captures and exits.
+    """
+    parsed=\$(perl -ne 'if (/Mass accuracy = ([0-9.]+)ppm, MS1 accuracy = ([0-9.]+)ppm, Scan window = ([0-9.]+)/) { print "\$1,\$2,\$3"; exit; }' ${log_file})
+
+    # An empty result (no matching line in the log) collapses to the "0,0,0" placeholder.
+    export PARSED_VALS="\${parsed:-0,0,0}"
+    """
+}
diff --git a/modules/local/parse_empirical_log/meta.yml b/modules/local/parse_empirical_log/meta.yml
@@ -0,0 +1,21 @@
+name: "parse_empirical_log"
+description: Parses the empirical assembly log file (e.g., from DIA-NN) to extract calibrated mass accuracies and scan window parameters for downstream analysis.
+keywords:
+  - quantmsdiann
+tools:
+  - "perl":
+      description: "Larry Wall's Practical Extraction and Report Language, used here for regex parsing."
+      homepage: "https://www.perl.org/"
+      documentation: "https://perldoc.perl.org/"
+input:
+  - log_file:
+      type: file
+      description: The log file generated by the empirical library assembly step (e.g., DIA-NN stdout/stderr log) containing the Averaged recommended settings.
+      pattern: "*.log"
+output:
+  - parsed_vals:
+      type: string
+      description: A comma-separated string containing extracted mass_acc_ms2, mass_acc_ms1, and scan_window values (e.g., "15.0,20.0,3"). Defaults to "0,0,0" if no match is found.
+      pattern: "*,*,*"
+authors:
+  - "@Qi-Xuan Yue"
diff --git a/modules/local/samplesheet_check/meta.yml b/modules/local/samplesheet_check/meta.yml
@@ -12,9 +12,6 @@ input:
     type: file
     description: Input samplesheet or experimental design file
     pattern: "*.{tsv,csv,sdrf}"
-  - meta: validate_ontologies
-    type: boolean
-    description: Whether to validate ontologies
 output:
   - meta: log
     type: file

diff --git a/modules/local/sdrf_parsing/main.nf b/modules/local/sdrf_parsing/main.nf
@@ -10,7 +10,6 @@ process SDRF_PARSING {
     path sdrf
 
     output:
-    path "diann_design.tsv"  , emit: ch_sdrf_config_file
     path "diann_design.tsv"  , emit: ch_expdesign
     path "diann_config.cfg"  , emit: ch_diann_cfg
     path "*.log"             , emit: log

diff --git a/modules/local/sdrf_parsing/meta.yml b/modules/local/sdrf_parsing/meta.yml
@@ -19,11 +19,7 @@ output:
   - ch_expdesign:
       type: file
       description: experimental design file in OpenMS format
-      pattern: "*openms_design.tsv"
-  - ch_sdrf_config_file:
-      type: file
-      description: config file with search engine parameters in OpenMS nomenclature
-      pattern: "*_config.tsv"
+      pattern: "*_design.tsv"
   - mqpar:
       type: file
       description: maxquant configuration file

diff --git a/nextflow.config b/nextflow.config
@@ -16,10 +16,6 @@ params {
 
     // Input options and validation of sdrf files
     input                      = null
-    validate_ontologies        = true // Enable to validate ontology terms in the SDRF
-    skip_ms_validation         = false // Skip the validation of the MS metadata in the SDRF
-    skip_factor_validation     = true // Skip factor values validation, factor values are important for downstream analysis
-    skip_experimental_design_validation = false // Skip the validation of the experimental design in the SDRF (replicates, etc)
     use_ols_cache_only         = true // Use only the OLS cache for ontology validation (no network requests)
 
     // Tools flags

diff --git a/nextflow_schema.json b/nextflow_schema.json
@@ -62,32 +62,6 @@
             "description": "Settings for validating the input SDRF file.",
             "default": "",
             "properties": {
-                "validate_ontologies": {
-                    "type": "boolean",
-                    "description": "Check that ontology terms in an input SDRF file exist.",
-                    "fa_icon": "far fa-check-square",
-                    "help_text": "If false, only a basic readability check is performed on an input SDRF file. This option is useful when ontology providers are inaccessible.",
-                    "default": true
-                },
-                "skip_ms_validation": {
-                    "type": "boolean",
-                    "description": "Skip validation of mass spectrometry files.",
-                    "fa_icon": "far fa-check-square",
-                    "help_text": "Skip validation of mass spectrometry metadata, including PTMs, tolerances or enzymes. Only useful if your metadata is correct but the terms are not in ontologies."
-                },
-                "skip_factor_validation": {
-                    "type": "boolean",
-                    "description": "Skip validation of factor columns.",
-                    "fa_icon": "far fa-check-square",
-                    "help_text": "Skip validation of factor columns in the SDRF. Only useful if your factor values are correct but the sdrf-validation library does not recognize them.",
-                    "default": true
-                },
-                "skip_experimental_design_validation": {
-                    "type": "boolean",
-                    "description": "Skip validation of experimental design.",
-                    "fa_icon": "far fa-check-square",
-                    "help_text": "Skip validation of experimental design in the SDRF. Only useful if your experimental design is correct but the sdrf-validation library does not recognize it."
-                },
                 "use_ols_cache_only": {
                     "type": "boolean",
                     "description": "Use cached version of the Ontology Lookup Service (OLS).",

diff --git a/subworkflows/local/create_input_channel/main.nf b/subworkflows/local/create_input_channel/main.nf
@@ -15,7 +15,6 @@ workflow CREATE_INPUT_CHANNEL {
     // Always parse as SDRF using DIA-NN converter
     SDRF_PARSING(ch_sdrf)
     ch_versions = ch_versions.mix(SDRF_PARSING.out.versions)
-    ch_config = SDRF_PARSING.out.ch_sdrf_config_file
     ch_expdesign = SDRF_PARSING.out.ch_expdesign
     ch_diann_cfg = SDRF_PARSING.out.ch_diann_cfg
 
@@ -27,7 +26,7 @@ workflow CREATE_INPUT_CHANNEL {
         experiment_id: file(ch_sdrf.toString()).baseName,
     ]
 
-    ch_config
+    ch_expdesign
         .splitCsv(header: true, sep: '\t')
         .map { row -> create_meta_channel(row, enzymes, files, wrapper) }
         .set { ch_meta_config_dia }

diff --git a/workflows/dia.nf b/workflows/dia.nf
@@ -7,8 +7,9 @@
 //
 // MODULES: Local to the pipeline
 //
-include { DIANN_MSSTATS              } from '../modules/local/diann/diann_msstats/main'
+include { DIANN_MSSTATS               } from '../modules/local/diann/diann_msstats/main'
 include { PRELIMINARY_ANALYSIS        } from '../modules/local/diann/preliminary_analysis/main'
+include { PARSE_EMPIRICAL_LOG         } from '../modules/local/parse_empirical_log/main'
 include { ASSEMBLE_EMPIRICAL_LIBRARY  } from '../modules/local/diann/assemble_empirical_library/main'
 include { INSILICO_LIBRARY_GENERATION } from '../modules/local/diann/insilico_library_generation/main'
 include { INDIVIDUAL_ANALYSIS         } from '../modules/local/diann/individual_analysis/main'
@@ -59,26 +60,30 @@ workflow DIA {
     }
 
     if (params.skip_preliminary_analysis) {
-        def log_file = params.empirical_assembly_log ? file(params.empirical_assembly_log) : null
-        def parsed_m2 = "0"
-        def parsed_m1 = "0"
-        def parsed_w  = "0"
-        if (log_file && log_file.exists()) {
-            def matcher = log_file.text =~ /Mass accuracy = ([0-9.]+)ppm, MS1 accuracy = ([0-9.]+)ppm, Scan window = ([0-9.]+)/
-            if (matcher) {
-                parsed_m2 = matcher[0][1]
-                parsed_m1 = matcher[0][2]
-                parsed_w  = matcher[0][3]
+        if (params.empirical_assembly_log) {
+            ch_log_file = Channel.fromPath(params.empirical_assembly_log, checkIfExists: true)
+            PARSE_EMPIRICAL_LOG(ch_log_file)
+            ch_parsed_vals = PARSE_EMPIRICAL_LOG.out.parsed_vals.map { parsed_str ->
+                def clean_str = parsed_str.trim()
+                if (clean_str == "0,0,0") {
+                    return "${params.mass_acc_ms2},${params.mass_acc_ms1},${params.scan_window}"
+                } else {
+                    return clean_str
+                }
             }
+        } else {
+            ch_parsed_vals = Channel.value("${params.mass_acc_ms2},${params.mass_acc_ms1},${params.scan_window}")
         }
         indiv_fin_analysis_in = ch_file_preparation_results
             .combine(ch_searchdb)
             .combine(speclib)
-            .map { meta_map, ms_file, fasta, library ->
+            .combine(ch_parsed_vals)
+            .map { meta_map, ms_file, fasta, library, param_string ->
+                def values = param_string.split(',')
                 def new_meta = meta_map + [
-                    mass_acc_ms2 : parsed_m2,
-                    mass_acc_ms1 : parsed_m1,
-                    scan_window  : parsed_w
+                    mass_acc_ms2 : values[0],
+                    mass_acc_ms1 : values[1],
+                    scan_window  : values[2]
                 ]
                 return [ new_meta, ms_file, fasta, library ]
             }
@@ -121,9 +126,9 @@ workflow DIA {
         indiv_fin_analysis_in = ch_file_preparation_results
             .combine(ch_searchdb)
             .combine(ASSEMBLE_EMPIRICAL_LIBRARY.out.empirical_library)
-            .combine(ASSEMBLE_EMPIRICAL_LIBRARY.out.calibrated_params)
-            .map { meta_map, ms_file, fasta, library, param_file ->
-                def values = param_file.text.trim().split(',')
+            .combine(ASSEMBLE_EMPIRICAL_LIBRARY.out.calibrated_params_val)
+            .map { meta_map, ms_file, fasta, library, param_string ->
+                def values = param_string.trim().split(',')
                 def new_meta = meta_map + [
                     mass_acc_ms2 : values[0],
                     mass_acc_ms1 : values[1],