diff --git a/modules/local/diann/assemble_empirical_library/main.nf b/modules/local/diann/assemble_empirical_library/main.nf index 9a6622f..034b95e 100644 --- a/modules/local/diann/assemble_empirical_library/main.nf +++ b/modules/local/diann/assemble_empirical_library/main.nf @@ -19,7 +19,6 @@ process ASSEMBLE_EMPIRICAL_LIBRARY { path "empirical_library.*", emit: empirical_library path "assemble_empirical_library.log", emit: log path "versions.yml", emit: versions - env CALIBRATED_PARAMS_VAL, emit: calibrated_params_val when: task.ext.when == null || task.ext.when @@ -84,15 +83,6 @@ process ASSEMBLE_EMPIRICAL_LIBRARY { cp report.log.txt assemble_empirical_library.log - CALIBRATED_PARAMS_VAL="0,0,0" - val_mass_acc_ms2=\$(grep "Averaged recommended settings" assemble_empirical_library.log | cut -d ' ' -f 11 | tr -cd "[0-9.]") - val_mass_acc_ms1=\$(grep "Averaged recommended settings" assemble_empirical_library.log | cut -d ' ' -f 15 | tr -cd "[0-9.]") - val_scan_window=\$(grep "Averaged recommended settings" assemble_empirical_library.log | cut -d ' ' -f 19 | tr -cd "[0-9.]") - if [ -z "\$val_mass_acc_ms2" ]; then val_mass_acc_ms2="0"; fi - if [ -z "\$val_mass_acc_ms1" ]; then val_mass_acc_ms1="0"; fi - if [ -z "\$val_scan_window" ]; then val_scan_window="0"; fi - export CALIBRATED_PARAMS_VAL="\${val_mass_acc_ms2},\${val_mass_acc_ms1},\${val_scan_window}" - cat <<-END_VERSIONS > versions.yml "${task.process}": DIA-NN: \$(diann 2>&1 | grep "DIA-NN" | grep -oP "\\d+\\.\\d+(\\.\\w+)*(\\.[\\d]+)?") diff --git a/modules/local/diann/assemble_empirical_library/meta.yml b/modules/local/diann/assemble_empirical_library/meta.yml index f4a22bc..0d1f5b7 100644 --- a/modules/local/diann/assemble_empirical_library/meta.yml +++ b/modules/local/diann/assemble_empirical_library/meta.yml @@ -35,9 +35,5 @@ output: type: file description: File containing software version pattern: "versions.yml" - - calibrated_params_val: - type: string - description: A comma-separated string containing 
mass_acc_ms2, mass_acc_ms1, and scan_window extracted from the DIA-NN log. - pattern: "*,*,*" authors: - "@daichengxin" diff --git a/modules/local/parse_empirical_log/main.nf b/modules/local/parse_empirical_log/main.nf deleted file mode 100644 index 7927d46..0000000 --- a/modules/local/parse_empirical_log/main.nf +++ /dev/null @@ -1,19 +0,0 @@ -process PARSE_EMPIRICAL_LOG { - label 'process_single' - - input: - path log_file - - output: - env PARSED_VALS, emit: parsed_vals - - script: - """ - PARSED_VALS="0,0,0" - parsed=\$(perl -ne 'if (/Mass accuracy = ([0-9.]+)ppm, MS1 accuracy = ([0-9.]+)ppm, Scan window = ([0-9.]+)/) { print "\$1,\$2,\$3"; exit; }' ${log_file}) - if [ -z "\$parsed" ]; then - parsed="0,0,0" - fi - export PARSED_VALS="\$parsed" - """ -} diff --git a/modules/local/parse_empirical_log/meta.yml b/modules/local/parse_empirical_log/meta.yml deleted file mode 100644 index 4388aa0..0000000 --- a/modules/local/parse_empirical_log/meta.yml +++ /dev/null @@ -1,21 +0,0 @@ -name: "parse_empirical_log" -description: Parses the empirical assembly log file (e.g., from DIA-NN) to extract calibrated mass accuracies and scan window parameters for downstream analysis. -keywords: - - quantmsdiann -tools: - - "perl": - description: "Larry Wall's Practical Extraction and Report Language, used here for regex parsing." - homepage: "https://www.perl.org/" - documentation: "https://perldoc.perl.org/" -input: - - log_file: - type: file - description: The log file generated by the empirical library assembly step (e.g., DIA-NN stdout/stderr log) containing the Averaged recommended settings. - pattern: "*.log" -output: - - parsed_vals: - type: string - description: A comma-separated string containing extracted mass_acc_ms2, mass_acc_ms1, and scan_window values (e.g., "15.0,20.0,3"). Defaults to "0,0,0" if no match is found. 
- pattern: "*,*,*" -authors: - "@Qi-Xuan Yue"
diff --git a/modules/local/parse_empirical_log_task/main.nf b/modules/local/parse_empirical_log_task/main.nf
new file mode 100644
index 0000000..dcee2c8
--- /dev/null
+++ b/modules/local/parse_empirical_log_task/main.nf
@@ -0,0 +1,35 @@
+// Extracts the calibrated mass accuracies and scan window that DIA-NN reports on its
+// "Averaged recommended settings" log line and prints them to stdout as a single
+// "mass_acc_ms2,mass_acc_ms1,scan_window" string. Any value that cannot be read from
+// the log is replaced by the matching pipeline parameter.
+process PARSE_EMPIRICAL_LOG_TASK {
+    label 'process_single'
+
+    input:
+    path log_file
+
+    output:
+    stdout emit: parsed_vals
+
+    script:
+    """
+    # Keep only the first matching line so repeated matches cannot be glued together by tr.
+    # The trailing "|| true" stops a no-match grep from aborting the task when the shell
+    # runs with errexit/pipefail, so the fallbacks below are still reached.
+    settings_line=\$(grep -m 1 "Averaged recommended settings" ${log_file} || true)
+
+    # Fields 11, 15 and 19 are the space-separated positions the three values are read from.
+    val_mass_acc_ms2=\$(printf '%s' "\$settings_line" | cut -d ' ' -f 11 | tr -cd "0-9.")
+    val_mass_acc_ms1=\$(printf '%s' "\$settings_line" | cut -d ' ' -f 15 | tr -cd "0-9.")
+    val_scan_window=\$(printf '%s' "\$settings_line" | cut -d ' ' -f 19 | tr -cd "0-9.")
+
+    # Fall back to the pipeline parameters for anything the log did not provide.
+    if [ -z "\$val_mass_acc_ms2" ]; then val_mass_acc_ms2=${params.mass_acc_ms2}; fi
+    if [ -z "\$val_mass_acc_ms1" ]; then val_mass_acc_ms1=${params.mass_acc_ms1}; fi
+    if [ -z "\$val_scan_window" ]; then val_scan_window=${params.scan_window}; fi
+
+    CALIBRATED_PARAMS_VAL="\${val_mass_acc_ms2},\${val_mass_acc_ms1},\${val_scan_window}"
+
+    echo -n "\$CALIBRATED_PARAMS_VAL"
+    """
+}
diff --git a/modules/local/parse_empirical_log_task/meta.yml b/modules/local/parse_empirical_log_task/meta.yml new file mode 100644 index 0000000..df72533 --- /dev/null +++ b/modules/local/parse_empirical_log_task/meta.yml @@ -0,0 +1,26 @@ +name: "parse_empirical_log_task" +description: "Parses the empirical assembly log file (from DIA-NN) to extract calibrated mass accuracies and scan window parameters for downstream analysis." +keywords: + - quantmsdiann + - diann + - log + - parse + - proteomics + - mass_accuracy +tools: + - "coreutils": + description: "Standard GNU core utilities (grep, cut, tr, echo) used for text processing and log parsing." 
+ homepage: "https://www.gnu.org/software/coreutils/" + documentation: "https://www.gnu.org/software/coreutils/manual/" +input: + - log_file: + type: file + description: "The log file generated by the empirical library assembly step (DIA-NN stdout/stderr log) containing the Averaged recommended settings." + pattern: "*.log" +output: + - parsed_vals: + type: string + description: "A comma-separated string containing extracted mass_acc_ms2, mass_acc_ms1, and scan_window values (e.g., '15,20,8')." + pattern: "*,*,*" +authors: + - "@bigbio"
diff --git a/subworkflows/local/parse_empirical_log/main.nf b/subworkflows/local/parse_empirical_log/main.nf
new file mode 100644
index 0000000..0d1322d
--- /dev/null
+++ b/subworkflows/local/parse_empirical_log/main.nf
@@ -0,0 +1,20 @@
+//
+// Parse the DIA-NN empirical assembly log into a "mass_acc_ms2,mass_acc_ms1,scan_window" string
+//
+
+include { PARSE_EMPIRICAL_LOG_TASK } from '../../../modules/local/parse_empirical_log_task/main'
+
+workflow PARSE_EMPIRICAL_LOG {
+    take:
+    ch_log_file // channel: log file to parse; may be an empty channel
+
+    main:
+    PARSE_EMPIRICAL_LOG_TASK(ch_log_file)
+
+    // If the task emitted nothing (e.g. no log file was supplied), emit the configured parameters instead.
+    ch_parsed_vals = PARSE_EMPIRICAL_LOG_TASK.out.parsed_vals
+        .ifEmpty("${params.mass_acc_ms2},${params.mass_acc_ms1},${params.scan_window}")
+
+    emit:
+    parsed_vals = ch_parsed_vals // channel: comma-separated parameter string
+}
diff --git a/subworkflows/local/parse_empirical_log/meta.yml b/subworkflows/local/parse_empirical_log/meta.yml new file mode 100644 index 0000000..f6baabb --- /dev/null +++ b/subworkflows/local/parse_empirical_log/meta.yml @@ -0,0 +1,25 @@ +name: "parse_empirical_log" +description: "Subworkflow for parsing the empirical assembly log file (from DIA-NN) to extract calibrated parameters." +keywords: + - parse + - log + - diann + - proteomics + - parameters + - mass_accuracy +components: + - parse_empirical_log_task +input: + - ch_log_file: + type: file + description: | + The log file generated by the empirical library assembly step. Can be an empty channel if the user did not provide a log file. 
+output: + - parsed_vals: + type: string + description: | + A value channel containing a comma-separated string of the extracted parameters (mass_acc_ms2, mass_acc_ms1, scan_window). Falls back to default pipeline parameters if the log is empty or invalid. +authors: + - "@bigbio" +maintainers: + - "@bigbio" diff --git a/workflows/dia.nf b/workflows/dia.nf index 0b75b9a..67e6e1e 100644 --- a/workflows/dia.nf +++ b/workflows/dia.nf @@ -9,7 +9,7 @@ // include { DIANN_MSSTATS } from '../modules/local/diann/diann_msstats/main' include { PRELIMINARY_ANALYSIS } from '../modules/local/diann/preliminary_analysis/main' -include { PARSE_EMPIRICAL_LOG } from '../modules/local/parse_empirical_log/main' +include { PARSE_EMPIRICAL_LOG } from '../subworkflows/local/parse_empirical_log/main' include { ASSEMBLE_EMPIRICAL_LIBRARY } from '../modules/local/diann/assemble_empirical_library/main' include { INSILICO_LIBRARY_GENERATION } from '../modules/local/diann/insilico_library_generation/main' include { INDIVIDUAL_ANALYSIS } from '../modules/local/diann/individual_analysis/main' @@ -61,19 +61,12 @@ workflow DIA { if (params.skip_preliminary_analysis) { if (params.empirical_assembly_log) { - ch_log_file = Channel.fromPath(params.empirical_assembly_log, checkIfExists: true) - PARSE_EMPIRICAL_LOG(ch_log_file) - ch_parsed_vals = PARSE_EMPIRICAL_LOG.out.parsed_vals.map { parsed_str -> - def clean_str = parsed_str.trim() - if (clean_str == "0,0,0") { - return "${params.mass_acc_ms2},${params.mass_acc_ms1},${params.scan_window}" - } else { - return clean_str - } - } + ch_empirical_log = Channel.fromPath(params.empirical_assembly_log, checkIfExists: true) } else { - ch_parsed_vals = Channel.value("${params.mass_acc_ms2},${params.mass_acc_ms1},${params.scan_window}") + ch_empirical_log = Channel.empty() } + PARSE_EMPIRICAL_LOG(ch_empirical_log) + ch_parsed_vals = PARSE_EMPIRICAL_LOG.out.parsed_vals indiv_fin_analysis_in = ch_file_preparation_results .combine(ch_searchdb) .combine(speclib) @@ 
-123,10 +116,12 @@ workflow DIA { ) ch_software_versions = ch_software_versions .mix(ASSEMBLE_EMPIRICAL_LIBRARY.out.versions) + PARSE_EMPIRICAL_LOG(ASSEMBLE_EMPIRICAL_LIBRARY.out.log) + ch_parsed_vals = PARSE_EMPIRICAL_LOG.out.parsed_vals indiv_fin_analysis_in = ch_file_preparation_results .combine(ch_searchdb) .combine(ASSEMBLE_EMPIRICAL_LIBRARY.out.empirical_library) - .combine(ASSEMBLE_EMPIRICAL_LIBRARY.out.calibrated_params_val) + .combine(ch_parsed_vals) .map { meta_map, ms_file, fasta, library, param_string -> def values = param_string.trim().split(',') def new_meta = meta_map + [