Skip to content
Merged

fix #23

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions modules/local/diann/assemble_empirical_library/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ process ASSEMBLE_EMPIRICAL_LIBRARY {
path "empirical_library.*", emit: empirical_library
path "assemble_empirical_library.log", emit: log
path "versions.yml", emit: versions
path "diann_calibrated_params.csv", emit: calibrated_params
env CALIBRATED_PARAMS_VAL, emit: calibrated_params_val

when:
task.ext.when == null || task.ext.when
Expand Down Expand Up @@ -84,13 +84,14 @@ process ASSEMBLE_EMPIRICAL_LIBRARY {

cp report.log.txt assemble_empirical_library.log

CALIBRATED_PARAMS_VAL="0,0,0"
val_mass_acc_ms2=\$(grep "Averaged recommended settings" assemble_empirical_library.log | cut -d ' ' -f 11 | tr -cd "[0-9.]")
val_mass_acc_ms1=\$(grep "Averaged recommended settings" assemble_empirical_library.log | cut -d ' ' -f 15 | tr -cd "[0-9.]")
val_scan_window=\$(grep "Averaged recommended settings" assemble_empirical_library.log | cut -d ' ' -f 19 | tr -cd "[0-9.]")
if [ -z "\$val_mass_acc_ms2" ]; then val_mass_acc_ms2="0"; fi
if [ -z "\$val_mass_acc_ms1" ]; then val_mass_acc_ms1="0"; fi
if [ -z "\$val_scan_window" ]; then val_scan_window="0"; fi
echo "\${val_mass_acc_ms2},\${val_mass_acc_ms1},\${val_scan_window}" > diann_calibrated_params.csv
export CALIBRATED_PARAMS_VAL="\${val_mass_acc_ms2},\${val_mass_acc_ms1},\${val_scan_window}"

cat <<-END_VERSIONS > versions.yml
"${task.process}":
Expand Down
8 changes: 4 additions & 4 deletions modules/local/diann/assemble_empirical_library/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,9 @@ output:
type: file
description: File containing software version
pattern: "versions.yml"
- calibrated_params:
type: file
description: A file containing mass_acc_ms2, mass_acc_ms1, and scan_window extracted from the DIA-NN log.
pattern: "diann_calibrated_params.csv"
- calibrated_params_val:
type: string
description: A comma-separated string containing mass_acc_ms2, mass_acc_ms1, and scan_window extracted from the DIA-NN log.
pattern: "*,*,*"
authors:
- "@daichengxin"
19 changes: 9 additions & 10 deletions modules/local/diann/individual_analysis/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ process INDIVIDUAL_ANALYSIS {
mass_acc_ms1 = meta.mass_acc_ms1
scan_window = meta.scan_window
}
else if (meta['fragmentmasstolerance']) {
else if (meta['precursormasstoleranceunit']?.toLowerCase()?.endsWith('ppm') && meta['fragmentmasstoleranceunit']?.toLowerCase()?.endsWith('ppm')) {
mass_acc_ms2 = meta['fragmentmasstolerance']
mass_acc_ms1 = meta['precursormasstolerance']
scan_window = params.scan_window
Expand All @@ -60,19 +60,18 @@ process INDIVIDUAL_ANALYSIS {
mass_acc_ms1 = params.mass_acc_ms1
scan_window = params.scan_window
}
} else if (meta['precursormasstoleranceunit']?.toLowerCase()?.endsWith('ppm') && meta['fragmentmasstoleranceunit']?.toLowerCase()?.endsWith('ppm')) {
mass_acc_ms1 = meta["precursormasstolerance"]
mass_acc_ms2 = meta["fragmentmasstolerance"]
} else {
if (meta.mass_acc_ms2 != "0" && meta.mass_acc_ms2 != null) {
if (meta['precursormasstoleranceunit']?.toLowerCase()?.endsWith('ppm') && meta['fragmentmasstoleranceunit']?.toLowerCase()?.endsWith('ppm')) {
mass_acc_ms1 = meta["precursormasstolerance"]
mass_acc_ms2 = meta["fragmentmasstolerance"]
scan_window = params.scan_window
}
else if (meta.mass_acc_ms2 != "0" && meta.mass_acc_ms2 != null) {
mass_acc_ms2 = meta.mass_acc_ms2
mass_acc_ms1 = meta.mass_acc_ms1
scan_window = meta.scan_window
} else if (meta['fragmentmasstolerance']) {
mass_acc_ms2 = meta['fragmentmasstolerance']
mass_acc_ms1 = meta['precursormasstolerance']
scan_window = params.scan_window
} else {
}
else {
mass_acc_ms2 = params.mass_acc_ms2
mass_acc_ms1 = params.mass_acc_ms1
scan_window = params.scan_window
Expand Down
19 changes: 19 additions & 0 deletions modules/local/parse_empirical_log/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
/*
 * PARSE_EMPIRICAL_LOG
 *
 * Scans a log file for the first line of the form
 *   "Mass accuracy = <n>ppm, MS1 accuracy = <n>ppm, Scan window = <n>"
 * and exposes the three captured numbers as one comma-separated string.
 *
 * Input:
 *   log_file    - path to the log file to scan.
 *
 * Output:
 *   parsed_vals - value of the PARSED_VALS environment variable, formatted as
 *                 "<mass_acc_ms2>,<mass_acc_ms1>,<scan_window>". It is the
 *                 sentinel "0,0,0" when no line in the log matches.
 */
process PARSE_EMPIRICAL_LOG {
    label 'process_single'

    input:
    path log_file

    output:
    env PARSED_VALS, emit: parsed_vals

    script:
    """
    # Pre-seed the sentinel so the variable is always defined.
    PARSED_VALS="0,0,0"

    # Print the three captures of the first matching line, then stop reading.
    # The log path is quoted so names containing spaces or glob characters
    # reach perl as a single argument.
    parsed=\$(perl -ne 'if (/Mass accuracy = ([0-9.]+)ppm, MS1 accuracy = ([0-9.]+)ppm, Scan window = ([0-9.]+)/) { print "\$1,\$2,\$3"; exit; }' "${log_file}")

    # No match: fall back to the sentinel.
    if [ -z "\$parsed" ]; then
        parsed="0,0,0"
    fi

    export PARSED_VALS="\$parsed"
    """
}
21 changes: 21 additions & 0 deletions modules/local/parse_empirical_log/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
name: "parse_empirical_log"
description: Parses the empirical assembly log file (e.g., from DIA-NN) to extract calibrated mass accuracies and scan window parameters for downstream analysis.
keywords:
- quantmsdiann
tools:
- "perl":
description: "Larry Wall's Practical Extraction and Report Language, used here for regex parsing."
homepage: "https://www.perl.org/"
documentation: "https://perldoc.perl.org/"
input:
- log_file:
type: file
description: The log file generated by the empirical library assembly step (e.g., DIA-NN stdout/stderr log) containing the Averaged recommended settings.
pattern: "*.log"
output:
- parsed_vals:
type: string
description: A comma-separated string containing extracted mass_acc_ms2, mass_acc_ms1, and scan_window values (e.g., "15.0,20.0,3"). Defaults to "0,0,0" if no match is found.
pattern: "*,*,*"
authors:
- "@Qi-Xuan Yue"
3 changes: 0 additions & 3 deletions modules/local/samplesheet_check/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,6 @@ input:
type: file
description: Input samplesheet or experimental design file
pattern: "*.{tsv,csv,sdrf}"
- meta: validate_ontologies
type: boolean
description: Whether to validate ontologies
output:
- meta: log
type: file
Expand Down
1 change: 0 additions & 1 deletion modules/local/sdrf_parsing/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ process SDRF_PARSING {
path sdrf

output:
path "diann_design.tsv" , emit: ch_sdrf_config_file
path "diann_design.tsv" , emit: ch_expdesign
path "diann_config.cfg" , emit: ch_diann_cfg
path "*.log" , emit: log
Expand Down
6 changes: 1 addition & 5 deletions modules/local/sdrf_parsing/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,7 @@ output:
- ch_expdesign:
type: file
description: experimental design file in OpenMS format
pattern: "*openms_design.tsv"
- ch_sdrf_config_file:
type: file
description: config file with search engine parameters in OpenMS nomenclature
pattern: "*_config.tsv"
pattern: "*_design.tsv"
- mqpar:
type: file
description: maxquant configuration file
Expand Down
4 changes: 0 additions & 4 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,6 @@ params {

// Input options and validation of sdrf files
input = null
validate_ontologies = true // Enable to validate ontology terms in the SDRF
skip_ms_validation = false // Skip the validation of the MS metadata in the SDRF
skip_factor_validation = true // Skip factor values validation, factor values are important for downstream analysis
skip_experimental_design_validation = false // Skip the validation of the experimental design in the SDRF (replicates, etc)
use_ols_cache_only = true // Use only the OLS cache for ontology validation (no network requests)

// Tools flags
Expand Down
26 changes: 0 additions & 26 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -62,32 +62,6 @@
"description": "Settings for validating the input SDRF file.",
"default": "",
"properties": {
"validate_ontologies": {
"type": "boolean",
"description": "Check that ontology terms in an input SDRF file exist.",
"fa_icon": "far fa-check-square",
"help_text": "If false, only a basic readability check is performed on an input SDRF file. This option is useful when ontology providers are inaccessible.",
"default": true
},
"skip_ms_validation": {
"type": "boolean",
"description": "Skip validation of mass spectrometry files.",
"fa_icon": "far fa-check-square",
"help_text": "Skip validation of mass spectrometry metadata, including PTMs, tolerances or enzymes. Only useful if your metadata is correct but the terms are not in ontologies."
},
"skip_factor_validation": {
"type": "boolean",
"description": "Skip validation of factor columns.",
"fa_icon": "far fa-check-square",
"help_text": "Skip validation of factor columns in the SDRF. Only useful if your factor values are correct but the sdrf-validation library does not recognize them.",
"default": true
},
"skip_experimental_design_validation": {
"type": "boolean",
"description": "Skip validation of experimental design.",
"fa_icon": "far fa-check-square",
"help_text": "Skip validation of experimental design in the SDRF. Only useful if your experimental design is correct but the sdrf-validation library does not recognize it."
},
"use_ols_cache_only": {
"type": "boolean",
"description": "Use cached version of the Ontology Lookup Service (OLS).",
Expand Down
3 changes: 1 addition & 2 deletions subworkflows/local/create_input_channel/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ workflow CREATE_INPUT_CHANNEL {
// Always parse as SDRF using DIA-NN converter
SDRF_PARSING(ch_sdrf)
ch_versions = ch_versions.mix(SDRF_PARSING.out.versions)
ch_config = SDRF_PARSING.out.ch_sdrf_config_file
ch_expdesign = SDRF_PARSING.out.ch_expdesign
ch_diann_cfg = SDRF_PARSING.out.ch_diann_cfg

Expand All @@ -27,7 +26,7 @@ workflow CREATE_INPUT_CHANNEL {
experiment_id: file(ch_sdrf.toString()).baseName,
]

ch_config
ch_expdesign
.splitCsv(header: true, sep: '\t')
.map { row -> create_meta_channel(row, enzymes, files, wrapper) }
.set { ch_meta_config_dia }
Expand Down
41 changes: 23 additions & 18 deletions workflows/dia.nf
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,9 @@
//
// MODULES: Local to the pipeline
//
include { DIANN_MSSTATS } from '../modules/local/diann/diann_msstats/main'
include { DIANN_MSSTATS } from '../modules/local/diann/diann_msstats/main'
include { PRELIMINARY_ANALYSIS } from '../modules/local/diann/preliminary_analysis/main'
include { PARSE_EMPIRICAL_LOG } from '../modules/local/parse_empirical_log/main'
include { ASSEMBLE_EMPIRICAL_LIBRARY } from '../modules/local/diann/assemble_empirical_library/main'
include { INSILICO_LIBRARY_GENERATION } from '../modules/local/diann/insilico_library_generation/main'
include { INDIVIDUAL_ANALYSIS } from '../modules/local/diann/individual_analysis/main'
Expand Down Expand Up @@ -59,26 +60,30 @@ workflow DIA {
}

if (params.skip_preliminary_analysis) {
def log_file = params.empirical_assembly_log ? file(params.empirical_assembly_log) : null
def parsed_m2 = "0"
def parsed_m1 = "0"
def parsed_w = "0"
if (log_file && log_file.exists()) {
def matcher = log_file.text =~ /Mass accuracy = ([0-9.]+)ppm, MS1 accuracy = ([0-9.]+)ppm, Scan window = ([0-9.]+)/
if (matcher) {
parsed_m2 = matcher[0][1]
parsed_m1 = matcher[0][2]
parsed_w = matcher[0][3]
if (params.empirical_assembly_log) {
ch_log_file = Channel.fromPath(params.empirical_assembly_log, checkIfExists: true)
PARSE_EMPIRICAL_LOG(ch_log_file)
ch_parsed_vals = PARSE_EMPIRICAL_LOG.out.parsed_vals.map { parsed_str ->
def clean_str = parsed_str.trim()
if (clean_str == "0,0,0") {
return "${params.mass_acc_ms2},${params.mass_acc_ms1},${params.scan_window}"
} else {
return clean_str
}
}
} else {
ch_parsed_vals = Channel.value("${params.mass_acc_ms2},${params.mass_acc_ms1},${params.scan_window}")
}
indiv_fin_analysis_in = ch_file_preparation_results
.combine(ch_searchdb)
.combine(speclib)
.map { meta_map, ms_file, fasta, library ->
.combine(ch_parsed_vals)
.map { meta_map, ms_file, fasta, library, param_string ->
def values = param_string.split(',')
def new_meta = meta_map + [
mass_acc_ms2 : parsed_m2,
mass_acc_ms1 : parsed_m1,
scan_window : parsed_w
mass_acc_ms2 : values[0],
mass_acc_ms1 : values[1],
scan_window : values[2]
]
return [ new_meta, ms_file, fasta, library ]
}
Expand Down Expand Up @@ -121,9 +126,9 @@ workflow DIA {
indiv_fin_analysis_in = ch_file_preparation_results
.combine(ch_searchdb)
.combine(ASSEMBLE_EMPIRICAL_LIBRARY.out.empirical_library)
.combine(ASSEMBLE_EMPIRICAL_LIBRARY.out.calibrated_params)
.map { meta_map, ms_file, fasta, library, param_file ->
def values = param_file.text.trim().split(',')
.combine(ASSEMBLE_EMPIRICAL_LIBRARY.out.calibrated_params_val)
.map { meta_map, ms_file, fasta, library, param_string ->
def values = param_string.trim().split(',')
def new_meta = meta_map + [
mass_acc_ms2 : values[0],
mass_acc_ms1 : values[1],
Expand Down
Loading