diff --git a/modules/local/diann_preliminary_analysis/main.nf b/modules/local/diann_preliminary_analysis/main.nf
new file mode 100644
index 00000000..38e2214d
--- /dev/null
+++ b/modules/local/diann_preliminary_analysis/main.nf
@@ -0,0 +1,70 @@
+// Preliminary DIA-NN analysis of a single mzML file against the predicted
+// spectral library; produces the .quant file, a spectral library and the log.
+process DIANN_PRELIMINARY_ANALYSIS {
+    label 'process_high'
+
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://containers.biocontainers.pro/s3/SingImgsRepo/diann/v1.8.1_cv1/diann_v1.8.1_cv1.img' :
+        'biocontainers/diann:v1.8.1_cv1' }"
+
+    input:
+    // diann_config: its content is spliced into the diann command line below.
+    tuple path(mzML), path(predict_tsv), path(diann_config)
+
+    output:
+    path "*.quant", emit: diann_quant
+    path "${mzML.baseName}_lib.tsv", emit: lib
+    // NOTE(review): the console output is tee'd to diann.log, which this glob does
+    // not match, so this output relies on a separate *.log.txt file -- confirm.
+    path "*.log.txt", emit: log
+    path "versions.yml", emit: version
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    // Optional flags expand to an empty string when the parameter is unset/false.
+    // `def` keeps them local to the task rather than in the shared script binding.
+    def min_pr_mz = params.min_pr_mz ? "--min-pr-mz $params.min_pr_mz" : ""
+    def max_pr_mz = params.max_pr_mz ? "--max-pr-mz $params.max_pr_mz" : ""
+    def min_fr_mz = params.min_fr_mz ? "--min-fr-mz $params.min_fr_mz" : ""
+    def max_fr_mz = params.max_fr_mz ? "--max-fr-mz $params.max_fr_mz" : ""
+
+    def quick_mass_acc = params.quick_mass_acc ? "--quick-mass-acc" : ""
+    def time_corr_only = params.time_corr_only ? "--time-corr-only" : ""
+
+    """
+    diann   `cat ${diann_config}` \\
+            --lib ${predict_tsv} \\
+            --f ${mzML} \\
+            ${min_pr_mz} \\
+            ${max_pr_mz} \\
+            ${min_fr_mz} \\
+            ${max_fr_mz} \\
+            --threads ${task.cpus} \\
+            --missed-cleavages $params.allowed_missed_cleavages \\
+            --min-pep-len $params.min_peptide_length \\
+            --max-pep-len $params.max_peptide_length \\
+            --min-pr-charge $params.min_precursor_charge \\
+            --max-pr-charge $params.max_precursor_charge \\
+            --var-mods $params.max_mods \\
+            --verbose $params.diann_debug \\
+            --window $params.scan_window \\
+            --gen-spec-lib \\
+            --out-lib ${mzML.baseName}_lib.tsv \\
+            --temp ./ \\
+            --min-corr $params.min_corr \\
+            --corr-diff $params.corr_diff \\
+            ${quick_mass_acc} \\
+            ${time_corr_only} \\
+            $args \\
+            |& tee diann.log
+
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        DIA-NN: \$(diann 2>&1 | grep "DIA-NN" | grep -oP "(\\d*\\.\\d+\\.\\d+)|(\\d*\\.\\d+)")
+    END_VERSIONS
+    """
+}
diff --git a/modules/local/diann_preliminary_analysis/meta.yml b/modules/local/diann_preliminary_analysis/meta.yml
new file mode 100644
index 00000000..75fe2331
--- /dev/null
+++ b/modules/local/diann_preliminary_analysis/meta.yml
@@ -0,0 +1,43 @@
+name: diann_preliminary_analysis
+description: A module for preliminary analysis of individual raw files based on DIA-NN.
+keywords:
+  - DIA-NN
+  - DIA
+tools:
+  - DIA-NN:
+      description: |
+        DIA-NN - a universal software for data-independent acquisition (DIA) proteomics data processing by Demichev.
+      homepage: https://github.com/vdemichev/DiaNN
+      documentation: https://github.com/vdemichev/DiaNN
+input:
+  - predict_tsv:
+      type: file
+      description: In silico-predicted spectral library generated by the deep learning predictor in DIA-NN
+      pattern: "*.tsv"
+  - mzML:
+      type: file
+      description: Spectra file in mzML format
+      pattern: "*.mzML"
+  - diann_config:
+      type: file
+      description: File to load DIA-NN options/commands from.
+      pattern: "*.cfg"
+output:
+  - diann_quant:
+      type: file
+      description: Quantification file from DIA-NN
+      pattern: "*.quant"
+  - lib:
+      type: file
+      description: Spectral library file
+      pattern: "*.tsv"
+  - log:
+      type: file
+      description: DIA-NN log file
+      pattern: "*.log.txt"
+  - version:
+      type: file
+      description: File containing software version
+      pattern: "versions.yml"
+authors:
+  - "@daichengxin"
diff --git a/nextflow.config b/nextflow.config
index e9277f61..ef6eb118 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -147,6 +147,12 @@ params {
     // DIA-NN
     matrix_spec_q = 0.01
     diann_debug = 3
+    quick_mass_acc = false
+    scan_window = 7
+    min_corr = 2.0
+    corr_diff = 1.0
+    time_corr_only = true
+
     // TODO think about unifying it with DDA parameters
     min_pr_mz = null
     max_pr_mz = null
diff --git a/nextflow_schema.json b/nextflow_schema.json
index ee1b6ed7..59a93320 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -818,6 +818,38 @@
                     "default": 0.01,
                     "fa_icon": "fas fa-filter"
                 },
+                "quick_mass_acc": {
+                    "type": "boolean",
+                    "description": "Choosing the MS2 mass accuracy setting automatically",
+                    "default": false,
+                    "fa_icon": "fas fa-toggle-on",
+                    "help_text": "(experimental) when choosing the MS2 mass accuracy setting automatically, DIA-NN will use a fast heuristical algorithm instead of IDs number optimisation"
+                },
+                "scan_window": {
+                    "type": "integer",
+                    "description": "Set the scan window radius to a specific value",
+                    "fa_icon": "fas fa-sliders-h",
+                    "help_text": "Ideally, should be approximately equal to the average number of data points per peak",
+                    "default": 7
+                },
+                "min_corr": {
+                    "type": "number",
+                    "description": "Only peaks with correlation sum exceeding min_corr will be considered",
+                    "fa_icon": "fas fa-filter",
+                    "default": 2.0
+                },
+                "corr_diff": {
+                    "type": "number",
+                    "description": "Peaks with correlation sum below corr_diff from maximum will not be considered",
+                    "fa_icon": "fas fa-filter",
+                    "default": 1.0
+                },
+                "time_corr_only": {
+                    "type": "boolean",
+                    "description": "A single score will be used until RT alignment to save memory",
+                    "fa_icon": "fas fa-filter",
+                    "default": true
+                },
                 "min_pr_mz": {
                     "type": "number",
                     "description": "The minimum precursor m/z for the in silico library generation or library-free search",