Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

restruct DIA-NN step2 #178

Merged
merged 1 commit into from
May 8, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 63 additions & 0 deletions modules/local/diann_preliminary_analysis/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
// Runs DIA-NN on a single mzML file against a predicted spectral library.
//
// Inputs (one tuple per task):
//   mzML         - spectra file to analyse (passed to DIA-NN via --f)
//   predict_tsv  - spectral library (passed via --lib)
//   diann_config - text file whose content is spliced into the DIA-NN command line
//
// Outputs:
//   diann_quant - files matching *.quant found in the task directory
//   lib         - "<mzML basename>_lib.tsv", the library requested with --gen-spec-lib/--out-lib
//   log         - files matching *.log.txt
//   version     - versions.yml holding the DIA-NN version parsed from the tool banner
process DIANN_PRELIMINARY_ANALYSIS {
    label 'process_high'

    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://containers.biocontainers.pro/s3/SingImgsRepo/diann/v1.8.1_cv1/diann_v1.8.1_cv1.img' :
        'biocontainers/diann:v1.8.1_cv1' }"

    input:
    // `path` is the DSL2 replacement for the legacy `file` qualifier and matches the output declarations.
    tuple path(mzML), path(predict_tsv), path(diann_config)

    output:
    path "*.quant", emit: diann_quant
    path "${mzML.baseName}_lib.tsv", emit: lib
    // NOTE(review): the script tees stdout/stderr into `diann.log`, which does NOT match this glob;
    // this output presumably relies on a log file written by DIA-NN itself - confirm.
    path "*.log.txt", emit: log
    path "versions.yml", emit: version

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''

    // Optional m/z limits: each flag is emitted only when the matching parameter is set.
    def min_pr_mz = params.min_pr_mz ? "--min-pr-mz $params.min_pr_mz" : ""
    def max_pr_mz = params.max_pr_mz ? "--max-pr-mz $params.max_pr_mz" : ""
    def min_fr_mz = params.min_fr_mz ? "--min-fr-mz $params.min_fr_mz" : ""
    def max_fr_mz = params.max_fr_mz ? "--max-fr-mz $params.max_fr_mz" : ""

    // Boolean switches: present on the command line only when enabled.
    def quick_mass_acc = params.quick_mass_acc ? "--quick-mass-acc" : ""
    def time_corr_only = params.time_corr_only ? "--time-corr-only" : ""

    // The config file is referenced through the staged input variable rather than a
    // hard-coded name, so the command works whatever the upstream file is called.
    """
    diann   `cat ${diann_config}` \\
            --lib ${predict_tsv} \\
            --f ${mzML} \\
            ${min_pr_mz} \\
            ${max_pr_mz} \\
            ${min_fr_mz} \\
            ${max_fr_mz} \\
            --threads ${task.cpus} \\
            --missed-cleavages $params.allowed_missed_cleavages \\
            --min-pep-len $params.min_peptide_length \\
            --max-pep-len $params.max_peptide_length \\
            --min-pr-charge $params.min_precursor_charge \\
            --max-pr-charge $params.max_precursor_charge \\
            --var-mods $params.max_mods \\
            --verbose $params.diann_debug \\
            --window $params.scan_window \\
            --gen-spec-lib \\
            --out-lib ${mzML.baseName}_lib.tsv \\
            --temp ./ \\
            --min-corr $params.min_corr \\
            --corr-diff $params.corr_diff \\
            ${quick_mass_acc} \\
            ${time_corr_only} \\
            $args \\
            |& tee diann.log

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        DIA-NN: \$(diann 2>&1 | grep "DIA-NN" | grep -oP "(\\d*\\.\\d+\\.\\d+)|(\\d*\\.\\d+)")
    END_VERSIONS
    """
}
43 changes: 43 additions & 0 deletions modules/local/diann_preliminary_analysis/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Module description for DIANN_PRELIMINARY_ANALYSIS.
# Input names mirror the variables of the process input tuple; output names mirror its `emit:` labels.
name: diann_preliminary_analysis
description: A module for preliminary analysis of individual raw files based on DIA-NN.
keywords:
  - DIA-NN
  - DIA
tools:
  - DIA-NN:
      description: |
        DIA-NN - a universal software for data-independent acquisition (DIA) proteomics data processing by Demichev.
      homepage: https://github.com/vdemichev/DiaNN
      documentation: https://github.com/vdemichev/DiaNN
input:
  - predict_tsv:
      type: file
      description: In silico-predicted spectral library generated by the deep learning predictor in DIA-NN
      pattern: "*.tsv"
  - mzML:
      type: file
      description: Spectra file in mzML format
      pattern: "*.mzML"
  - diann_config:
      type: file
      description: Configuration file from which DIA-NN options/commands are loaded
      pattern: "*.cfg"
output:
  - diann_quant:
      type: file
      description: Quantification file from DIA-NN
      pattern: "*.quant"
  - lib:
      type: file
      description: Spectral library file
      pattern: "*_lib.tsv"
  - log:
      type: file
      description: DIA-NN log file
      pattern: "*.log.txt"
  - version:
      type: file
      description: File containing software version
      pattern: "versions.yml"
authors:
  - "@daichengxin"
6 changes: 6 additions & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,12 @@ params {
// DIA-NN
matrix_spec_q = 0.01
diann_debug = 3 // forwarded to DIA-NN as the value of --verbose
quick_mass_acc = false // when true, --quick-mass-acc is added to the DIA-NN command
scan_window = 7 // forwarded to DIA-NN as the value of --window
min_corr = 2.0 // forwarded to DIA-NN as the value of --min-corr
corr_diff = 1.0 // forwarded to DIA-NN as the value of --corr-diff
time_corr_only = true // when true, --time-corr-only is added to the DIA-NN command

// TODO think about unifying it with DDA parameters
min_pr_mz = null
max_pr_mz = null
Expand Down
32 changes: 32 additions & 0 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -818,6 +818,38 @@
"default": 0.01,
"fa_icon": "fas fa-filter"
},
"quick_mass_acc": {
"type": "boolean",
"description": "Use a fast heuristic algorithm when the MS2 mass accuracy setting is chosen automatically",
"default": false,
"fa_icon": "fas fa-toggle-on",
"help_text": "(experimental) When choosing the MS2 mass accuracy setting automatically, DIA-NN will use a fast heuristic algorithm instead of optimising the number of identifications"
},
"scan_window": {
"type": "integer",
"description": "Set the scan window radius to a specific value",
"fa_icon": "fas fa-sliders-h",
"help_text": "Ideally, it should be approximately equal to the average number of data points per peak",
"default": 7
},
"min_corr": {
"type": "number",
"description": "Only peaks with correlation sum exceeding min_corr will be considered",
"fa_icon": "fas fa-filter",
"default": 2.0
},
"corr_diff": {
"type": "number",
"description": "Peaks with correlation sum below corr_diff from maximum will not be considered",
"fa_icon": "fas fa-filter",
"default": 1.0
},
"time_corr_only": {
"type": "boolean",
"description": "A single score will be used until RT alignment to save memory",
"fa_icon": "fas fa-filter",
"default": true
},
"min_pr_mz": {
"type": "number",
"description": "The minimum precursor m/z for the in silico library generation or library-free search",
Expand Down