diff --git a/README.md b/README.md index 2a694fdf..010c3a90 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# ![nf-core/quantms](docs/images/nf-core/quantms_logo_light.png#gh-light-mode-only) ![nf-core/quantms](docs/images/nf-core/quantms_logo_dark.png#gh-dark-mode-only) +# ![nf-core/quantms](docs/images/nf-core-quantms_logo_light.png#gh-light-mode-only) ![nf-core/quantms](docs/images/nf-core-quantms_logo_dark.png#gh-dark-mode-only) [![GitHub Actions CI Status](https://github.com/nf-core/quantms/workflows/nf-core%20CI/badge.svg)](https://github.com/nf-core/quantms/actions?query=workflow%3A%22nf-core+CI%22) [![GitHub Actions Linting Status](https://github.com/nf-core/quantms/workflows/nf-core%20linting/badge.svg)](https://github.com/nf-core/quantms/actions?query=workflow%3A%22nf-core+linting%22) @@ -77,7 +77,7 @@ DIA-LFQ: 3. Download the pipeline and test it on a minimal dataset with a single command: ```console - nextflow run nf-core/quantms -profile test,YOURPROFILE --input project.sdrf.tsv --database protein.fasta + nextflow run nf-core/quantms -profile test,YOURPROFILE --input project.sdrf.tsv --database protein.fasta --outdir <OUTDIR> ``` Note that some form of configuration will be needed so that Nextflow knows how to fetch the required software. This is usually done in the form of a config profile (`YOURPROFILE` in the example command above). You can chain multiple config profiles in a comma-separated string. 
@@ -97,7 +97,7 @@ DIA-LFQ: ```console - nextflow run nf-core/quantms -profile --input project.sdrf.tsv --database database.fasta + nextflow run nf-core/quantms -profile --input project.sdrf.tsv --database database.fasta --outdir ``` ## Documentation diff --git a/conf/dev.config b/conf/dev.config index 3a6df93d..ae790eb8 100644 --- a/conf/dev.config +++ b/conf/dev.config @@ -1,11 +1,15 @@ /* - * ------------------------------------------------- - * Nextflow config file for running with nightly dev containers - * ------------------------------------------------- - * Only overwrites the container. See dev/ folder for building instructions. - * Use as follows: - * nextflow run nf-core/quantms -profile dev, - */ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running with nightly dev. containers (mainly for OpenMS) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Only overwrites the container. E.g. uses the OpenMS nightly executable and thirdparty + containers. TODO Currently does nothing, we need to set it up. 
+ + Use as follows: + nextflow run nf-core/quantms -profile test, [--outdir ] + +------------------------------------------------------------------------------------------- +*/ params { config_profile_name = 'Development profile' diff --git a/conf/modules.config b/conf/modules.config index 9e021ac0..ebcaaf58 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -160,8 +160,8 @@ process { ] } - // EPIFILTER - withName: 'NFCORE_QUANTMS:QUANTMS:TMT:.*:EPIFILTER' { + // IDFILTER on PROTEIN LEVEL + withName: 'NFCORE_QUANTMS:QUANTMS:TMT:PROTEININFERENCE:IDFILTER' { ext.args = [ "-score:prot \"$params.protein_level_fdr_cutoff\"", "-delete_unreferenced_peptide_hits", @@ -175,27 +175,22 @@ process { ] } - //IDCONFLICTRESOLVER - withName: 'NFCORE_QUANTMS:QUANTMS:TMT:PROTEINQUANT:IDCONFLICTRESOLVER' { - ext.args = "-debug $params.conflict_resolver_debug" - } - - //PROTEINQUANTIFIER + // PROTEINQUANTIFIER withName: 'NFCORE_QUANTMS:QUANTMS:TMT:PROTEINQUANT:PROTEINQUANTIFIER' { - ext.args = "-debug $params.proteinquant_debug" + ext.args = "-debug 0" } - // MSstatsConverter + // MSSTATSCONVERTER withName: 'NFCORE_QUANTMS:QUANTMS:TMT:PROTEINQUANT:MSSTATSCONVERTER' { - ext.args = "-debug $params.msstatsconverter_debug" + ext.args = "-debug 0" } } -if (params.protein_inference_bayesian) { +if (params.protein_inference_method == "bayesian") { process { // EPIFANY withName: 'NFCORE_QUANTMS:QUANTMS:.*:EPIFANY' { - ext.args = "-debug $params.protein_inference_debug" + ext.args = "-keep_best_psm_only false -debug $params.protein_inference_debug" publishDir = [ path: { "${params.outdir}/epifany" }, mode: params.publish_dir_mode, @@ -221,16 +216,6 @@ if (params.protein_inference_bayesian) { process { - // INDEXPEPTIDES - withName: 'NFCORE_QUANTMS:QUANTMS:.*:INDEXPEPTIDES' { - publishDir = [ - path: { "${params.outdir}/indexpeptides" }, - mode: params.publish_dir_mode, - pattern: '*.log', - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - ] - } - // IDFILTER withName: 'NFCORE_QUANTMS:QUANTMS:.*:ID:PSMFDRCONTROL:IDFILTER' { ext.args = "-score:pep \"$params.psm_pep_fdr_cutoff\"" @@ -244,7 +229,7 @@ process { // PROTEOMICSLFQ withName: 'NFCORE_QUANTMS:QUANTMS:LFQ:PROTEOMICSLFQ' { - ext.args = "-debug $params.inf_quant_debug" + ext.args = "-debug $params.plfq_debug" } // DIA-NN diff --git a/conf/test.config b/conf/test.config index 10bcf963..6be93226 100644 --- a/conf/test.config +++ b/conf/test.config @@ -1,24 +1,26 @@ /* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Nextflow config file for running minimal tests -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests (ISO) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Defines input files and everything required to run a fast and simple pipeline test. 
Use as follows: - nextflow run nf-core/quantms -profile test, + nextflow run nf-core/quantms -profile test, [--outdir ] ----------------------------------------------------------------------------------------- +------------------------------------------------------------------------------------------- */ params { - config_profile_name = 'Test profile' - config_profile_description = 'Minimal test dataset to check pipeline function' + config_profile_name = 'Test profile DDA ISO' + config_profile_description = 'Minimal test dataset to check pipeline function of the isotopic labelling branch of the pipeline' // Limit resources so that this can run on GitHub Actions max_cpus = 2 max_memory = '6.GB' max_time = '6.h' + outdir = "./results_iso" + // Input data input = 'https://raw.githubusercontent.com/nf-core/test-datasets/quantms/testdata/tmt_ci/PXD000001.sdrf.tsv' diff --git a/conf/test_dia.config b/conf/test_dia.config index 3d84dd4b..8ab5b74b 100644 --- a/conf/test_dia.config +++ b/conf/test_dia.config @@ -1,20 +1,26 @@ /* - * ------------------------------------------------- - * Nextflow config file for running tests - * ------------------------------------------------- - * Defines bundled input files and everything required - * to run a fast and simple test. Use as follows: - * nextflow run nf-core/quantms -profile test, - */ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests (DIA) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple test. 
+ + Use as follows: + nextflow run nf-core/quantms -profile test_dia, [--outdir ] + +------------------------------------------------------------------------------------------------ +*/ params { - config_profile_name = 'Test profile' - config_profile_description = 'Minimal test dataset to check pipeline function' + config_profile_name = 'Test profile for DIA' + config_profile_description = 'Minimal test dataset to check pipeline function for the data-independent acquisition pipeline branch.' + // Limit resources so that this can run on GitHub Actions max_cpus = 2 max_memory = 6.GB max_time = 48.h + outdir = './results_dia' + // Input data input = 'https://raw.githubusercontent.com/nf-core/test-datasets/quantms/testdata/lfq_ci/PXD026600/PXD026600.sdrf.tsv' database = 'ftp://massive.ucsd.edu/MSV000087597/sequence/REF_EColi_K12_UPS1_combined.fasta' diff --git a/conf/test_full.config b/conf/test_full.config index 813aec9d..54af6b23 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -5,14 +5,16 @@ Defines input files and everything required to run a full size pipeline test. 
Use as follows: - nextflow run nf-core/quantms -profile test_full, --outdir + nextflow run nf-core/quantms -profile test_full, [--outdir ] ---------------------------------------------------------------------------------------- */ params { - config_profile_name = 'Full test profile' - config_profile_description = 'Full test dataset to check pipeline function and sanity of results' + config_profile_name = 'Full test profile DDA ISO' + config_profile_description = 'Full test dataset in isotopic labelling mode to check pipeline function and sanity of results' + + outdir = "./results_iso_full" // Input data for full size test input = 'https://raw.githubusercontent.com/nf-core/test-datasets/quantms/testdata/tmt_ci/PXD000001.sdrf.tsv' diff --git a/conf/test_lfq.config b/conf/test_lfq.config index b72061d1..41959ef2 100644 --- a/conf/test_lfq.config +++ b/conf/test_lfq.config @@ -1,20 +1,26 @@ /* - * ------------------------------------------------- - * Nextflow config file for running tests - * ------------------------------------------------- - * Defines bundled input files and everything required - * to run a fast and simple test. Use as follows: - * nextflow run nf-core/quantms -profile test, - */ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests (LFQ) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple test. 
+ + Use as follows: + nextflow run nf-core/quantms -profile test_lfq, [--outdir ] + +------------------------------------------------------------------------------------------------ +*/ params { - config_profile_name = 'Test profile' - config_profile_description = 'Minimal test dataset to check pipeline function' + config_profile_name = 'Test profile for DDA LFQ' + config_profile_description = 'Minimal test dataset to check pipeline function of the label-free quantification branch of the pipeline' + // Limit resources so that this can run on GitHub Actions max_cpus = 2 max_memory = 6.GB max_time = 48.h + outdir = "./results_lfq" + // Input data labelling_type = "label free sample" input = 'https://raw.githubusercontent.com/nf-core/test-datasets/quantms/testdata/lfq_ci/BSA/BSA_design_urls.tsv' @@ -25,5 +31,5 @@ params { enable_qc = true add_triqler_output = true protein_level_fdr_cutoff = 1.0 - acqusition_method = "dda" + acquisition_method = "dda" } diff --git a/conf/test_localize.config b/conf/test_localize.config index 8e9ae1e6..7a64e2d2 100644 --- a/conf/test_localize.config +++ b/conf/test_localize.config @@ -1,22 +1,26 @@ /* - * ------------------------------------------------- - * Nextflow config file for running tests with - * modification localization - * ------------------------------------------------- - * Defines bundled input files and everything required - * to run a fast and simple test. Use as follows: - * nextflow run nf-core/quantms -profile test_localize, - */ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests (LFQ) with mod. localization +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple test. 
+ + Use as follows: + nextflow run nf-core/quantms -profile test_localize, [--outdir ] + +---------------------------------------------------------------------------------------------------- +*/ params { - config_profile_name = 'Test phospho-localization profile' - config_profile_description = 'Minimal test dataset to check pipeline function for phospho-localization, SDRF parsing and ConsensusID.' + config_profile_name = 'Test PTM-localization profile' + config_profile_description = 'Minimal test dataset to check pipeline function for PTM-localization, SDRF parsing and ConsensusID.' // Limit resources so that this can run on Travis max_cpus = 2 max_memory = 6.GB max_time = 1.h + outdir = "./results_localize" + // Input data input = 'https://raw.githubusercontent.com/nf-core/test-datasets/proteomicslfq/testdata/phospho/test_phospho.sdrf' database = 'https://raw.githubusercontent.com/nf-core/test-datasets/quantms/testdata/lfq_ci_phospho/pools_crap_targetdecoy.fasta' diff --git a/modules/local/diannsearch/main.nf b/modules/local/diannsearch/main.nf index ef74281a..9f2412a4 100644 --- a/modules/local/diannsearch/main.nf +++ b/modules/local/diannsearch/main.nf @@ -12,9 +12,9 @@ process DIANNSEARCH { file(diann_config) output: - path "report.tsv", emit: report - path "report.stats.tsv", emit: report_stat - path "report.log.txt", emit: log + path "diann_report.tsv", emit: report + path "diann_report.stats.tsv", emit: report_stat + path "diann_report.log.txt", emit: log path "versions.yml", emit: version path "*.tsv" @@ -51,13 +51,14 @@ process DIANNSEARCH { ${mbr} \\ --reannotate \\ ${normalize} \\ + --out diann_report.tsv \\ --verbose $params.diann_debug \\ > diann.log cat <<-END_VERSIONS > versions.yml "${task.process}": - DIA-NN: 1.8.0 + DIA-NN: \$(diann 2>&1 | grep "DIA-NN" | grep -oP "(\\d*\\.\\d+\\.\\d+)|(\\d*\\.\\d+)") END_VERSIONS """ } diff --git a/modules/local/diannsearch/meta.yml b/modules/local/diannsearch/meta.yml index 09aa7f5f..8b1de484 100644 --- 
a/modules/local/diannsearch/meta.yml +++ b/modules/local/diannsearch/meta.yml @@ -30,15 +30,15 @@ output: - report: type: file description: Main report file. A text table containing precursor and protein IDs, as well as plenty of associated information. Most column names are self-explanatory. - pattern: "report.tsv" + pattern: "diann_report.tsv" - report_stat: type: file description: Contains a number of QC metrics which can be used for data filtering, e.g. to exclude failed runs, or as a readout for method optimization. - pattern: "report.stats.tsv" + pattern: "diann_report.stats.tsv" - log: type: file description: DIA-NN log file - pattern: "report.log.txt" + pattern: "diann_report.log.txt" - version: type: file description: File containing software version diff --git a/modules/local/librarygeneration/main.nf b/modules/local/librarygeneration/main.nf index 277c40d4..893af701 100644 --- a/modules/local/librarygeneration/main.nf +++ b/modules/local/librarygeneration/main.nf @@ -51,7 +51,7 @@ process LIBRARYGENERATION { cat <<-END_VERSIONS > versions.yml "${task.process}": - DIA-NN: 1.8.0 + DIA-NN: \$(diann 2>&1 | grep "DIA-NN" | grep -oP "(\\d*\\.\\d+\\.\\d+)|(\\d*\\.\\d+)") END_VERSIONS """ } diff --git a/modules/local/librarygeneration/meta.yml b/modules/local/librarygeneration/meta.yml index 0133e1a9..ee117c7c 100644 --- a/modules/local/librarygeneration/meta.yml +++ b/modules/local/librarygeneration/meta.yml @@ -45,4 +45,4 @@ output: description: File containing software version pattern: "*.{version.txt}" authors: - - "@Chengxin Dai" + - "@daichengxin" diff --git a/modules/local/openms/consensusid/main.nf b/modules/local/openms/consensusid/main.nf index 811b8235..752acb65 100644 --- a/modules/local/openms/consensusid/main.nf +++ b/modules/local/openms/consensusid/main.nf @@ -2,11 +2,12 @@ process CONSENSUSID { label 'process_medium' // TODO could be easily parallelized label 'process_single_thread' + label 'openms' conda (params.enable_conda ? 
"openms::openms=2.8.0" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/openms-thirdparty:2.8.0--h9ee0642_0' : - 'quay.io/biocontainers/openms-thirdparty:2.8.0--h9ee0642_0' }" + 'https://depot.galaxyproject.org/singularity/openms:2.8.0--h7ca0330_1' : + 'quay.io/biocontainers/openms:2.8.0--h7ca0330_1' }" input: tuple val(meta), path(id_file), val(qval_score) diff --git a/modules/local/openms/decoydatabase/main.nf b/modules/local/openms/decoydatabase/main.nf index 990d7797..19dd393a 100644 --- a/modules/local/openms/decoydatabase/main.nf +++ b/modules/local/openms/decoydatabase/main.nf @@ -1,9 +1,11 @@ process DECOYDATABASE { label 'process_very_low' + label 'openms' + conda (params.enable_conda ? "openms::openms=2.8.0" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/openms-thirdparty:2.8.0--h9ee0642_0' : - 'quay.io/biocontainers/openms-thirdparty:2.8.0--h9ee0642_0' }" + 'https://depot.galaxyproject.org/singularity/openms:2.8.0--h7ca0330_1' : + 'quay.io/biocontainers/openms:2.8.0--h7ca0330_1' }" input: path(db_for_decoy) diff --git a/modules/local/openms/epifany/main.nf b/modules/local/openms/epifany/main.nf index d0b4254c..98592664 100644 --- a/modules/local/openms/epifany/main.nf +++ b/modules/local/openms/epifany/main.nf @@ -1,11 +1,13 @@ process EPIFANY { label 'process_medium' - publishDir "${params.outdir}", + label 'openms' - conda (params.enable_conda ? 
"bioconda::bumbershoot bioconda::comet-ms bioconda::crux-toolkit=3.2 bioconda::fido=1.0 conda-forge::gnuplot bioconda::luciphor2=2020_04_03 bioconda::msgf_plus=2021.03.22 bioconda::openms=2.8.0 bioconda::pepnovo=20101117 bioconda::percolator=3.5 bioconda::sirius-csifingerid=4.0.1 bioconda::thermorawfileparser=1.3.4 bioconda::xtandem=15.12.15.2 bioconda::openms-thirdparty=2.8.0" : null) + publishDir "${params.outdir}" + + conda (params.enable_conda ? "bioconda::openms=2.8.0" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/openms-thirdparty:2.8.0--h9ee0642_0' : - 'quay.io/biocontainers/openms-thirdparty:2.8.0--h9ee0642_0' }" + 'https://depot.galaxyproject.org/singularity/openms:2.8.0--h7ca0330_1' : + 'quay.io/biocontainers/openms:2.8.0--h7ca0330_1' }" input: tuple val(meta), path(consus_file) @@ -17,23 +19,24 @@ process EPIFANY { script: def args = task.ext.args ?: '' + gg = params.protein_quant == 'shared_peptides' ? 
'remove_proteins_wo_evidence' : 'none' """ Epifany \\ -in ${consus_file} \\ -protein_fdr true \\ -threads $task.cpus \\ - $args \\ -algorithm:keep_best_PSM_only $params.keep_best_PSM_only \\ -algorithm:update_PSM_probabilities $params.update_PSM_probabilities \\ - -greedy_group_resolution $params.greedy_group_resolution \\ + -greedy_group_resolution $gg \\ -algorithm:top_PSMs $params.top_PSMs \\ -out ${consus_file.baseName}_epi.consensusXML \\ + $args \\ > ${consus_file.baseName}_inference.log cat <<-END_VERSIONS > versions.yml "${task.process}": - DecoyDatabase: \$(Epifany 2>&1 | grep -E '^Version(.*)' | sed 's/Version: //g') + Epifany: \$(Epifany 2>&1 | grep -E '^Version(.*)' | sed 's/Version: //g') END_VERSIONS """ } diff --git a/modules/local/openms/extractpsmfeatures/main.nf b/modules/local/openms/extractpsmfeatures/main.nf index ff7cc954..3f6d3dc6 100644 --- a/modules/local/openms/extractpsmfeatures/main.nf +++ b/modules/local/openms/extractpsmfeatures/main.nf @@ -1,11 +1,12 @@ process EXTRACTPSMFEATURES { label 'process_very_low' label 'process_single_thread' + label 'openms' conda (params.enable_conda ? "bioconda::bumbershoot bioconda::comet-ms bioconda::crux-toolkit=3.2 bioconda::fido=1.0 conda-forge::gnuplot bioconda::luciphor2=2020_04_03 bioconda::msgf_plus=2021.03.22 bioconda::openms=2.8.0 bioconda::pepnovo=20101117 bioconda::percolator=3.5 bioconda::sirius-csifingerid=4.0.1 bioconda::thermorawfileparser=1.3.4 bioconda::xtandem=15.12.15.2 bioconda::openms-thirdparty=2.8.0" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/openms-thirdparty:2.8.0--h9ee0642_0' : - 'quay.io/biocontainers/openms-thirdparty:2.8.0--h9ee0642_0' }" + 'https://depot.galaxyproject.org/singularity/openms:2.8.0--h7ca0330_1' : + 'quay.io/biocontainers/openms:2.8.0--h7ca0330_1' }" input: tuple val(meta), path(id_file) diff --git a/modules/local/openms/falsediscoveryrate/main.nf b/modules/local/openms/falsediscoveryrate/main.nf index 2c2f414b..43d53816 100644 --- a/modules/local/openms/falsediscoveryrate/main.nf +++ b/modules/local/openms/falsediscoveryrate/main.nf @@ -1,11 +1,12 @@ process FALSEDISCOVERYRATE { - label 'process_very_low' + label 'process_low' label 'process_single_thread' + label 'openms' conda (params.enable_conda ? "openms::openms=2.8.0" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/openms-thirdparty:2.8.0--h9ee0642_0' : - 'quay.io/biocontainers/openms-thirdparty:2.8.0--h9ee0642_0' }" + 'https://depot.galaxyproject.org/singularity/openms:2.8.0--h7ca0330_1' : + 'quay.io/biocontainers/openms:2.8.0--h7ca0330_1' }" input: tuple val(meta), path(id_file) diff --git a/modules/local/openms/filemerge/main.nf b/modules/local/openms/filemerge/main.nf index 11b78b15..99bbd1d2 100644 --- a/modules/local/openms/filemerge/main.nf +++ b/modules/local/openms/filemerge/main.nf @@ -1,11 +1,12 @@ process FILEMERGE { label 'process_medium' label 'process_single_thread' + label 'openms' conda (params.enable_conda ? "openms::openms=2.8.0" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/openms-thirdparty:2.8.0--h9ee0642_0' : - 'quay.io/biocontainers/openms-thirdparty:2.8.0--h9ee0642_0' }" + 'https://depot.galaxyproject.org/singularity/openms:2.8.0--h7ca0330_1' : + 'quay.io/biocontainers/openms:2.8.0--h7ca0330_1' }" input: file(id_map) @@ -24,9 +25,9 @@ process FILEMERGE { -in_type consensusXML \\ -annotate_file_origin \\ -append_method 'append_cols' \\ - -debug $params.filemerge_debug \\ -threads $task.cpus \\ -out ID_mapper_merge.consensusXML \\ + $args \\ > ID_mapper_merge.log cat <<-END_VERSIONS > versions.yml diff --git a/modules/local/openms/idconflictresolver/main.nf b/modules/local/openms/idconflictresolver/main.nf index b8527c47..6857104c 100644 --- a/modules/local/openms/idconflictresolver/main.nf +++ b/modules/local/openms/idconflictresolver/main.nf @@ -1,10 +1,11 @@ process IDCONFLICTRESOLVER { label 'process_low' + label 'openms' conda (params.enable_conda ? "openms::openms=2.8.0" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/openms-thirdparty:2.8.0--h9ee0642_0' : - 'quay.io/biocontainers/openms-thirdparty:2.8.0--h9ee0642_0' }" + 'https://depot.galaxyproject.org/singularity/openms:2.8.0--h7ca0330_1' : + 'quay.io/biocontainers/openms:2.8.0--h7ca0330_1' }" input: path consus_file @@ -21,8 +22,8 @@ process IDCONFLICTRESOLVER { IDConflictResolver \\ -in ${consus_file} \\ -threads $task.cpus \\ - $args \\ -out ${consus_file.baseName}_resconf.consensusXML \\ + $args \\ > ${consus_file.baseName}_resconf.log cat <<-END_VERSIONS > versions.yml diff --git a/modules/local/openms/idfilter/main.nf b/modules/local/openms/idfilter/main.nf index 20057aef..d4763917 100644 --- a/modules/local/openms/idfilter/main.nf +++ b/modules/local/openms/idfilter/main.nf @@ -1,11 +1,13 @@ process IDFILTER { + label 'process_very_low' label 'process_single_thread' + label 'openms' conda (params.enable_conda ? "openms::openms=2.8.0" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/openms-thirdparty:2.8.0--h9ee0642_0' : - 'quay.io/biocontainers/openms-thirdparty:2.8.0--h9ee0642_0' }" + 'https://depot.galaxyproject.org/singularity/openms:2.8.0--h7ca0330_1' : + 'quay.io/biocontainers/openms:2.8.0--h7ca0330_1' }" input: tuple val(meta), path(id_file) @@ -25,7 +27,6 @@ process IDFILTER { -out ${id_file.baseName}_filter$suffix \\ -threads $task.cpus \\ $args \\ - -debug $params.idfilter_debug \\ > ${id_file.baseName}_idfilter.log cat <<-END_VERSIONS > versions.yml diff --git a/modules/local/openms/idmapper/main.nf b/modules/local/openms/idmapper/main.nf index 9c52cfbe..0d3ff125 100644 --- a/modules/local/openms/idmapper/main.nf +++ b/modules/local/openms/idmapper/main.nf @@ -1,11 +1,13 @@ process IDMAPPER { tag "$meta.id" + label 'process_medium' + label 'openms' conda (params.enable_conda ? 
"openms::openms=2.8.0" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/openms-thirdparty:2.8.0--h9ee0642_0' : - 'quay.io/biocontainers/openms-thirdparty:2.8.0--h9ee0642_0' }" + 'https://depot.galaxyproject.org/singularity/openms:2.8.0--h7ca0330_1' : + 'quay.io/biocontainers/openms:2.8.0--h7ca0330_1' }" input: tuple val(meta), path(id_file), path(map_file) @@ -24,8 +26,8 @@ process IDMAPPER { -id ${id_file} \\ -in ${map_file} \\ -threads $task.cpus \\ - $args \\ -out ${id_file.baseName}_map.consensusXML \\ + $args \\ > ${id_file.baseName}_map.log cat <<-END_VERSIONS > versions.yml diff --git a/modules/local/openms/idpep/main.nf b/modules/local/openms/idpep/main.nf index 1d4e1e9d..c6185288 100644 --- a/modules/local/openms/idpep/main.nf +++ b/modules/local/openms/idpep/main.nf @@ -3,8 +3,8 @@ process IDPEP { conda (params.enable_conda ? "openms::openms=2.8.0" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/openms-thirdparty:2.8.0--h9ee0642_0' : - 'quay.io/biocontainers/openms-thirdparty:2.8.0--h9ee0642_0' }" + 'https://depot.galaxyproject.org/singularity/openms:2.8.0--h7ca0330_1' : + 'quay.io/biocontainers/openms:2.8.0--h7ca0330_1' }" input: tuple val(meta), path(id_file) diff --git a/modules/local/openms/idscoreswitcher/main.nf b/modules/local/openms/idscoreswitcher/main.nf index a258124c..a6644af5 100644 --- a/modules/local/openms/idscoreswitcher/main.nf +++ b/modules/local/openms/idscoreswitcher/main.nf @@ -5,8 +5,8 @@ process IDSCORESWITCHER { conda (params.enable_conda ? "openms::openms=2.8.0" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/openms-thirdparty:2.8.0--h9ee0642_0' : - 'quay.io/biocontainers/openms-thirdparty:2.8.0--h9ee0642_0' }" + 'https://depot.galaxyproject.org/singularity/openms:2.8.0--h7ca0330_1' : + 'quay.io/biocontainers/openms:2.8.0--h7ca0330_1' }" input: tuple val(meta), path(id_file), val(new_score) diff --git a/modules/local/openms/indexpeptides/main.nf b/modules/local/openms/indexpeptides/main.nf index b80ac057..f733d838 100644 --- a/modules/local/openms/indexpeptides/main.nf +++ b/modules/local/openms/indexpeptides/main.nf @@ -3,8 +3,8 @@ process INDEXPEPTIDES { conda (params.enable_conda ? "openms::openms=2.8.0" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/openms-thirdparty:2.8.0--h9ee0642_0' : - 'quay.io/biocontainers/openms-thirdparty:2.8.0--h9ee0642_0' }" + 'https://depot.galaxyproject.org/singularity/openms:2.8.0--h7ca0330_1' : + 'quay.io/biocontainers/openms:2.8.0--h7ca0330_1' }" input: tuple val(meta), path(id_file), path(database) diff --git a/modules/local/openms/isobaricanalyzer/main.nf b/modules/local/openms/isobaricanalyzer/main.nf index afeb9a2d..77599888 100644 --- a/modules/local/openms/isobaricanalyzer/main.nf +++ b/modules/local/openms/isobaricanalyzer/main.nf @@ -4,8 +4,8 @@ process ISOBARICANALYZER { conda (params.enable_conda ? "openms::openms=2.8.0" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/openms-thirdparty:2.8.0--h9ee0642_0' : - 'quay.io/biocontainers/openms-thirdparty:2.8.0--h9ee0642_0' }" + 'https://depot.galaxyproject.org/singularity/openms:2.8.0--h7ca0330_1' : + 'quay.io/biocontainers/openms:2.8.0--h7ca0330_1' }" input: tuple val(meta), path(mzml_file) @@ -38,8 +38,8 @@ process ISOBARICANALYZER { -extraction:precursor_isotope_deviation $params.precursor_isotope_deviation \\ ${iso_normalization} \\ -${meta.labelling_type}:reference_channel $params.reference_channel \\ - $args \\ -out ${mzml_file.baseName}_iso.consensusXML \\ + $args \\ > ${mzml_file.baseName}_isob.log cat <<-END_VERSIONS > versions.yml diff --git a/modules/local/openms/msstatsconverter/main.nf b/modules/local/openms/msstatsconverter/main.nf index 7950f7c3..59b17411 100644 --- a/modules/local/openms/msstatsconverter/main.nf +++ b/modules/local/openms/msstatsconverter/main.nf @@ -3,8 +3,8 @@ process MSSTATSCONVERTER { conda (params.enable_conda ? "openms::openms=2.8.0" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/openms-thirdparty:2.8.0--h9ee0642_0' : - 'quay.io/biocontainers/openms-thirdparty:2.8.0--h9ee0642_0' }" + 'https://depot.galaxyproject.org/singularity/openms:2.8.0--h7ca0330_1' : + 'quay.io/biocontainers/openms:2.8.0--h7ca0330_1' }" input: path consensusXML diff --git a/modules/local/openms/mzmlindexing/main.nf b/modules/local/openms/mzmlindexing/main.nf index 196795ae..62fff846 100644 --- a/modules/local/openms/mzmlindexing/main.nf +++ b/modules/local/openms/mzmlindexing/main.nf @@ -4,8 +4,8 @@ process MZMLINDEXING { conda (params.enable_conda ? "openms::openms=2.8.0" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/openms-thirdparty:2.8.0--h9ee0642_0' : - 'quay.io/biocontainers/openms-thirdparty:2.8.0--h9ee0642_0' }" + 'https://depot.galaxyproject.org/singularity/openms:2.8.0--h7ca0330_1' : + 'quay.io/biocontainers/openms:2.8.0--h7ca0330_1' }" input: tuple val(meta), path(mzmlfile) diff --git a/modules/local/openms/openmspeakpicker/main.nf b/modules/local/openms/openmspeakpicker/main.nf index ef2274c3..9346abfb 100644 --- a/modules/local/openms/openmspeakpicker/main.nf +++ b/modules/local/openms/openmspeakpicker/main.nf @@ -4,8 +4,8 @@ process OPENMSPEAKPICKER { conda (params.enable_conda ? "openms::openms=2.8.0" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/openms-thirdparty:2.8.0--h9ee0642_0' : - 'quay.io/biocontainers/openms-thirdparty:2.8.0--h9ee0642_0' }" + 'https://depot.galaxyproject.org/singularity/openms:2.8.0--h7ca0330_1' : + 'quay.io/biocontainers/openms:2.8.0--h7ca0330_1' }" input: tuple val(meta), path(mzml_file) diff --git a/modules/local/openms/proteininference/main.nf b/modules/local/openms/proteininference/main.nf index bfcfb0da..1ed84361 100644 --- a/modules/local/openms/proteininference/main.nf +++ b/modules/local/openms/proteininference/main.nf @@ -3,8 +3,8 @@ process PROTEININFERENCE { conda (params.enable_conda ? "openms::openms=2.8.0" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/openms-thirdparty:2.8.0--h9ee0642_0' : - 'quay.io/biocontainers/openms-thirdparty:2.8.0--h9ee0642_0' }" + 'https://depot.galaxyproject.org/singularity/openms:2.8.0--h7ca0330_1' : + 'quay.io/biocontainers/openms:2.8.0--h7ca0330_1' }" input: tuple val(meta), path(consus_file) @@ -16,6 +16,8 @@ process PROTEININFERENCE { script: def args = task.ext.args ?: '' + gg = params.protein_quant == 'shared_peptides' ? 
'-Algorithm:greedy_group_resolution' : '' + groups = params.protein_quant == 'strictly_unique_peptides' ? 'false' : 'true' """ ProteinInference \\ @@ -24,10 +26,13 @@ process PROTEININFERENCE { -picked_fdr $params.picked_fdr \\ -picked_decoy_string $params.decoy_string \\ -protein_fdr true \\ + -Algorithm:use_shared_peptides $params.use_shared_peptides \\ + -Algorithm:annotate_indistinguishable_groups $groups \\ + $gg \\ -Algorithm:score_aggregation_method $params.protein_score \\ - $args \\ -Algorithm:min_peptides_per_protein $params.min_peptides_per_protein \\ -out ${consus_file.baseName}_epi.consensusXML \\ + $args \\ > ${consus_file.baseName}_inference.log cat <<-END_VERSIONS > versions.yml diff --git a/modules/local/openms/proteinquantifier/main.nf b/modules/local/openms/proteinquantifier/main.nf index d04ac553..49ab74c5 100644 --- a/modules/local/openms/proteinquantifier/main.nf +++ b/modules/local/openms/proteinquantifier/main.nf @@ -3,8 +3,8 @@ process PROTEINQUANTIFIER { conda (params.enable_conda ? "openms::openms=2.8.0" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/openms-thirdparty:2.8.0--h9ee0642_0' : - 'quay.io/biocontainers/openms-thirdparty:2.8.0--h9ee0642_0' }" + 'https://depot.galaxyproject.org/singularity/openms:2.8.0--h7ca0330_1' : + 'quay.io/biocontainers/openms:2.8.0--h7ca0330_1' }" input: path epi_filt_resolve diff --git a/modules/local/openms/proteomicslfq/main.nf b/modules/local/openms/proteomicslfq/main.nf index a2151166..1aff7dca 100644 --- a/modules/local/openms/proteomicslfq/main.nf +++ b/modules/local/openms/proteomicslfq/main.nf @@ -3,8 +3,8 @@ process PROTEOMICSLFQ { conda (params.enable_conda ? "openms::openms=2.8.0.dev" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/openms-thirdparty:2.8.0--h9ee0642_0' : - 'quay.io/biocontainers/openms-thirdparty:2.8.0--h9ee0642_0' }" + 'https://depot.galaxyproject.org/singularity/openms:2.8.0--h7ca0330_1' : + 'quay.io/biocontainers/openms:2.8.0--h7ca0330_1' }" input: path(mzmls) @@ -15,7 +15,7 @@ process PROTEOMICSLFQ { output: path "out.mzTab", emit: out_mztab path "out.consensusXML", emit: out_consensusXML - path "out_msstats.csv", emit: out_msstats + path "out_msstats.csv", emit: out_msstats optional true path "out_triqler.tsv", emit: out_triqler optional true path "debug_mergedIDs.idXML", emit: debug_mergedIDs optional true path "debug_mergedIDs_inference.idXML", emit: debug_mergedIDs_inference optional true diff --git a/modules/local/openms/thirdparty/searchenginecomet/main.nf b/modules/local/openms/thirdparty/searchenginecomet/main.nf index 1fa53461..9eb86a1a 100644 --- a/modules/local/openms/thirdparty/searchenginecomet/main.nf +++ b/modules/local/openms/thirdparty/searchenginecomet/main.nf @@ -2,7 +2,7 @@ process SEARCHENGINECOMET { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::bumbershoot bioconda::comet-ms bioconda::crux-toolkit=3.2 bioconda::fido=1.0 conda-forge::gnuplot bioconda::luciphor2=2020_04_03 bioconda::msgf_plus=2021.03.22 bioconda::openms=2.8.0 bioconda::pepnovo=20101117 bioconda::percolator=3.5 bioconda::sirius-csifingerid=4.0.1 bioconda::thermorawfileparser=1.3.4 bioconda::xtandem=15.12.15.2 bioconda::openms-thirdparty=2.8.0" : null) + conda (params.enable_conda ? "bioconda::openms-thirdparty=2.8.0" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/openms-thirdparty:2.8.0--h9ee0642_0' : 'quay.io/biocontainers/openms-thirdparty:2.8.0--h9ee0642_0' }" @@ -67,6 +67,18 @@ process SEARCHENGINECOMET { else if (meta.enzyme == "Lys-C") enzyme = "Lys-C/P" } + num_enzyme_termini = "" + if (meta.enzyme == "unspecific cleavage") + { + num_enzyme_termini = "none" + } + else if (params.num_enzyme_termini == "fully") + { + num_enzyme_termini = "full" + } + + il_equiv = params.IL_equivalent ? "-PeptideIndexing:IL_equivalent" : "" + """ CometAdapter \\ -in ${mzml_file} \\ @@ -89,6 +101,8 @@ process SEARCHENGINECOMET { -precursor_error_units $meta.precursormasstoleranceunit \\ -fragment_mass_tolerance ${bin_tol} \\ -fragment_bin_offset ${bin_offset} \\ + ${il_equiv} \\ + -PeptideIndexing:unmatched_action ${params.unmatched_action} \\ -debug $params.db_debug \\ -force \\ $args \\ diff --git a/modules/local/openms/thirdparty/searchenginemsgf/main.nf b/modules/local/openms/thirdparty/searchenginemsgf/main.nf index 73ca0d10..07eb51e0 100644 --- a/modules/local/openms/thirdparty/searchenginemsgf/main.nf +++ b/modules/local/openms/thirdparty/searchenginemsgf/main.nf @@ -2,7 +2,7 @@ process SEARCHENGINEMSGF { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::bumbershoot bioconda::comet-ms bioconda::crux-toolkit=3.2 bioconda::fido=1.0 conda-forge::gnuplot bioconda::luciphor2=2020_04_03 bioconda::msgf_plus=2021.03.22 bioconda::openms=2.8.0 bioconda::pepnovo=20101117 bioconda::percolator=3.5 bioconda::sirius-csifingerid=4.0.1 bioconda::thermorawfileparser=1.3.4 bioconda::xtandem=15.12.15.2 bioconda::openms-thirdparty=2.8.0" : null) + conda (params.enable_conda ? "bioconda::openms-thirdparty=2.8.0" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/openms-thirdparty:2.8.0--h9ee0642_0' : 'quay.io/biocontainers/openms-thirdparty:2.8.0--h9ee0642_0' }" @@ -45,6 +45,18 @@ process SEARCHENGINEMSGF { inst = params.instrument ?: "low_res" } + num_enzyme_termini = "" + if (meta.enzyme == "unspecific cleavage") + { + num_enzyme_termini = "none" + } + else if (params.num_enzyme_termini == "fully") + { + num_enzyme_termini = "full" + } + + il_equiv = params.IL_equivalent ? "-PeptideIndexing:IL_equivalent" : "" + """ MSGFPlusAdapter \\ -protocol $params.protocol \\ @@ -68,6 +80,8 @@ process SEARCHENGINEMSGF { -fixed_modifications ${meta.fixedmodifications.tokenize(',').collect() { "'${it}'" }.join(" ") } \\ -variable_modifications ${meta.variablemodifications.tokenize(',').collect() { "'${it}'" }.join(" ") } \\ -max_mods $params.max_mods \\ + ${il_equiv} \\ + -PeptideIndexing:unmatched_action ${params.unmatched_action} \\ -debug $params.db_debug \\ $args \\ > ${mzml_file.baseName}_msgf.log diff --git a/modules/local/pmultiqc/main.nf b/modules/local/pmultiqc/main.nf index 025bbb51..35d2fd00 100644 --- a/modules/local/pmultiqc/main.nf +++ b/modules/local/pmultiqc/main.nf @@ -3,34 +3,32 @@ process PMULTIQC { conda (params.enable_conda ? 
"conda-forge::pandas_schema conda-forge::lzstring bioconda::pmultiqc=0.0.10" : null) if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { - container "https://depot.galaxyproject.org/singularity/pmultiqc:0.0.10--pyhdfd78af_0" + container "https://depot.galaxyproject.org/singularity/pmultiqc:0.0.11--pyhdfd78af_0" } else { - container "quay.io/biocontainers/pmultiqc:0.0.10--pyhdfd78af_0" + container "quay.io/biocontainers/pmultiqc:0.0.11--pyhdfd78af_0" } input: - path expdesign - path 'mzMLs/*' path 'results/*' - path 'raw_ids/*' path quantms_log output: path "*.html", emit: ch_pmultiqc_report - path "*.db", emit: ch_pmultiqc_db + path "*.db", optional:true, emit: ch_pmultiqc_db path "versions.yml", emit: versions path "*_data", emit: data path "*_plots", optional:true, emit: plots script: def args = task.ext.args ?: '' + def disable_pmultqic = params.enable_pmultiqc ? "": "--disable_plugin" """ multiqc \\ - --exp_design ${expdesign} \\ - --mzMLs ./mzMLs \\ - --raw_ids ./raw_ids \\ + -f \\ --config ./results/multiqc_config.yml \\ + ${args} \\ + ${disable_pmultqic} \\ ./results \\ -o . 
diff --git a/nextflow.config b/nextflow.config index 3a1fba9d..b2dea632 100644 --- a/nextflow.config +++ b/nextflow.config @@ -10,88 +10,85 @@ params { // Workflow flags - input = null // the sdrf and spectra parameters are inferred from this one - root_folder = null - local_input_type = null - database = null - acqusition_method = null + input = null // the sdrf and spectra parameters are inferred from this one + root_folder = null + local_input_type = null + database = null + acquisition_method = null // Tools flags - posterior_probabilities = 'percolator' - add_decoys = false - enable_pmultiqc = true - search_engines = 'comet' - psm_pep_fdr_cutoff = 0.10 + posterior_probabilities = 'percolator' + add_decoys = false + enable_pmultiqc = true + search_engines = 'comet' + psm_pep_fdr_cutoff = 0.10 protein_level_fdr_cutoff = 0.05 // Debug level - decoydatabase_debug = 0 - pp_debug = 0 - idfilter_debug = 0 - idscoreswitcher_debug = 0 - iso_debug = 0 - db_debug = 0 - idpep_debug = 0 - percolator_debug = 0 - consensusid_debug = 0 - idmapper_debug = 0 - filemerge_debug = 0 - conflict_resolver_debug = 0 - luciphor_debug = 0 + decoydatabase_debug = 0 + pp_debug = 0 + idfilter_debug = 0 + idscoreswitcher_debug = 0 + iso_debug = 0 + db_debug = 0 + idpep_debug = 0 + percolator_debug = 0 + consensusid_debug = 0 + idmapper_debug = 0 + luciphor_debug = 0 protein_inference_debug = 0 - proteinquant_debug = 0 - inf_quant_debug = 0 + plfq_debug = 0 // decoys - decoy_string = 'DECOY_' - decoy_string_position = 'prefix' - decoy_method = 'reverse' - shuffle_max_attempts = 30 + decoy_string = 'DECOY_' + decoy_string_position = 'prefix' + decoy_method = 'reverse' + shuffle_max_attempts = 30 shuffle_sequence_identity_threshold = 0.5 // peak picking if used - openms_peakpicking = false - peakpicking_inmemory = false + openms_peakpicking = false + peakpicking_inmemory = false peakpicking_ms_levels = null // means all/auto //Isobaric analyze - labelling_type = null - reference_channel = 126 
- min_precursor_intensity = 1.0 - reporter_mass_shift = 0.002 - select_activation = 'HCD' - isotope_correction = true - iso_normalization = false - min_reporter_intensity = 0.0 - min_precursor_purity = 0.0 + labelling_type = null + reference_channel = 126 + min_precursor_intensity = 1.0 + reporter_mass_shift = 0.002 + select_activation = 'HCD' + isotope_correction = true + iso_normalization = false + min_reporter_intensity = 0.0 + min_precursor_purity = 0.0 precursor_isotope_deviation = 10.0 // shared search engine parameters - enzyme = 'Trypsin' - num_enzyme_termini = 'fully' - allowed_missed_cleavages = 2 - precursor_mass_tolerance = 5 + enzyme = 'Trypsin' + num_enzyme_termini = 'fully' + allowed_missed_cleavages = 2 + precursor_mass_tolerance = 5 precursor_mass_tolerance_unit = 'ppm' - fixed_mods = 'Carbamidomethyl (C)' - variable_mods = 'Oxidation (M)' - enable_mod_localization = false - mod_localization = 'Phospho (S),Phospho (T),Phospho (Y)' - fragment_mass_tolerance = 0.03 - fragment_mass_tolerance_unit = 'Da' - fragment_method = 'HCD' //currently unused. hard to find a good logic to beat the defaults - isotope_error_range = '0,1' - instrument = null //auto-determined from tolerances - protocol = 'automatic' - min_precursor_charge = 2 - max_precursor_charge = 4 - min_peptide_length = 6 - max_peptide_length = 40 - num_hits = 1 - max_mods = 3 + fixed_mods = 'Carbamidomethyl (C)' + variable_mods = 'Oxidation (M)' + enable_mod_localization = false + mod_localization = 'Phospho (S),Phospho (T),Phospho (Y)' + fragment_mass_tolerance = 0.03 + fragment_mass_tolerance_unit = 'Da' + fragment_method = 'HCD' //currently unused. 
hard to find a good logic to beat the defaults + isotope_error_range = '0,1' + instrument = null //auto-determined from tolerances + protocol = 'automatic' + min_precursor_charge = 2 + max_precursor_charge = 4 + min_peptide_length = 6 + max_peptide_length = 40 + num_hits = 1 + max_mods = 3 // PeptideIndexer flags - IL_equivalent = true - allow_unmatched = false + IL_equivalent = true + unmatched_action = "warn" // IDPEP flags outlier_handling = "none" @@ -100,72 +97,70 @@ params { protein = false // Percolator flags - train_FDR = 0.05 - test_FDR = 0.05 - FDR_level = 'peptide-level-fdrs' - klammer = false + train_FDR = 0.05 + test_FDR = 0.05 + FDR_level = 'peptide-level-fdrs' + klammer = false description_correct_features = 0 - subset_max_train = 300000 + subset_max_train = 300000 // ConsensusID - consensusid_algorithm = 'best' - min_consensus_support = 0 + consensusid_algorithm = 'best' + min_consensus_support = 0 consensusid_considered_top_hits = 0 // Luciphor options - luciphor_neutral_losses = null - luciphor_decoy_mass = null + luciphor_neutral_losses = null + luciphor_decoy_mass = null luciphor_decoy_neutral_losses = null // Epifany - protein_inference_bayesian = false - greedy_group_resolution = 'none' - top_PSMs = 1 - update_PSM_probabilities = true - keep_best_PSM_only = true + top_PSMs = 1 + update_PSM_probabilities = false // Protein inference - picked_fdr = true - protein_score = 'best' // Other options are "Best", "Product", "Sum" + picked_fdr = true + protein_score = 'best' // Other options are "Best", "Product", "Sum" min_peptides_per_protein = 1 + use_shared_peptides = true // ProteinQuantifier - top = 3 - average = 'median' + top = 3 + average = 'median' best_charge_and_fraction = false - normalize = false - ratios = false - fix_peptides = false - include_all = true + normalize = false + ratios = false + fix_peptides = false + include_all = true // ProteomicsLFQ flags protein_inference_method = 'aggregation' - protein_quant = 'unique_peptides' - 
quantification_method = 'feature_intensity' - targeted_only = true - mass_recalibration = false - transfer_ids = 'false' - alignment_order = 'star' - add_triqler_output = false - quantify_decoys = false + protein_quant = 'unique_peptides' + quantification_method = 'feature_intensity' + targeted_only = true + mass_recalibration = false + transfer_ids = 'false' + alignment_order = 'star' + add_triqler_output = false + quantify_decoys = false // DIA-NN - matrix_spec_q = 0.01 - diann_debug = 3 - min_pr_mz = null - max_pr_mz = null - min_fr_mz = null - max_fr_mz = null + matrix_spec_q = 0.01 + diann_debug = 3 + // TODO think about unifying it with DDA parameters + min_pr_mz = null + max_pr_mz = null + min_fr_mz = null + max_fr_mz = null diann_normalize = true // MSstats - msstatsconverter_debug = 0 skip_post_msstats = false - ref_condition = null - contrasts = 'pairwise' + ref_condition = null + contrasts = 'pairwise' // PTXQC - enable_qc = false + enable_qc = false ptxqc_report_layout = null // MultiQC options @@ -174,7 +169,7 @@ params { max_multiqc_email_size = '25.MB' // Boilerplate options - outdir = './results' + outdir = null publish_dir_mode = 'copy' tracedir = "${params.outdir}/pipeline_info" email = null diff --git a/nextflow_schema.json b/nextflow_schema.json index b035553d..c5f081dc 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -14,8 +14,8 @@ "properties": { "input": { "type": "string", - "description": "URI/path to an [SDRF](https://github.com/bigbio/proteomics-metadata-standard/tree/master/annotated-projects) file **OR** globbing pattern for URIs/paths of mzML or Thermo RAW files", - "help_text": "The input to the pipeline can be specified in two **mutually exclusive** ways:\n - using a path or URI to a PRIDE Sample to Data Relation Format file (SDRF), e.g. as part of a submitted and\nannotated PRIDE experiment (see [here](https://github.com/bigbio/proteomics-metadata-standard/tree/master/annotated-projects) for examples). 
Input files will be downloaded and cached from the URIs specified in the SDRF file.\nAn OpenMS-style experimental design will be generated based on the factor columns of the SDRF. The settings for the\nfollowing parameters will currently be overwritten by the ones specified in the SDRF:\n\n * `fixed_mods`,\n * `variable_mods`,\n * `precursor_mass_tolerance`,\n * `precursor_mass_tolerance_unit`,\n * `fragment_mass_tolerance`,\n * `fragment_mass_tolerance_unit`,\n * `fragment_method`,\n * `enzyme`\n - by specifying globbing patterns to the input spectrum files in Thermo RAW or mzML format (e.g. `/data/experiment{1,2,3}_rep*.mzML`). An experimental design should be provided with the `expdesign` parameter.", + "description": "URI/path to an [SDRF](https://github.com/bigbio/proteomics-metadata-standard/tree/master/annotated-projects) file (.sdrf.tsv) **OR** [OpenMS-style experimental design](https://abibuilder.informatik.uni-tuebingen.de/archive/openms/Documentation/release/latest/html/classOpenMS_1_1ExperimentalDesign.html#details) with paths to spectra files (.tsv)", + "help_text": "Input is specified by using a path or URI to a PRIDE Sample to Data Relation Format file (SDRF), e.g. as part of a submitted and\nannotated PRIDE experiment (see [here](https://github.com/bigbio/proteomics-metadata-standard/tree/master/annotated-projects) for examples). Input files will be downloaded and cached from the URIs specified in the SDRF file.\nAn OpenMS-style experimental design will be generated based on the factor columns of the SDRF. 
The settings for the\nfollowing parameters will currently be overwritten by the ones specified in the SDRF:\n\n * `fixed_mods`,\n * `variable_mods`,\n * `precursor_mass_tolerance`,\n * `precursor_mass_tolerance_unit`,\n * `fragment_mass_tolerance`,\n * `fragment_mass_tolerance_unit`,\n * `fragment_method`,\n * `enzyme`\n You can also specify an [OpenMS-style experimental design](https://abibuilder.informatik.uni-tuebingen.de/archive/openms/Documentation/release/latest/html/classOpenMS_1_1ExperimentalDesign.html#details) directly (.tsv ending). In this case, the aforementioned parameters have to be specified or defaults will be used.", "fa_icon": "fas fa-file-csv" }, "outdir": { @@ -35,29 +35,20 @@ "type": "string", "description": "MultiQC report title. Printed as page header, used for filename if not otherwise specified.", "fa_icon": "fas fa-file-signature" - } - } - }, - "main_parameters_sdrf": { - "title": "Main parameters (SDRF)", - "type": "object", - "description": "In case your input was an SDRF files, the following optional parameters can be set.", - "default": "", - "properties": { + }, "root_folder": { "type": "string", - "description": "Root folder in which the spectrum files specified in the SDRF are searched", + "description": "Root folder in which the spectrum files specified in the SDRF/design are searched", "fa_icon": "fas fa-folder", - "help_text": "This optional parameter can be used to specify a root folder in which the spectrum files specified in the SDRF are searched.\nIt is usually used if you have a local version of the experiment already. Note that this option does not support recursive\nsearching yet." + "help_text": "This optional parameter can be used to specify a root folder in which the spectrum files specified in the SDRF/design are searched.\nIt is usually used if you have a local version of the experiment already. Note that this option does not support recursive\nsearching yet." 
}, "local_input_type": { "type": "string", - "description": "Overwrite the file type/extension of the filename as specified in the SDRF", + "description": "Overwrite the file type/extension of the filename as specified in the SDRF/design", "fa_icon": "fas fa-file-invoice", - "help_text": "If the above [`--root_folder`](#params_root_folder) was given to load local input files, this overwrites the file type/extension of\nthe filename as specified in the SDRF. Usually used in case you have an mzML-converted version of the files already. Needs to be\none of 'mzML' or 'raw' (the letter cases should match your files exactly)." + "help_text": "If the above [`--root_folder`](#params_root_folder) was given to load local input files, this overwrites the file type/extension of\nthe filename as specified in the SDRF/design. Usually used in case you have an mzML-converted version of the files already. Needs to be\none of 'mzML' or 'raw' (the letter cases should match your files exactly)." } - }, - "fa_icon": "far fa-chart-bar" + } }, "protein_database": { "title": "Protein database", @@ -148,98 +139,6 @@ }, "fa_icon": "far fa-chart-bar" }, - "isobaric_analyzer": { - "title": "Isobaric analyzer", - "type": "object", - "description": "Extracts and normalizes labeling information", - "default": "", - "properties": { - "labelling_type": { - "type": "string", - "description": "label method", - "fa_icon": "fas fa-font", - "help_text": "Isobaric Quantification method used in the experiment", - "enum": [ - "label free sample", - "itraq4plex", - "itraq8plex", - "tmt6plex", - "tmt10plex", - "tmt11plex", - "tmt16plex" - ] - }, - "select_activation": { - "type": "string", - "description": "Operate only on MSn scans where any of its precursors features a certain activation method.", - "fa_icon": "fas fa-font", - "help_text": "Operate only on MSn scans where any of its precursors features a certain activation method. 
Set to empty to disable", - "enum": ["HCD", "CID", "ETD", "ECD"] - }, - "reporter_mass_shift": { - "type": "number", - "description": "Allowed shift (left to right) in Th from the expected position", - "default": 0.002, - "fa_icon": "fas fa-sliders-h", - "help_text": "Allowed shift (left to right) in Th from the expected position." - }, - "min_precursor_intensity": { - "type": "number", - "description": "Minimum intensity of the precursor to be extracted", - "default": 1.0, - "fa_icon": "fas fa-sliders-h" - }, - "min_precursor_purity": { - "type": "number", - "description": "Minimum fraction of the total intensity. 0.0:1.0", - "default": 0.0, - "fa_icon": "fas fa-sliders-h", - "help_text": "Minimum fraction of the total intensity in the isolation window of the precursor spectrum" - }, - "min_reporter_intensity": { - "type": "number", - "description": "Minimum intensity of the individual reporter ions to be extracted.", - "default": 0.0, - "fa_icon": "fas fa-sliders-h", - "help_text": "Minimum intensity of the individual reporter ions to be extracted." - }, - "precursor_isotope_deviation": { - "type": "number", - "description": "Maximum allowed deviation (in ppm) betweed theoretical and observed isotopic peaks of the precursor peak", - "default": 10.0, - "fa_icon": "fas fa-sliders-h", - "help_text": "Maximum allowed deviation (in ppm) betweed theoretical and observed isotopic peaks of the precursor peak." - }, - "isotope_correction": { - "type": "boolean", - "description": "Enable isotope correction (highly recommended)", - "default": true, - "fa_icon": "fas fa-toggle-on" - }, - "iso_normalization": { - "type": "boolean", - "description": "Enable normalization of the channel intensities", - "default": false, - "fa_icon": "fas fa-toggle-on", - "help_text": "The normalization is done by using the Median of Rations.Also the ratios the medians is provided as control measure." 
- }, - "reference_channel": { - "type": "integer", - "description": "The reference channel", - "fa_icon": "fas fa-list-ol", - "help_text": "The reference channel", - "default": 126 - }, - "iso_debug": { - "type": "integer", - "description": "set the debug level", - "fa_icon": "fas fa-bug", - "default": 0, - "hidden": true - } - }, - "fa_icon": "far fa-chart-bar" - }, "database_search": { "title": "Database search", "type": "object", @@ -440,19 +339,18 @@ "description": "", "default": "", "properties": { - "allow_unmatched": { - "type": "boolean", - "description": "Do not fail if there are some unmatched peptides. Only activate as last resort, if you know that the rest of your settings are fine!", - "default": false, + "unmatched_action": { + "type": "string", + "description": "What to do when peptides are found that do not follow a unified set of rules (since search engines sometimes differ in their interpretation of them). ", + "default": "warn", "fa_icon": "far fa-check-square", - "enum": [false, true] + "enum": ["warn", "error", "remove"] }, "IL_equivalent": { "type": "boolean", "description": "Should isoleucine and leucine be treated interchangeably when mapping search engine hits to the database? Default: true", "default": true, - "fa_icon": "far fa-check-square", - "enum": [true, false] + "fa_icon": "far fa-check-square" } }, "fa_icon": "fas fa-project-diagram" @@ -633,48 +531,46 @@ } } }, - "file_merge": { - "title": "File Merge", - "type": "object", - "description": "Merge multiple files", - "default": "", - "properties": { - "filemerge_debug": { - "type": "integer", - "description": "Debug level for FileMerger step. 
Increase for verbose logging", - "fa_icon": "fas fa-bug", - "default": 0, - "hidden": true - } - } - }, "protein_inference": { "title": "Protein inference", "type": "object", "description": "To group proteins, calculate scores on the protein (group) level and to potentially modify associations from peptides to proteins.", "default": "", "properties": { - "protein_inference_bayesian": { - "type": "boolean", - "description": "Beyesian is used if protein inference is done with Epifany", - "default": false, + "protein_inference_method": { + "type": "string", + "description": "The inference method to use. 'aggregation' (default) or 'bayesian'.", + "default": "aggregation", "fa_icon": "fas fa-list-ol", - "hidden": true + "help_text": "Infer proteins through:\n\n* 'aggregation' = aggregates all peptide scores across a protein (by calculating the maximum) (default)\n* 'bayesian' = compute a posterior probability for every protein based on a Bayesian network (i.e. using Epifany)\n* ('percolator' not yet supported)\n\n**Note:** If protein grouping is performed also depends on the `protein_quant` parameter (i.e. if peptides have to be unique or unique to a group only)", + "enum": ["aggregation", "bayesian"] + }, + "protein_level_fdr_cutoff": { + "type": "number", + "description": "The experiment-wide protein (group)-level FDR cutoff. Default: 0.05", + "default": 0.01, + "fa_icon": "fas fa-filter", + "help_text": "This can be protein level if 'strictly_unique_peptides' are used for protein quantification. 
See [`--protein_quant`](#params_protein_quant)" }, "picked_fdr": { "type": "boolean", "description": "Use picked protein FDRs", "default": true, - "fa_icon": "fas fa-list-ol", - "hidden": true + "fa_icon": "fas fa-list-ol" }, "protein_score": { "type": "string", - "description": "How to aggregate scores of peptides matching to the same protein", + "description": "[Ignored in Bayesian] How to aggregate scores of peptides matching to the same protein", "default": "best", - "enum": ["best", "product", "sum", "maximum"], + "enum": ["best", "product", "sum"], "fa_icon": "fas fa-list-ol" }, + "use_shared_peptides": { + "type": "boolean", + "description": "[Ignored in Bayesian] Also use shared peptides during score aggregation to protein level", + "default": true, + "fa_icon": "fas fa-filter" + }, "min_peptides_per_protein": { "type": "integer", "description": "[Ignored in Bayesian] Minimum number of peptides needed for a protein identification", @@ -683,46 +579,20 @@ }, "top_PSMs": { "type": "integer", - "description": "Consider only top X PSMs per spectrum. 0 considers all.(Default 1).", + "description": "Consider only the top X PSMs per spectrum to find the best PSM per peptide. 0 considers all.", "default": 1, "fa_icon": "fas fa-filter" }, - "greedy_group_resolution": { - "type": "string", - "description": "Post-process inference output with greedy resolution of shared peptides based on the parent protein probabilities. Also adds the resolved ambiguity groups to output.", - "default": "none", - "enum": ["none", "remove_associations_only", "remove_proteins_wo_evidence"] - }, - "keep_best_PSM_only": { - "type": "boolean", - "description": "Epifany uses the best PSM per peptide for inference. Discard the rest (true) or keepe.g. 
for quantification/reporting?", - "default": true, - "fa_icon": "fas fa-list-ol" - }, "update_PSM_probabilities": { "type": "boolean", - "description": "(Experimental:) Update PSM probabilities with their posteriors under consideration of the protein probabilities.", - "default": true, - "fa_icon": "fas fa-list-ol" - }, - "protein_level_fdr_cutoff": { - "type": "number", - "description": "The experiment-wide protein (group)-level FDR cutoff. Default: 0.05", - "default": 0.01, - "fa_icon": "fas fa-filter", - "help_text": "This can be protein level if 'strictly_unique_peptides' are used for protein quantification. See [`--protein_quant`](#params_protein_quant)" - }, - "protein_inference_method": { - "type": "string", - "description": "The inference method to use. 'aggregation' (default) or 'bayesian'.", - "default": "aggregation", + "description": "[Bayesian-only; Experimental] Update PSM probabilities with their posteriors under consideration of the protein probabilities.", + "default": false, "fa_icon": "fas fa-list-ol", - "help_text": "Infer proteins through:\n\n* 'aggregation' = aggregates all peptide scores across a protein (by calculating the maximum) (default)\n* 'bayesian' = compute a posterior probability for every protein based on a Bayesian network (i.e. using Epifany)\n* ('percolator' not yet supported)\n\n**Note:** If protein grouping is performed also depends on the `protein_quant` parameter (i.e. if peptides have to be unique or unique to a group only)", - "enum": ["aggregation", "bayesian"] + "hidden": true }, "protein_inference_debug": { "type": "integer", - "description": "Debug level for ProteinInferencer step. Increase for verbose logging", + "description": "Debug level for the protein inference step. 
Increase for verbose logging", "fa_icon": "fas fa-bug", "default": 0, "hidden": true @@ -730,19 +600,12 @@ }, "fa_icon": "fab fa-hubspot" }, - "protein_quantification": { - "title": "Protein Quantification", + "protein_quantification_dda": { + "title": "Protein Quantification (DDA)", "type": "object", - "description": "", + "description": "General protein quantification settings for both LFQ and Isobaric labelling.", "default": "", "properties": { - "conflict_resolver_debug": { - "type": "integer", - "description": "Debug level for IDConflictResolver step. Increase for verbose logging", - "fa_icon": "fas fa-bug", - "default": 0, - "hidden": true - }, "top": { "type": "integer", "description": "Calculate protein abundance from this number of proteotypic peptides (most abundant first; '0' for all, Default 3)", @@ -787,11 +650,20 @@ }, "protein_quant": { "type": "string", - "description": "Quantify proteins based on:\n\n* 'unique_peptides' = use peptides mapping to single proteins or a group of indistinguishable proteins (according to the set of experimentally identified peptides)\n* 'strictly_unique_peptides' = use peptides mapping to a unique single protein only\n* 'shared_peptides' = use shared peptides, too, but only greedily for its best group (by inference score)", + "description": "Quantify proteins based on:\n\n* 'unique_peptides' = use peptides mapping to single proteins or a group of indistinguishable proteins (according to the set of experimentally identified peptides)\n* 'strictly_unique_peptides' (only LFQ) = use peptides mapping to a unique single protein only\n* 'shared_peptides' = use shared peptides, too, but only greedily for its best group (by inference score and nr. 
of peptides)", "default": "unique_peptides", "enum": ["unique_peptides", "strictly_unique_peptides", "shared_peptides"], "fa_icon": "fas fa-list-ol" - }, + } + }, + "fa_icon": "fas fa-braille" + }, + "protein_quantification_lfq": { + "title": "Protein Quantification (LFQ)", + "type": "object", + "description": "", + "default": "", + "properties": { "quantification_method": { "type": "string", "description": "Choose between feature-based quantification based on integrated MS1 signals ('feature_intensity'; default) or spectral counting of PSMs ('spectral_counting'). **WARNING:** 'spectral_counting' is not compatible with our MSstats step yet. MSstats will therefore be disabled automatically with that choice.", @@ -830,29 +702,102 @@ "default": false, "fa_icon": "far fa-check-square" }, - "inf_quant_debug": { + "plfq_debug": { "type": "integer", "description": "Debug level when running the re-scoring. Logs become more verbose and at '>666' potentially very large temporary files are kept.", "fa_icon": "fas fa-bug", "hidden": true, "default": 0 + } + }, + "fa_icon": "fas fa-braille" + }, + "isobaric_analyzer": { + "title": "Isobaric analyzer", + "type": "object", + "description": "Extracts and normalizes labeling information", + "default": "", + "properties": { + "labelling_type": { + "type": "string", + "description": "Isobaric labelling method. Will be ignored if SDRF was given.", + "fa_icon": "fas fa-font", + "help_text": "Isobaric Quantification method used in the experiment.", + "enum": [ + "label free sample", + "itraq4plex", + "itraq8plex", + "tmt6plex", + "tmt10plex", + "tmt11plex", + "tmt16plex" + ] + }, + "select_activation": { + "type": "string", + "description": "Operate only on MSn scans where any of its precursors features a certain activation method. 
Set to empty to disable.", "fa_icon": "fas fa-font", "enum": ["HCD", "CID", "ETD", "ECD"] }, + "reporter_mass_shift": { + "type": "number", + "description": "Allowed shift (left to right) in Th from the expected position", + "default": 0.002, + "fa_icon": "fas fa-sliders-h" + }, + "min_precursor_intensity": { + "type": "number", + "description": "Minimum intensity of the precursor to be extracted", + "default": 1.0, + "fa_icon": "fas fa-sliders-h" + }, + "min_precursor_purity": { + "type": "number", + "description": "Minimum fraction of the total intensity. 0.0:1.0", + "default": 0.0, + "fa_icon": "fas fa-sliders-h", + "help_text": "Minimum fraction of the total intensity in the isolation window of the precursor spectrum" + }, + "min_reporter_intensity": { + "type": "number", + "description": "Minimum intensity of the individual reporter ions to be extracted.", + "default": 0.0, + "fa_icon": "fas fa-sliders-h" + }, + "precursor_isotope_deviation": { + "type": "number", + "description": "Maximum allowed deviation (in ppm) between theoretical and observed isotopic peaks of the precursor peak", + "default": 10.0, + "fa_icon": "fas fa-sliders-h" }, - "proteinquant_debug": { + "isotope_correction": { + "type": "boolean", + "description": "Enable isotope correction (highly recommended)", + "default": true, + "fa_icon": "fas fa-toggle-on" + }, + "iso_normalization": { + "type": "boolean", + "description": "Enable normalization of the channel intensities", + "default": false, + "fa_icon": "fas fa-toggle-on", + "help_text": "The normalization is done by using the Median of Ratios. The ratio of the medians is also provided as a control measure." + }, + "reference_channel": { "type": "integer", - "description": "Debug level when running ProteinQuantifier.", - "fa_icon": "fas fa-bug", - "default": 0, - "hidden": true + "description": "The reference channel, e.g. 
for calculating ratios.", + "fa_icon": "fas fa-list-ol", + "default": 126 }, - "msstatsconverter_debug": { + "iso_debug": { "type": "integer", - "description": "Debug level when running the re-scoring. Logs become more verbose and at '>666' potentially very large temporary files are kept.", + "description": "Set the debug level", "fa_icon": "fas fa-bug", "default": 0, "hidden": true } }, - "fa_icon": "fas fa-braille" + "fa_icon": "far fa-chart-bar" }, "DIA-NN": { "title": "DIA-NN", @@ -860,9 +805,9 @@ "description": "Settings for DIA-NN - a universal software for data-independent acquisition (DIA) proteomics data processing.", "default": "", "properties": { - "acqusition_method": { + "acquisition_method": { "type": "string", - "description": "Proteomics data acquistion method", + "description": "Proteomics data acquisition method", "default": "dda", "enum": ["dda", "dia"], "fa_icon": "far fa-list-ol" @@ -903,7 +848,7 @@ }, "diann_normalize": { "type": "boolean", - "description": "Enable cross-run normalisation between runs by diann.", + "description": "Enable cross-run normalization between runs by diann.", "default": true, "fa_icon": "far fa-check-square" } @@ -1149,18 +1094,12 @@ { "$ref": "#/definitions/input_output_options" }, - { - "$ref": "#/definitions/main_parameters_sdrf" - }, { "$ref": "#/definitions/protein_database" }, { "$ref": "#/definitions/spectrum_preprocessing" }, - { - "$ref": "#/definitions/isobaric_analyzer" - }, { "$ref": "#/definitions/database_search" }, @@ -1186,13 +1125,16 @@ "$ref": "#/definitions/feature_mapper" }, { - "$ref": "#/definitions/file_merge" + "$ref": "#/definitions/protein_inference" + }, + { + "$ref": "#/definitions/protein_quantification_dda" }, { - "$ref": "#/definitions/protein_inference" + "$ref": "#/definitions/protein_quantification_lfq" }, { - "$ref": "#/definitions/protein_quantification" + "$ref": "#/definitions/isobaric_analyzer" }, { "$ref": "#/definitions/DIA-NN" diff --git 
a/subworkflows/local/create_input_channel.nf b/subworkflows/local/create_input_channel.nf index 603b6c5f..7fccb3c4 100644 --- a/subworkflows/local/create_input_channel.nf +++ b/subworkflows/local/create_input_channel.nf @@ -6,7 +6,7 @@ include { PREPROCESS_EXPDESIGN } from '../../modules/local/preprocess_expdesign' class Wrapper { def labelling_type = "" - def acqusition_method = "" + def acquisition_method = "" } workflow CREATE_INPUT_CHANNEL { @@ -36,12 +36,12 @@ workflow CREATE_INPUT_CHANNEL { // TODO remove. We can't use the variable to direct channels anyway wrapper = new Wrapper() wrapper.labelling_type = "" - wrapper.acqusition_method = "" + wrapper.acquisition_method = "" ch_in_design.splitCsv(header: true, sep: '\t') .map { create_meta_channel(it, is_sdrf, enzymes, files, wrapper) } .branch { - ch_meta_config_dia: it[0].acqusition_method.contains("dia") + ch_meta_config_dia: it[0].acquisition_method.contains("dia") ch_meta_config_iso: it[0].labelling_type.contains("tmt") || it[0].labelling_type.contains("itraq") ch_meta_config_lfq: it[0].labelling_type.contains("label free") } @@ -55,7 +55,6 @@ workflow CREATE_INPUT_CHANNEL { ch_meta_config_lfq // [meta, [spectra_files ]] ch_meta_config_dia // [meta, [spectra files ]] ch_expdesign - wrapper.labelling_type version = ch_versions } @@ -103,17 +102,17 @@ def create_meta_channel(LinkedHashMap row, is_sdrf, enzymes, files, wrapper) { meta.fragmentmasstolerance = params.fragment_mass_tolerance meta.fragmentmasstoleranceunit = params.fragment_mass_tolerance_unit meta.enzyme = params.enzyme - meta.acqusition_method = params.acqusition_method + meta.acquisition_method = params.acquisition_method } else { if (row["Proteomics Data Acquisition Method"].contains("Data-Dependent Acquisition")) { - meta.acqusition_method = "dda" + meta.acquisition_method = "dda" } else if (row["Proteomics Data Acquisition Method"].contains("Data-Independent Acquisition")){ - meta.acqusition_method = "dia" + meta.acquisition_method = "dia" 
} else { log.error "Currently DIA and DDA are supported for the pipeline. Check and Fix your SDRF." exit 1 } - wrapper.acqusition_method = meta.acqusition_method + wrapper.acquisition_method = meta.acquisition_method meta.labelling_type = row.Label meta.dissociationmethod = row.DissociationMethod meta.fixedmodifications = row.FixedModifications @@ -132,8 +131,8 @@ def create_meta_channel(LinkedHashMap row, is_sdrf, enzymes, files, wrapper) { exit 1 } } - if (meta.acqusition_method == "dia") { - log.warn "Acqusition Method: '${meta.acqusition_method}'" + if (meta.acquisition_method == "dia") { + log.warn "Acquisition Method: '${meta.acquisition_method}'" } else { log.warn "Label: '${meta.labelling_type}'" if (wrapper.labelling_type.equals("")) { @@ -151,7 +150,7 @@ def create_meta_channel(LinkedHashMap row, is_sdrf, enzymes, files, wrapper) { } } - if (wrapper.labelling_type.contains("label free") || meta.acqusition_method == "dia") { + if (wrapper.labelling_type.contains("label free") || meta.acquisition_method == "dia") { if (filestr in files) { log.error "Currently only one search engine setting/DIA-NN setting per file is supported for the whole experiment. ${filestr} has multiple entries in your SDRF. Maybe you have a (isobaric) labelled experiment? Otherwise, consider splitting your design into multiple experiments." 
exit 1 diff --git a/subworkflows/local/databasesearchengines.nf b/subworkflows/local/databasesearchengines.nf index 0224b674..ea7f0bf5 100644 --- a/subworkflows/local/databasesearchengines.nf +++ b/subworkflows/local/databasesearchengines.nf @@ -4,7 +4,6 @@ include { SEARCHENGINEMSGF } from '../../modules/local/openms/thirdparty/searchenginemsgf/main' include { SEARCHENGINECOMET} from '../../modules/local/openms/thirdparty/searchenginecomet/main' -include { INDEXPEPTIDES } from '../../modules/local/openms/indexpeptides/main' workflow DATABASESEARCHENGINES { take: @@ -26,11 +25,8 @@ workflow DATABASESEARCHENGINES { ch_id_comet = ch_id_comet.mix(SEARCHENGINECOMET.out.id_files_comet) } - INDEXPEPTIDES(ch_id_msgf.mix(ch_id_comet).combine(searchengine_in_db)) - ch_versions = ch_versions.mix(INDEXPEPTIDES.out.version) - emit: - ch_id_files_idx = INDEXPEPTIDES.out.id_files_idx + ch_id_files_idx = ch_id_msgf.mix(ch_id_comet) versions = ch_versions } diff --git a/subworkflows/local/proteininference.nf b/subworkflows/local/proteininference.nf index 05540e8f..5ee3f5f1 100644 --- a/subworkflows/local/proteininference.nf +++ b/subworkflows/local/proteininference.nf @@ -4,7 +4,7 @@ include { EPIFANY } from '../../modules/local/openms/epifany/main' include { PROTEININFERENCE as PROTEININFERENCER} from '../../modules/local/openms/proteininference/main' -include { IDFILTER as EPIFILTER } from '../../modules/local/openms/idfilter/main' +include { IDFILTER } from '../../modules/local/openms/idfilter/main' workflow PROTEININFERENCE { take: @@ -13,19 +13,19 @@ workflow PROTEININFERENCE { main: ch_version = Channel.empty() - if (params.protein_inference_bayesian) { + if (params.protein_inference_method == "bayesian") { EPIFANY(consus_file) ch_version = ch_version.mix(EPIFANY.out.version) - ch_epifilter = EPIFANY.out.epi_inference + ch_inference = EPIFANY.out.epi_inference } else { PROTEININFERENCER(consus_file) ch_version = ch_version.mix(PROTEININFERENCER.out.version) - ch_epifilter = 
PROTEININFERENCER.out.protein_inference + ch_inference = PROTEININFERENCER.out.protein_inference } - EPIFILTER(ch_epifilter) - ch_version = ch_version.mix(EPIFILTER.out.version) - EPIFILTER.out.id_filtered + IDFILTER(ch_inference) + ch_version = ch_version.mix(IDFILTER.out.version) + IDFILTER.out.id_filtered .multiMap{ it -> meta: it[0] results: it[1] diff --git a/workflows/quantms.nf b/workflows/quantms.nf index 0064f9e5..2493ddbf 100644 --- a/workflows/quantms.nf +++ b/workflows/quantms.nf @@ -42,7 +42,7 @@ include { PMULTIQC as SUMMARYPIPELINE } from '../modules/local/pmultiqc/main' // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules // include { INPUT_CHECK } from '../subworkflows/local/input_check' -include { FILE_PREPARATION as FILE_PREPARATION_LFQ; FILE_PREPARATION as FILE_PREPARATION_TMT; FILE_PREPARATION as FILE_PREPARATION_DIA } from '../subworkflows/local/file_preparation' +include { FILE_PREPARATION } from '../subworkflows/local/file_preparation' include { CREATE_INPUT_CHANNEL } from '../subworkflows/local/create_input_channel' /* @@ -91,57 +91,43 @@ workflow QUANTMS { // // SUBWORKFLOW: File preparation // - FILE_PREPARATION_TMT ( - CREATE_INPUT_CHANNEL.out.ch_meta_config_iso - ) - FILE_PREPARATION_LFQ ( - CREATE_INPUT_CHANNEL.out.ch_meta_config_lfq - ) - - FILE_PREPARATION_DIA ( - CREATE_INPUT_CHANNEL.out.ch_meta_config_dia + FILE_PREPARATION ( + CREATE_INPUT_CHANNEL.out.ch_meta_config_iso.mix(CREATE_INPUT_CHANNEL.out.ch_meta_config_lfq).mix(CREATE_INPUT_CHANNEL.out.ch_meta_config_dia) ) - ch_versions = ch_versions.mix(FILE_PREPARATION_TMT.out.version.ifEmpty(null)) - ch_versions = ch_versions.mix(FILE_PREPARATION_LFQ.out.version.ifEmpty(null)) - ch_versions = ch_versions.mix(FILE_PREPARATION_DIA.out.version.ifEmpty(null)) + ch_versions = ch_versions.mix(FILE_PREPARATION.out.version.ifEmpty(null)) - FILE_PREPARATION_LFQ.out.results + FILE_PREPARATION.out.results .map { it -> it[1] } - .set { ch_pmultiqc_mzmls_lfq } + .set { 
ch_pmultiqc_mzmls } - FILE_PREPARATION_TMT.out.results - .map { it -> it[1] } - .set { ch_pmultiqc_mzmls_iso } + FILE_PREPARATION.out.results + .branch { + dia: it[0].acquisition_method.contains("dia") + iso: it[0].labelling_type.contains("tmt") || it[0].labelling_type.contains("itraq") + lfq: it[0].labelling_type.contains("label free") + } + .set{ch_fileprep_result} - FILE_PREPARATION_DIA.out.results - .map { it -> it[1] } - .set { ch_pmultiqc_mzmls_dia } - - ch_pmultiqc_mzmls = ch_pmultiqc_mzmls_lfq.mix(ch_pmultiqc_mzmls_iso).mix(ch_pmultiqc_mzmls_dia) // // WORKFLOW: Run main nf-core/quantms analysis pipeline based on the quantification type // - // TODO if we ever allow mixed labelling types, we need to split the files according to meta.labelling_type which contains - // labelling_type per file ch_pipeline_results = Channel.empty() ch_ids_pmultiqc = Channel.empty() - TMT(FILE_PREPARATION_TMT.out.results, CREATE_INPUT_CHANNEL.out.ch_expdesign) + TMT(ch_fileprep_result.iso, CREATE_INPUT_CHANNEL.out.ch_expdesign) ch_ids_pmultiqc = ch_ids_pmultiqc.mix(TMT.out.ch_pmultiqc_ids) ch_pipeline_results = ch_pipeline_results.mix(TMT.out.final_result) ch_versions = ch_versions.mix(TMT.out.versions.ifEmpty(null)) - LFQ(FILE_PREPARATION_LFQ.out.results, CREATE_INPUT_CHANNEL.out.ch_expdesign) + LFQ(ch_fileprep_result.lfq, CREATE_INPUT_CHANNEL.out.ch_expdesign) ch_ids_pmultiqc = ch_ids_pmultiqc.mix(LFQ.out.ch_pmultiqc_ids) ch_pipeline_results = ch_pipeline_results.mix(LFQ.out.final_result) ch_versions = ch_versions.mix(LFQ.out.versions.ifEmpty(null)) - DIA(FILE_PREPARATION_DIA.out.results, CREATE_INPUT_CHANNEL.out.ch_expdesign) - - // ch_ids_pmultiqc = ch_ids_pmultiqc.mix(DIA.out.ch_pmultiqc_ids) - // ch_pipeline_results = ch_pipeline_results.mix(DIA.out.final_result) + DIA(ch_fileprep_result.dia, CREATE_INPUT_CHANNEL.out.ch_expdesign) + ch_pipeline_results = ch_pipeline_results.mix(DIA.out.diann_report) ch_versions = ch_versions.mix(DIA.out.versions.ifEmpty(null)) @@ -166,10 
+152,10 @@ workflow QUANTMS { ch_multiqc_quantms_logo = file("$projectDir/assets/nf-core-quantms_logo_light.png") SUMMARYPIPELINE ( - CREATE_INPUT_CHANNEL.out.ch_expdesign, - ch_pmultiqc_mzmls.collect(), - ch_pipeline_results.combine(ch_multiqc_files.collect()), - ch_ids_pmultiqc.collect(), + CREATE_INPUT_CHANNEL.out.ch_expdesign + .combine(ch_pipeline_results.combine(ch_multiqc_files.collect()) + .combine(ch_pmultiqc_mzmls.collect()) + .combine(ch_ids_pmultiqc.collect().ifEmpty([]))), ch_multiqc_quantms_logo ) multiqc_report = SUMMARYPIPELINE.out.ch_pmultiqc_report.toList()