diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..72dda28 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,33 @@ +root = true + +[*] +charset = utf-8 +end_of_line = lf +insert_final_newline = true +trim_trailing_whitespace = true +indent_size = 4 +indent_style = space + +[*.{md,yml,yaml,html,css,scss,js}] +indent_size = 2 + +# These files are edited and tested upstream in nf-core/modules +[/modules/nf-core/**] +charset = unset +end_of_line = unset +insert_final_newline = unset +trim_trailing_whitespace = unset +indent_style = unset +[/subworkflows/nf-core/**] +charset = unset +end_of_line = unset +insert_final_newline = unset +trim_trailing_whitespace = unset +indent_style = unset + +[/assets/email*] +indent_size = unset + +# ignore python and markdown +[*.{py,md}] +indent_style = unset diff --git a/.gitignore b/.gitignore index fa6bdaa..b85a91e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,9 +1,333 @@ +# development +tools + +# History files +.Rhistory +.Rapp.history + +# Session Data files +.RData +.RDataTmp + +# User-specific files +.Ruserdata + +# Example code in package build process +*-Ex.R + +# Output files from R CMD build +/*.tar.gz + +# Output files from R CMD check +/*.Rcheck/ + +# RStudio files +.Rproj.user/ + +# produced vignettes +vignettes/*.html +vignettes/*.pdf + +# OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 +.httr-oauth + +# knitr and R markdown default cache directories +*_cache/ +/cache/ + +# Temporary files created by R markdown +*.utf8.md +*.knit.md + +# R Environment Variables +.Renviron + +# translation temp files +po/*~ + +# RStudio Connect folder +rsconnect/ + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ + +.vscode/* +!.vscode/settings.json +!.vscode/tasks.json +!.vscode/launch.json +!.vscode/extensions.json +!.vscode/*.code-snippets + +# Local History for Visual Studio Code +.history/ + +# Built Visual Studio Code Extensions +*.vsix + +# General +.DS_Store +.AppleDouble +.LSOverride + +# Icon must end with two \r +Icon + + +# Thumbnails +._* + +# Files that might appear in the root of a volume +.DocumentRevisions-V100 +.fseventsd +.Spotlight-V100 +.TemporaryItems +.Trashes +.VolumeIcon.icns +.com.apple.timemachine.donotpresent + +# Directories potentially created on remote AFP share +.AppleDB +.AppleDesktop +Network Trash Folder +Temporary Items +.apdisk + # Nextflow .nextflow* work/ -#JetBrains -.idea/ +# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider +# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 -# MacOS -.DS_Store +# User-specific stuff +.idea/**/workspace.xml +.idea/**/tasks.xml +.idea/**/usage.statistics.xml +.idea/**/dictionaries +.idea/**/shelf + +# AWS User-specific +.idea/**/aws.xml + +# Generated files +.idea/**/contentModel.xml + +# Sensitive or high-churn files +.idea/**/dataSources/ +.idea/**/dataSources.ids +.idea/**/dataSources.local.xml +.idea/**/sqlDataSources.xml +.idea/**/dynamic.xml +.idea/**/uiDesigner.xml +.idea/**/dbnavigator.xml + +# Gradle +.idea/**/gradle.xml +.idea/**/libraries + +# Gradle and Maven with auto-import +# When using Gradle or Maven with auto-import, you should exclude module files, +# since they will be recreated, and may cause churn. Uncomment if using +# auto-import. +# .idea/artifacts +# .idea/compiler.xml +# .idea/jarRepositories.xml +# .idea/modules.xml +# .idea/*.iml +# .idea/modules +# *.iml +# *.ipr + +# CMake +cmake-build-*/ + +# Mongo Explorer plugin +.idea/**/mongoSettings.xml + +# File-based project format +*.iws + +# IntelliJ +out/ + +# mpeltonen/sbt-idea plugin +.idea_modules/ + +# JIRA plugin +atlassian-ide-plugin.xml + +# Cursive Clojure plugin +.idea/replstate.xml + +# SonarLint plugin +.idea/sonarlint/ + +# Crashlytics plugin (for Android Studio and IntelliJ) +com_crashlytics_export_strings.xml +crashlytics.properties +crashlytics-build.properties +fabric.properties + +# Editor-based Rest Client +.idea/httpRequests + +# Android studio 3.1+ serialized cache file +.idea/caches/build_file_checksums.ser diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..e69de29 diff --git a/.nf-core.yml b/.nf-core.yml index 2d28a8a..1e87604 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -1,9 +1,11 @@ +nf_core_version: 3.0.2 +repository_type: pipeline + lint: + actions_ci: false files_exist: - .gitattributes - .editorconfig - - .prettierignore - - .prettierrc.yml - .github/CONTRIBUTING.md - .github/ISSUE_TEMPLATE/bug_report.yml - .github/ISSUE_TEMPLATE/config.yml @@ -22,45 +24,21 @@ lint: - assets/email_template.html - assets/email_template.txt - assets/sendmail_template.txt - - assets/nf-core-DxNextflowRNA_logo_light.png - - assets/nf-core-dxnextflowrna_logo_light.png - - conf/igenomes.config + - assets/umcugenetics-dxnextflowrna_logo_dark.png + - assets/umcugenetics-dxnextflowrna_logo_light.png - conf/modules.config - conf/test.config - conf/test_full.config - - docs/images/nf-core-DxNextflowRNA_logo_light.png - - docs/images/nf-core-DxNextflowRNA_logo_dark.png + - docs/images/umcugenetics-dxnextflowrna_metro_map - docs/output.md - docs/README.md - - docs/README.md - docs/usage.md - - lib/nfcore_external_java_deps.jar - - lib/NfcoreTemplate.groovy - - lib/Utils.groovy - - lib/WorkflowMain.groovy - - lib/WorkflowDxNextflowRNA.groovy - - pyproject.toml files_unchanged: - .github/ISSUE_TEMPLATE/bug_report.yml - .github/workflows/linting.yml + - assets/email_template.html + - assets/email_template.txt - .gitignore - CODE_OF_CONDUCT.md - - assets/nf-core-dxnextflowrna_logo_light.png - - docs/images/nf-core-dxnextflowrna_logo_light.png - - docs/images/nf-core-dxnextflowrna_logo_dark.png multiqc_config: - report_comment - nextflow_config: - - dag.enabled - - dag.file - - manifest.name - - manifest.homePage - - params.outdir - - params.input - - params.validationShowHiddenParams - - params.validationSchemaIgnoreParams - - process.cpus - - process.memory - - process.time - - custom_config -repository_type: pipeline diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..c9502cd --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,16 @@ +# umcugenetics/dxnextflowrna: Changelog + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## v1.0.0 - [date] + +Initial release of umcugenetics/dxnextflowrna, created with the [nf-core](https://nf-co.re/) template. + +### `Added` + +### `Fixed` + +### `Dependencies` + +### `Deprecated` diff --git a/CITATIONS.md b/CITATIONS.md new file mode 100644 index 0000000..7d8fc88 --- /dev/null +++ b/CITATIONS.md @@ -0,0 +1,73 @@ +# UMCUGenetics/DxNextflowRNA: Citations + +## [nf-core](https://pubmed.ncbi.nlm.nih.gov/32055031/) + +> Ewels PA, Peltzer A, Fillinger S, Patel H, Alneberg J, Wilm A, Garcia MU, Di Tommaso P, Nahnsen S. The nf-core framework for community-curated bioinformatics pipelines. Nat Biotechnol. 2020 Mar;38(3):276-278. doi: 10.1038/s41587-020-0439-x. PubMed PMID: 32055031. + +## [Nextflow](https://pubmed.ncbi.nlm.nih.gov/28398311/) + +> Di Tommaso P, Chatzou M, Floden EW, Barja PP, Palumbo E, Notredame C. Nextflow enables reproducible computational workflows. Nat Biotechnol. 2017 Apr 11;35(4):316-319. doi: 10.1038/nbt.3820. PubMed PMID: 28398311. + +## Pipeline tools + +- [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) + + > Andrews, S. (2010). FastQC: A Quality Control Tool for High Throughput Sequence Data [Online]. + + + +- [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/) + + > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924. + +- [picard-tools](http://broadinstitute.github.io/picard) + +- [preseq](https://pubmed.ncbi.nlm.nih.gov/23435259/) + + > Daley T, Smith AD. Predicting the molecular complexity of sequencing libraries. Nat Methods. 2013 Apr;10(4):325-7. doi: 10.1038/nmeth.2375. Epub 2013 Feb 24. PubMed PMID: 23435259; PubMed Central PMCID: PMC3612374. + +- [RSeQC](https://pubmed.ncbi.nlm.nih.gov/22743226/) + + > Wang L, Wang S, Li W. RSeQC: quality control of RNA-seq experiments Bioinformatics. 2012 Aug 15;28(16):2184-5. doi: 10.1093/bioinformatics/bts356. Epub 2012 Jun 27. PubMed PMID: 22743226. + +- [SAMtools](https://pubmed.ncbi.nlm.nih.gov/19505943/) + + > Li H, Handsaker B, Wysoker A, Fennell T, Ruan J, Homer N, Marth G, Abecasis G, Durbin R; 1000 Genome Project Data Processing Subgroup. The Sequence Alignment/Map format and SAMtools. Bioinformatics. 2009 Aug 15;25(16):2078-9. doi: 10.1093/bioinformatics/btp352. Epub 2009 Jun 8. PubMed PMID: 19505943; PubMed Central PMCID: PMC2723002. + +- [SortMeRNA](https://pubmed.ncbi.nlm.nih.gov/23071270/) + + > Kopylova E, Noé L, Touzet H. SortMeRNA: fast and accurate filtering of ribosomal RNAs in metatranscriptomic data Bioinformatics. 2012 Dec 15;28(24):3211-7. doi: 10.1093/bioinformatics/bts611. Epub 2012 Oct 15. PubMed PMID: 23071270. + +- [STAR](https://pubmed.ncbi.nlm.nih.gov/23104886/) + + > Dobin A, Davis CA, Schlesinger F, Drenkow J, Zaleski C, Jha S, Batut P, Chaisson M, Gingeras TR. STAR: ultrafast universal RNA-seq aligner Bioinformatics. 2013 Jan 1;29(1):15-21. doi: 10.1093/bioinformatics/bts635. Epub 2012 Oct 25. PubMed PMID: 23104886; PubMed Central PMCID: PMC3530905. + +- [Trim Galore!](https://www.bioinformatics.babraham.ac.uk/projects/trim_galore/) + +- [UMI-tools](https://pubmed.ncbi.nlm.nih.gov/28100584/) + + > Smith T, Heger A, Sudbery I. UMI-tools: modeling sequencing errors in Unique Molecular Identifiers to improve quantification accuracy Genome Res. 2017 Mar;27(3):491-499. doi: 10.1101/gr.209601.116. Epub 2017 Jan 18. PubMed PMID: 28100584; PubMed Central PMCID: PMC5340976. + + +## Software packaging/containerisation tools +- [Anaconda](https://anaconda.com) + + > Anaconda Software Distribution. Computer software. Vers. 2-2.4.0. Anaconda, Nov. 2016. Web. + +- [Bioconda](https://pubmed.ncbi.nlm.nih.gov/29967506/) + + > Grüning B, Dale R, Sjödin A, Chapman BA, Rowe J, Tomkins-Tinch CH, Valieris R, Köster J; Bioconda Team. Bioconda: sustainable and comprehensive software distribution for the life sciences. Nat Methods. 2018 Jul;15(7):475-476. doi: 10.1038/s41592-018-0046-7. PubMed PMID: 29967506. + +- [BioContainers](https://pubmed.ncbi.nlm.nih.gov/28379341/) + + > da Veiga Leprevost F, Grüning B, Aflitos SA, Röst HL, Uszkoreit J, Barsnes H, Vaudel M, Moreno P, Gatto L, Weber J, Bai M, Jimenez RC, Sachsenberg T, Pfeuffer J, Alvarez RV, Griss J, Nesvizhskii AI, Perez-Riverol Y. BioContainers: an open-source and community-driven framework for software standardization. Bioinformatics. 2017 Aug 15;33(16):2580-2582. doi: 10.1093/bioinformatics/btx192. PubMed PMID: 28379341; PubMed Central PMCID: PMC5870671. + +- [Docker](https://dl.acm.org/doi/10.5555/2600239.2600241) + + > Merkel, D. (2014). Docker: lightweight linux containers for consistent development and deployment. Linux Journal, 2014(239), 2. doi: 10.5555/2600239.2600241. + +- [Singularity](https://pubmed.ncbi.nlm.nih.gov/28494014/) + + > Kurtzer GM, Sochat V, Bauer MW. Singularity: Scientific containers for mobility of compute. PLoS One. 2017 May 11;12(5):e0177459. doi: 10.1371/journal.pone.0177459. eCollection 2017. PubMed PMID: 28494014; PubMed Central PMCID: PMC5426675. diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 0000000..c089ec7 --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,182 @@ +# Code of Conduct at nf-core (v1.4) + +## Our Pledge + +In the interest of fostering an open, collaborative, and welcoming environment, we as contributors and maintainers of nf-core pledge to making participation in our projects and community a harassment-free experience for everyone, regardless of: + +- Age +- Ability +- Body size +- Caste +- Familial status +- Gender identity and expression +- Geographical location +- Level of experience +- Nationality and national origins +- Native language +- Neurodiversity +- Race or ethnicity +- Religion +- Sexual identity and orientation +- Socioeconomic status + +Please note that the list above is alphabetised and is therefore not ranked in any order of preference or importance. + +## Preamble + +:::note +This Code of Conduct (CoC) has been drafted by Renuka Kudva, Cris Tuñí, and Michael Heuer, with input from the nf-core Core Team and Susanna Marquez from the nf-core community. "We", in this document, refers to the Safety Officers and members of the nf-core Core Team, both of whom are deemed to be members of the nf-core community and are therefore required to abide by this Code of Conduct. This document will be amended periodically to keep it up-to-date. In case of any dispute, the most current version will apply. +::: + +An up-to-date list of members of the nf-core core team can be found [here](https://nf-co.re/about). + +Our Safety Officers are Saba Nafees, Cris Tuñí, and Michael Heuer. + +nf-core is a young and growing community that welcomes contributions from anyone with a shared vision for [Open Science Policies](https://www.fosteropenscience.eu/taxonomy/term/8). Open science policies encompass inclusive behaviours and we strive to build and maintain a safe and inclusive environment for all individuals. + +We have therefore adopted this CoC, which we require all members of our community and attendees of nf-core events to adhere to in all our workspaces at all times. Workspaces include, but are not limited to, Slack, meetings on Zoom, gather.town, YouTube live etc. + +Our CoC will be strictly enforced and the nf-core team reserves the right to exclude participants who do not comply with our guidelines from our workspaces and future nf-core activities. + +We ask all members of our community to help maintain supportive and productive workspaces and to avoid behaviours that can make individuals feel unsafe or unwelcome. Please help us maintain and uphold this CoC. + +Questions, concerns, or ideas on what we can include? Contact members of the Safety Team on Slack or email safety [at] nf-co [dot] re. + +## Our Responsibilities + +Members of the Safety Team (the Safety Officers) are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behaviour. + +The Safety Team, in consultation with the nf-core core team, have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this CoC, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. + +Members of the core team or the Safety Team who violate the CoC will be required to recuse themselves pending investigation. They will not have access to any reports of the violations and will be subject to the same actions as others in violation of the CoC. + +## When and where does this Code of Conduct apply? + +Participation in the nf-core community is contingent on following these guidelines in all our workspaces and events, such as hackathons, workshops, bytesize, and collaborative workspaces on gather.town. These guidelines include, but are not limited to, the following (listed alphabetically and therefore in no order of preference): + +- Communicating with an official project email address. +- Communicating with community members within the nf-core Slack channel. +- Participating in hackathons organised by nf-core (both online and in-person events). +- Participating in collaborative work on GitHub, Google Suite, community calls, mentorship meetings, email correspondence, and on the nf-core gather.town workspace. +- Participating in workshops, training, and seminar series organised by nf-core (both online and in-person events). This applies to events hosted on web-based platforms such as Zoom, gather.town, Jitsi, YouTube live etc. +- Representing nf-core on social media. This includes both official and personal accounts. + +## nf-core cares 😊 + +nf-core's CoC and expectations of respectful behaviours for all participants (including organisers and the nf-core team) include, but are not limited to, the following (listed in alphabetical order): + +- Ask for consent before sharing another community member’s personal information (including photographs) on social media. +- Be respectful of differing viewpoints and experiences. We are all here to learn from one another and a difference in opinion can present a good learning opportunity. +- Celebrate your accomplishments! (Get creative with your use of emojis 🎉 🥳 💯 🙌 !) +- Demonstrate empathy towards other community members. (We don’t all have the same amount of time to dedicate to nf-core. If tasks are pending, don’t hesitate to gently remind members of your team. If you are leading a task, ask for help if you feel overwhelmed.) +- Engage with and enquire after others. (This is especially important given the geographically remote nature of the nf-core community, so let’s do this the best we can) +- Focus on what is best for the team and the community. (When in doubt, ask) +- Accept feedback, yet be unafraid to question, deliberate, and learn. +- Introduce yourself to members of the community. (We’ve all been outsiders and we know that talking to strangers can be hard for some, but remember we’re interested in getting to know you and your visions for open science!) +- Show appreciation and **provide clear feedback**. (This is especially important because we don’t see each other in person and it can be harder to interpret subtleties. Also remember that not everyone understands a certain language to the same extent as you do, so **be clear in your communication to be kind.**) +- Take breaks when you feel like you need them. +- Use welcoming and inclusive language. (Participants are encouraged to display their chosen pronouns on Zoom or in communication on Slack) + +## nf-core frowns on 😕 + +The following behaviours from any participants within the nf-core community (including the organisers) will be considered unacceptable under this CoC. Engaging or advocating for any of the following could result in expulsion from nf-core workspaces: + +- Deliberate intimidation, stalking or following and sustained disruption of communication among participants of the community. This includes hijacking shared screens through actions such as using the annotate tool in conferencing software such as Zoom. +- “Doxing” i.e. posting (or threatening to post) another person’s personal identifying information online. +- Spamming or trolling of individuals on social media. +- Use of sexual or discriminatory imagery, comments, jokes, or unwelcome sexual attention. +- Verbal and text comments that reinforce social structures of domination related to gender, gender identity and expression, sexual orientation, ability, physical appearance, body size, race, age, religion, or work experience. + +### Online Trolling + +The majority of nf-core interactions and events are held online. Unfortunately, holding events online comes with the risk of online trolling. This is unacceptable — reports of such behaviour will be taken very seriously and perpetrators will be excluded from activities immediately. + +All community members are **required** to ask members of the group they are working with for explicit consent prior to taking screenshots of individuals during video calls. + +## Procedures for reporting CoC violations + +If someone makes you feel uncomfortable through their behaviours or actions, report it as soon as possible. + +You can reach out to members of the Safety Team (Saba Nafees, Cris Tuñí, and Michael Heuer) on Slack. Alternatively, contact a member of the nf-core core team [nf-core core team](https://nf-co.re/about), and they will forward your concerns to the Safety Team. + +Issues directly concerning members of the Core Team or the Safety Team will be dealt with by other members of the core team and the safety manager — possible conflicts of interest will be taken into account. nf-core is also in discussions about having an ombudsperson and details will be shared in due course. + +All reports will be handled with the utmost discretion and confidentiality. + +You can also report any CoC violations to safety [at] nf-co [dot] re. In your email report, please do your best to include: + +- Your contact information. +- Identifying information (e.g. names, nicknames, pseudonyms) of the participant who has violated the Code of Conduct. +- The behaviour that was in violation and the circumstances surrounding the incident. +- The approximate time of the behaviour (if different than the time the report was made). +- Other people involved in the incident, if applicable. +- If you believe the incident is ongoing. +- If there is a publicly available record (e.g. mailing list record, a screenshot). +- Any additional information. + +After you file a report, one or more members of our Safety Team will contact you to follow up on your report. + +## Who will read and handle reports + +All reports will be read and handled by the members of the Safety Team at nf-core. + +If members of the Safety Team are deemed to have a conflict of interest with a report, they will be required to recuse themselves as per our Code of Conduct and will not have access to any follow-ups. + +To keep this first report confidential from any of the Safety Team members, please submit your first report by direct messaging on Slack/direct email to any of the nf-core members you are comfortable disclosing the information to, and be explicit about which member(s) you do not consent to sharing the information with. + +## Reviewing reports + +After receiving the report, members of the Safety Team will review the incident report to determine whether immediate action is required, for example, whether there is immediate threat to participants’ safety. + +The Safety Team, in consultation with members of the nf-core core team, will assess the information to determine whether the report constitutes a Code of Conduct violation, for them to decide on a course of action. + +In the case of insufficient information, one or more members of the Safety Team may contact the reporter, the reportee, or any other attendees to obtain more information. + +Once additional information is gathered, the Safety Team will collectively review and decide on the best course of action to take, if any. The Safety Team reserves the right to not act on a report. + +## Confidentiality + +All reports, and any additional information included, are only shared with the team of safety officers (and possibly members of the core team, in case the safety officer is in violation of the CoC). We will respect confidentiality requests for the purpose of protecting victims of abuse. + +We will not name harassment victims, beyond discussions between the safety officer and members of the nf-core team, without the explicit consent of the individuals involved. + +## Enforcement + +Actions taken by the nf-core’s Safety Team may include, but are not limited to: + +- Asking anyone to stop a behaviour. +- Asking anyone to leave the event and online spaces either temporarily, for the remainder of the event, or permanently. +- Removing access to the gather.town and Slack, either temporarily or permanently. +- Communicating to all participants to reinforce our expectations for conduct and remind what is unacceptable behaviour; this may be public for practical reasons. +- Communicating to all participants that an incident has taken place and how we will act or have acted — this may be for the purpose of letting event participants know we are aware of and dealing with the incident. +- Banning anyone from participating in nf-core-managed spaces, future events, and activities, either temporarily or permanently. +- No action. + +## Attribution and Acknowledgements + +- The [Contributor Covenant, version 1.4](http://contributor-covenant.org/version/1/4) +- The [OpenCon 2017 Code of Conduct](http://www.opencon2017.org/code_of_conduct) (CC BY 4.0 OpenCon organisers, SPARC and Right to Research Coalition) +- The [eLife innovation sprint 2020 Code of Conduct](https://sprint.elifesciences.org/code-of-conduct/) +- The [Mozilla Community Participation Guidelines v3.1](https://www.mozilla.org/en-US/about/governance/policies/participation/) (version 3.1, CC BY-SA 3.0 Mozilla) + +## Changelog + +### v1.4 - February 8th, 2022 + +- Included a new member of the Safety Team. Corrected a typographical error in the text. + +### v1.3 - December 10th, 2021 + +- Added a statement that the CoC applies to nf-core gather.town workspaces. Corrected typographical errors in the text. + +### v1.2 - November 12th, 2021 + +- Removed information specific to reporting CoC violations at the Hackathon in October 2021. + +### v1.1 - October 14th, 2021 + +- Updated with names of new Safety Officers and specific information for the hackathon in October 2021. + +### v1.0 - March 15th, 2021 + +- Complete rewrite from original [Contributor Covenant](http://contributor-covenant.org/) CoC. diff --git a/README.md b/README.md index fe92ab3..b7b7c58 100644 --- a/README.md +++ b/README.md @@ -1,25 +1,117 @@ -[![GitHub Actions CI Status](https://github.com/UMCUGenetics/dxnextflowrna/workflows/nf-core%20CI/badge.svg)](https://github.com/UMCUGenetics/dxnextflowrna/actions?query=workflow%3A%22nf-core+CI%22) -[![GitHub Actions Linting Status](https://github.com/UMCUGenetics/dxnextflowrna/workflows/nf-core%20linting/badge.svg)](https://github.com/UMCUGenetics/dxnextflowrna/actions?query=workflow%3A%22nf-core+linting%22)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX) +

+ + + umcugenetics/dxnextflowrna + +

-[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A523.04.0-23aa62.svg)](https://www.nextflow.io/) +[![GitHub Actions CI Status](https://github.com/umcugenetics/dxnextflowrna/actions/workflows/ci.yml/badge.svg)](https://github.com/umcugenetics/dxnextflowrna/actions/workflows/ci.yml) +[![GitHub Actions Linting Status](https://github.com/umcugenetics/dxnextflowrna/actions/workflows/linting.yml/badge.svg)](https://github.com/umcugenetics/dxnextflowrna/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/dxnextflowrna/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX) +[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com) + +[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A524.10.2-23aa62.svg)](https://www.nextflow.io/) +[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) +[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) [![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) +[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/umcugenetics/dxnextflowrna) + +[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23dxnextflowrna-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/dxnextflowrna)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core) + +## Introduction + +**umcugenetics/dxnextflowrna** is a bioinformatics pipeline that can be used to analyse human RNA sequencing data. +It takes FASTQ files as input, +performs quality control (QC), trimming, filtering and alignment, +and produces an extensive QC report. + +![umcugenetics/dxnextflowrna metro map](docs/images/umcugenetics-dxnextflowrna_metro_map.png) + +1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)) +2. Present QC for raw reads ([`MultiQC`](http://multiqc.info/)) +3. Adapter and quality trimming ([`Trim Galore!`](https://www.bioinformatics.babraham.ac.uk/projects/trim_galore/)) +4. Removal of ribosomal RNA ([`SortMeRNA`](https://github.com/biocore/sortmerna)) +5. Alignment ([`STAR`](https://github.com/alexdobin/STAR)) +6. Merge, sort and index alignments ([`SAMtools`](https://sourceforge.net/projects/samtools/files/samtools/)) +7. UMI-based deduplication ([`UMI-tools`](https://github.com/CGATOxford/UMI-tools)) +8. Convert to CRAM ([`SAMtools`](https://sourceforge.net/projects/samtools/files/samtools/)) +9. Extensive quality control: + 1. [`RSeQC`](http://rseqc.sourceforge.net/) + 2. [`Preseq`](http://smithlabresearch.org/software/preseq/) +10. Present QC ([`MultiQC`](http://multiqc.info/)) -# DxNextflowRNA -UMCU Genetics Nextflow RNA workflow ## Usage +> [!NOTE] +> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data. + +1. Create an input directory with sample's fastq files (paired, multiple lanes allowed.) +2. Clone the github repository and all dependencies if required. +3. Install +```bash +sh install.sh +``` +4. Now, you can run the pipeline using: + ```bash -export NXF_JAVA_HOME='/hpc/diaggen/projects/woc/rna/tools/jdk-18.0.2.1' -/hpc/diaggen/projects/woc/rna/tools/nextflow run DxNextflowRNA --input /hpc/diaggen/projects/woc/rna/testdata/fastq --outdir ... +/tools/nextflow/nextflow run \ +/main.nf \ +-c /nextflow.config \ +--input \ +--outdir \ +--analysis_id \ +--email \ +-resume \ +-ansi-log false \ +-profile singularity \ ``` +> [!WARNING] +> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files). + +For more details and further functionality, please refer to the [usage documentation](docs/usage.md) + +## Pipeline output + +For more details about the output files and reports, please refer to the +[output documentation](docs/output.md). + +## Developers: Instructions and reminders when changing DxNextflowRNA +> [!WARNING] +> Please, try to keep using nfcore tools and guidelines! + +### Update files +- Update all type of citations: + - [`CITATIONS.md`](CITATIONS.md) file. + - toolCitationText and toolBibliographyText in [`utils_umcugenetics_dxnextflowrna_pipeline/main.nf`](./subworkflows/local/utils_umcugenetics_dxnextflowrna_pipeline/main.nf) +- Update version in [`nextflow.config`](nextflow.config) and [`multiqc_config`](assets/multiqc_config.yml) by using `nf-core pipelines bump-version ` +- Update [`metro map`](./docs/images/umcugenetics-dxnextflowrna_metro_map.png) + +### When updating Nextflow +- Update version by using `nf-core pipelines bump-version --nextflow ` +- Update version in [`install.sh`](./install.sh) and + ```bash + sh install.sh + ``` + + +## Credits + +umcugenetics/dxnextflowrna was originally written by UMCU Genetics. + + + + + + ## Citations - + + +An extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file. -This pipeline uses code and infrastructure developed and maintained by the [nf-core](https://nf-co.re) community, reused here under the [MIT license](https://github.com/nf-core/tools/blob/master/LICENSE). +You can cite the `nf-core` publication as follows: > **The nf-core framework for community-curated bioinformatics pipelines.** > diff --git a/assets/adaptivecard.json b/assets/adaptivecard.json new file mode 100644 index 0000000..c55ec51 --- /dev/null +++ b/assets/adaptivecard.json @@ -0,0 +1,67 @@ +{ + "type": "message", + "attachments": [ + { + "contentType": "application/vnd.microsoft.card.adaptive", + "contentUrl": null, + "content": { + "\$schema": "http://adaptivecards.io/schemas/adaptive-card.json", + "msteams": { + "width": "Full" + }, + "type": "AdaptiveCard", + "version": "1.2", + "body": [ + { + "type": "TextBlock", + "size": "Large", + "weight": "Bolder", + "color": "<% if (success) { %>Good<% } else { %>Attention<%} %>", + "text": "umcugenetics/dxnextflowrna v${version} - ${runName}", + "wrap": true + }, + { + "type": "TextBlock", + "spacing": "None", + "text": "Completed at ${dateComplete} (duration: ${duration})", + "isSubtle": true, + "wrap": true + }, + { + "type": "TextBlock", + "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors. The full error message was: ${errorReport}.<% } %>", + "wrap": true + }, + { + "type": "TextBlock", + "text": "The command used to launch the workflow was as follows:", + "wrap": true + }, + { + "type": "TextBlock", + "text": "${commandLine}", + "isSubtle": true, + "wrap": true + } + ], + "actions": [ + { + "type": "Action.ShowCard", + "title": "Pipeline Configuration", + "card": { + "type": "AdaptiveCard", + "\$schema": "http://adaptivecards.io/schemas/adaptive-card.json", + "body": [ + { + "type": "FactSet", + "facts": [<% out << summary.collect{ k,v -> "{\"title\": \"$k\", \"value\" : \"$v\"}"}.join(",\n") %> + ] + } + ] + } + } + ] + } + } + ] +} diff --git a/assets/email_template.html b/assets/email_template.html new file mode 100644 index 0000000..9026b41 --- /dev/null +++ b/assets/email_template.html @@ -0,0 +1,52 @@ + + + + + + + + umcugenetics/dxnextflowrna Pipeline Report + + +
+ + +

umcugenetics/dxnextflowrna ${version}

+

Run Name: $runName

+ + <% if (!success){ + out << """ +
+

umcugenetics/dxnextflowrna execution completed unsuccessfully!

+

The exit status of the task that caused the workflow execution to fail was: $exitStatus.

+

The full error message was:

+
${errorReport}
+
+ """ + } else { + out << """ +
+ umcugenetics/dxnextflowrna execution completed successfully! +
+ """ + } + %> + +

The workflow was completed at $dateComplete (duration: $duration)

+

The command used to launch the workflow was as follows:

+
$commandLine
+ +

Pipeline Configuration:

+ + + <% out << summary.collect{ k,v -> "" }.join("\n") %> + +
$k
$v
+ + + +

umcugenetics/dxnextflowrna

+

https://github.com/umcugenetics/dxnextflowrna

+
+ + diff --git a/assets/email_template.txt b/assets/email_template.txt new file mode 100644 index 0000000..663c148 --- /dev/null +++ b/assets/email_template.txt @@ -0,0 +1,33 @@ + +umcugenetics/dxnextflowrna ${version} +Run Name: $runName + +<% if (success){ + out << "## umcugenetics/dxnextflowrna execution completed successfully! ##" +} else { + out << """#################################################### +## umcugenetics/dxnextflowrna execution completed unsuccessfully! ## +#################################################### +The exit status of the task that caused the workflow execution to fail was: $exitStatus. +The full error message was: + +${errorReport} +""" +} %> + + +The workflow was completed at $dateComplete (duration: $duration) + +The command used to launch the workflow was as follows: + + $commandLine + + + +Pipeline Configuration: +----------------------- +<% out << summary.collect{ k,v -> " - $k: $v" }.join("\n") %> + +-- +umcugenetics/dxnextflowrna +https://github.com/umcugenetics/dxnextflowrna diff --git a/assets/methods_description_template.yml b/assets/methods_description_template.yml new file mode 100644 index 0000000..9631649 --- /dev/null +++ b/assets/methods_description_template.yml @@ -0,0 +1,29 @@ +id: "umcugenetics-dxnextflowrna-methods-description" +description: "Suggested text and references to use when describing pipeline usage within the methods section of a publication." +section_name: "umcugenetics/dxnextflowrna Methods Description" +section_href: "https://github.com/umcugenetics/dxnextflowrna" +plot_type: "html" +## TODO nf-core: Update the HTML below to your preferred methods description, e.g. add publication citation for this pipeline +## You inject any metadata in the Nextflow '${workflow}' object +data: | +

Methods

+

Data was processed using umcugenetics/dxnextflowrna v${workflow.manifest.version} ${doi_text}

+

The pipeline was executed with Nextflow v${workflow.nextflow.version} (Di Tommaso et al., 2017) with the following command:

+
${workflow.commandLine}
+

${tool_citations}

+

References

+
    +
  • Di Tommaso, P., Chatzou, M., Floden, E. W., Barja, P. P., Palumbo, E., & Notredame, C. (2017). Nextflow enables reproducible computational workflows. Nature Biotechnology, 35(4), 316-319. doi: 10.1038/nbt.3820
  • +
  • Ewels, P. A., Peltzer, A., Fillinger, S., Patel, H., Alneberg, J., Wilm, A., Garcia, M. U., Di Tommaso, P., & Nahnsen, S. (2020). The nf-core framework for community-curated bioinformatics pipelines. Nature Biotechnology, 38(3), 276-278. doi: 10.1038/s41587-020-0439-x
  • +
  • Grüning, B., Dale, R., Sjödin, A., Chapman, B. A., Rowe, J., Tomkins-Tinch, C. H., Valieris, R., Köster, J., & Bioconda Team. (2018). Bioconda: sustainable and comprehensive software distribution for the life sciences. Nature Methods, 15(7), 475–476. doi: 10.1038/s41592-018-0046-7
  • +
  • da Veiga Leprevost, F., Grüning, B. A., Alves Aflitos, S., Röst, H. L., Uszkoreit, J., Barsnes, H., Vaudel, M., Moreno, P., Gatto, L., Weber, J., Bai, M., Jimenez, R. C., Sachsenberg, T., Pfeuffer, J., Vera Alvarez, R., Griss, J., Nesvizhskii, A. I., & Perez-Riverol, Y. (2017). BioContainers: an open-source and community-driven framework for software standardization. Bioinformatics (Oxford, England), 33(16), 2580–2582. doi: 10.1093/bioinformatics/btx192
  • + ${tool_bibliography} +
+
+
Notes:
+
    + ${nodoi_text} +
  • The command above does not include parameters contained in any configs or profiles that may have been used. Ensure the config file is also uploaded with your publication!
  • +
  • You should also cite all software used within this run. Check the "Software Versions" of this report to get version information.
  • +
+
diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index aa803aa..9b8f1f8 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,2 +1,163 @@ -top_modules: -- 'fastqc' \ No newline at end of file +report_comment: > + This report has been generated by the UMCUGenetics/dxnextflowrna + analysis pipeline. +# Order modules +report_section_order: + # Preprocessing and pre-alignment QC + fastqc: + order: 4004 + cutadapt: + order: 4003 + sortmerna: + order: 4002 + # Alignment + star: + order: 3000 + # Post-alignment QC + umitools: + order: 2001 + samtools: + order: 2002 + picard: + order: 2003 + preseq: + order: 2004 + rseqc: + order: 2005 + # Summaries + "umcugenetics-dxnextflowrna-methods-description": + order: -1000 + software_versions: + order: -1001 + "umcugenetics-dxnextflowrna-summary": + order: -1002 + +# Disable version detection; all versions are retrieved previously. +disable_version_detection: true +export_plots: true +# Column name in Software Versions Table instead of "Group". +versions_table_group_header: "Analysis Pipeline Step" + +# Define custom search patterns +sp: + software_versions: + fn_re: ".*ersions\\.(yaml|yml)" + +# Tool specific settings +preseq: + genome_size: hg38_genome + notrim: true + read_length: 300 + +# Sample grouping in General Stats +# Please keep sample grouping at bottom of multiqc config. +table_sample_merge: + "(L001 R1)": + - "L001_R1" + - type: regex + pattern: "[_.-](A|B)[A-Za-z0-9]{9}[_.-]S[0-9]+[_.-]L001[_.-](R1[_.-]001|1)$" + "(L001 R2)": + - "L001_R2" + - type: regex + pattern: "[_.-](A|B)[A-Za-z0-9]{9}[_.-]S[0-9]+[_.-]L001[_.-](R2[_.-]001|2)$" + "(L002 R1)": + - "L002_R1" + - type: regex + pattern: "[_.-](A|B)[A-Za-z0-9]{9}[_.-]S[0-9]+[_.-]L002[_.-](R1[_.-]001|1)$" + "(L002 R2)": + - "L002_R2" + - type: regex + pattern: "[_.-](A|B)[A-Za-z0-9]{9}[_.-]S[0-9]+[_.-]L002[_.-](R2[_.-]001|2)$" + "(L003 R1)": + - "L003_R1" + - type: regex + pattern: "[_.-](A|B)[A-Za-z0-9]{9}[_.-]S[0-9]+[_.-]L003[_.-](R1[_.-]001|1)$" + "(L003 R2)": + - "L003_R2" + - type: regex + pattern: "[_.-](A|B)[A-Za-z0-9]{9}[_.-]S[0-9]+[_.-]L003[_.-](R2[_.-]001|2)$" + "(L004 R1)": + - "L004_R1" + - type: regex + pattern: "[_.-](A|B)[A-Za-z0-9]{9}[_.-]S[0-9]+[_.-]L004[_.-](R1[_.-]001|1)$" + "(L004 R2)": + - "L004_R2" + - type: regex + pattern: "[_.-](A|B)[A-Za-z0-9]{9}[_.-]S[0-9]+[_.-]L004[_.-](R2[_.-]001|2)$" + "(L005 R1)": + - "L005_R1" + - type: regex + pattern: "[_.-](A|B)[A-Za-z0-9]{9}[_.-]S[0-9]+[_.-]L005[_.-](R1[_.-]001|1)$" + "(L005 R2)": + - "L005_R2" + - type: regex + pattern: "[_.-](A|B)[A-Za-z0-9]{9}[_.-]S[0-9]+[_.-]L005[_.-](R2[_.-]001|2)$" + "(L006 R1)": + - "L006_R1" + - type: regex + pattern: "[_.-](A|B)[A-Za-z0-9]{9}[_.-]S[0-9]+[_.-]L006[_.-](R1[_.-]001|1)$" + "(L006 R2)": + - "L006_R2" + - type: regex + pattern: "[_.-](A|B)[A-Za-z0-9]{9}[_.-]S[0-9]+[_.-]L006[_.-](R2[_.-]001|2)$" + "(L007 R1)": + - "L007_R1" + - type: regex + pattern: "[_.-](A|B)[A-Za-z0-9]{9}[_.-]S[0-9]+[_.-]L007[_.-](R1[_.-]001|1)$" + "(L007 R2)": + - "L007_R2" + - type: regex + pattern: "[_.-](A|B)[A-Za-z0-9]{9}[_.-]S[0-9]+[_.-]L007[_.-](R2[_.-]001|2)$" + "(L008 R1)": + - "L008_R1" + - type: regex + pattern: "[_.-](A|B)[A-Za-z0-9]{9}[_.-]S[0-9]+[_.-]L008[_.-](R1[_.-]001|1)$" + "(L008 R2)": + - "L008_R2" + - type: regex + pattern: "[_.-](A|B)[A-Za-z0-9]{9}[_.-]S[0-9]+[_.-]L008[_.-](R2[_.-]001|2)$" + "(Remaining lanes R1)": + - "L[0-9]{3}_R1" + - type: regex + pattern: "[_.-](A|B)[A-Za-z0-9]{9}[_.-]S[0-9]+[_.-]L[0-9]{3}[_.-](R1[_.-]001|1)$" + "(Remaining lanes R2)": + - "L[0-9]{3}_R2" + - type: regex + pattern: "[_.-](A|B)[A-Za-z0-9]{9}[_.-]S[0-9]+[_.-]L[0-9]{3}[_.-](R2[_.-]001|2)$" + + "(L001)": + - "L001" + - type: regex + pattern: "[_.-](A|B)[A-Za-z0-9]{9}[_.-]S[0-9]+[_.-]L001$" + "(L002)": + - "L002" + - type: regex + pattern: "[_.-](A|B)[A-Za-z0-9]{9}[_.-]S[0-9]+[_.-]L002$" + "(L003)": + - "L003" + - type: regex + pattern: "[_.-](A|B)[A-Za-z0-9]{9}[_.-]S[0-9]+[_.-]L003$" + "(L004)": + - "L004" + - type: regex + pattern: "[_.-](A|B)[A-Za-z0-9]{9}[_.-]S[0-9]+[_.-]L004$" + "(L005)": + - "L005" + - type: regex + pattern: "[_.-](A|B)[A-Za-z0-9]{9}[_.-]S[0-9]+[_.-]L005$" + "(L006)": + - "L006" + - type: regex + pattern: "[_.-](A|B)[A-Za-z0-9]{9}[_.-]S[0-9]+[_.-]L006$" + "(L007)": + - "L007" + - type: regex + pattern: "[_.-](A|B)[A-Za-z0-9]{9}[_.-]S[0-9]+[_.-]L007$" + "(L008)": + - "L008" + - type: regex + pattern: "[_.-](A|B)[A-Za-z0-9]{9}[_.-]S[0-9]+[_.-]L008$" + "(Remaining lanes)": + - "L[0-9]{3}" + - type: regex + pattern: "[_.-](A|B)[A-Za-z0-9]{9}[_.-]S[0-9]+[_.-]L[0-9]{3}$" diff --git a/assets/sendmail_template.txt b/assets/sendmail_template.txt new file mode 100644 index 0000000..65bacba --- /dev/null +++ b/assets/sendmail_template.txt @@ -0,0 +1,53 @@ +To: $email +Subject: $subject +Mime-Version: 1.0 +Content-Type: multipart/related;boundary="nfcoremimeboundary" + +--nfcoremimeboundary +Content-Type: text/html; charset=utf-8 + +$email_html + +--nfcoremimeboundary +Content-Type: image/png;name="umcugenetics-dxnextflowrna_logo_light.png" +Content-Transfer-Encoding: base64 +Content-ID: +Content-Disposition: inline; filename="umcugenetics-dxnextflowrna_logo_light.png" + +<% out << new File("$projectDir/assets/umcugenetics-dxnextflowrna_logo_light.png"). + bytes. + encodeBase64(). + toString(). + tokenize( '\n' )*. + toList()*. + collate( 76 )*. + collect { it.join() }. + flatten(). + join( '\n' ) %> + +<% +if (mqcFile){ +def mqcFileObj = new File("$mqcFile") +if (mqcFileObj.length() < mqcMaxSize){ +out << """ +--nfcoremimeboundary +Content-Type: text/html; name=\"multiqc_report\" +Content-Transfer-Encoding: base64 +Content-ID: +Content-Disposition: attachment; filename=\"${mqcFileObj.getName()}\" + +${mqcFileObj. + bytes. + encodeBase64(). + toString(). + tokenize( '\n' )*. + toList()*. + collate( 76 )*. + collect { it.join() }. + flatten(). + join( '\n' )} +""" +}} +%> + +--nfcoremimeboundary-- diff --git a/assets/slackreport.json b/assets/slackreport.json new file mode 100644 index 0000000..f6c6c00 --- /dev/null +++ b/assets/slackreport.json @@ -0,0 +1,34 @@ +{ + "attachments": [ + { + "fallback": "Plain-text summary of the attachment.", + "color": "<% if (success) { %>good<% } else { %>danger<%} %>", + "author_name": "umcugenetics/dxnextflowrna ${version} - ${runName}", + "author_icon": "https://www.nextflow.io/docs/latest/_static/favicon.ico", + "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors<% } %>", + "fields": [ + { + "title": "Command used to launch the workflow", + "value": "```${commandLine}```", + "short": false + } + <% + if (!success) { %> + , + { + "title": "Full error message", + "value": "```${errorReport}```", + "short": false + }, + { + "title": "Pipeline configuration", + "value": "<% out << summary.collect{ k,v -> k == "hook_url" ? "_${k}_: (_hidden_)" : ( ( v.class.toString().contains('Path') || ( v.class.toString().contains('String') && v.contains('/') ) ) ? "_${k}_: `${v}`" : (v.class.toString().contains('DateTime') ? ("_${k}_: " + v.format(java.time.format.DateTimeFormatter.ofLocalizedDateTime(java.time.format.FormatStyle.MEDIUM))) : "_${k}_: ${v}") ) }.join(",\n") %>", + "short": false + } + <% } + %> + ], + "footer": "Completed at <% out << dateComplete.format(java.time.format.DateTimeFormatter.ofLocalizedDateTime(java.time.format.FormatStyle.MEDIUM)) %> (duration: ${duration})" + } + ] +} diff --git a/assets/sortmerna-db-default.txt b/assets/sortmerna-db-default.txt new file mode 100755 index 0000000..3fedae2 --- /dev/null +++ b/assets/sortmerna-db-default.txt @@ -0,0 +1 @@ +/hpc/diaggen/data/databases/sortmerna/sortmerna_v4.3.4_db/smr_v4.3_sensitive_db.fasta diff --git a/assets/template.yml b/assets/template.yml new file mode 100644 index 0000000..5f04090 --- /dev/null +++ b/assets/template.yml @@ -0,0 +1,27 @@ +name: DxNextflowRNA +description: UMCU Genetics RNA seq Workflow +author: UMCU Genetics +prefix: UMCUGenetics +skip: + - github + - ci + - github_badges + - igenomes +template: + author: UMCU Genetics + description: UMCU Genetics RNA seq Workflow + force: false + is_nfcore: false + name: DxNextflowRNA + org: UMCUGenetics + outdir: . + skip_features: + - github + - ci + - igenomes + - gitpod + - codespaces + - email + - documentation + - test_config + version: 1.0.0 diff --git a/assets/umcugenetics-dxnextflowrna_logo_dark.png b/assets/umcugenetics-dxnextflowrna_logo_dark.png new file mode 100644 index 0000000..6e33321 Binary files /dev/null and b/assets/umcugenetics-dxnextflowrna_logo_dark.png differ diff --git a/assets/umcugenetics-dxnextflowrna_logo_light.png b/assets/umcugenetics-dxnextflowrna_logo_light.png new file mode 100644 index 0000000..3e756f4 Binary files /dev/null and b/assets/umcugenetics-dxnextflowrna_logo_light.png differ diff --git a/conf/base.config b/conf/base.config index 3294461..5ea2b69 100644 --- a/conf/base.config +++ b/conf/base.config @@ -1,51 +1,57 @@ /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - UMCUGenetics/DxNextflowRNA Nextflow base config file + umcugenetics/dxnextflowrna Nextflow base config file ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + A 'blank slate' config file, appropriate for general use on most high performance + compute environments. Assumes that all software is installed and available on + the PATH. Runs in `local` mode - all jobs will be run on the logged in environment. +---------------------------------------------------------------------------------------- */ process { - - // Defaults for all processes, based on HPC defaults - cpus = { 2 * task.attempt } - memory = { 10.GB * task.attempt } - time = { 1.h * task.attempt } + cpus = { 2 * task.attempt } + memory = { 10.GB * task.attempt } + time = { 1.h * task.attempt } errorStrategy = { task.exitStatus in ((130..145) + 104) ? 'retry' : 'finish' } maxRetries = 1 maxErrors = '-1' - executor = 'slurm' - queue = 'cpu' - // clusterOptions = "$params.cluster_options" + executor = 'slurm' + queue = 'cpu' // Process-specific resource requirements + // NOTE - Please try and re-use the labels below as much as possible. + // These labels are used and recognised by default in DSL2 files hosted on nf-core/modules. + // If possible, it would be nice to keep the same label naming convention when + // adding in your local modules too. + // TODO nf-core: Customise requirements for specific processes. // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors withLabel:process_single { - cpus = { 2 } - memory = { 8.GB * task.attempt } + cpus = { 1 } + memory = { 6.GB * task.attempt } time = { 4.h * task.attempt } } withLabel:process_low { cpus = { 2 * task.attempt } - memory = { 16.GB * task.attempt } + memory = { 12.GB * task.attempt } time = { 4.h * task.attempt } } withLabel:process_medium { cpus = { 6 * task.attempt } - memory = { 32.GB * task.attempt } + memory = { 36.GB * task.attempt } time = { 8.h * task.attempt } } withLabel:process_high { cpus = { 12 * task.attempt } - memory = { 64.GB * task.attempt } + memory = { 72.GB * task.attempt } time = { 16.h * task.attempt } } withLabel:process_long { time = { 20.h * task.attempt } } withLabel:process_high_memory { - memory = { 128.GB * task.attempt } + memory = { 200.GB * task.attempt } } withLabel:error_ignore { errorStrategy = 'ignore' @@ -55,21 +61,3 @@ process { maxRetries = 2 } } - -singularity { - enabled = true - runOptions = '-B /hpc:/hpc -B $TMPDIR:$TMPDIR' - autoMounts = true - cacheDir = '/hpc/diaggen/projects/woc/rna/container' -} - -executor { - queueSize = 1000 - pollInterval = '1min' - queueStatInterval = '5min' - submitRatelimit = '10sec' -} - -mail { - smtp.host = 'localhost' -} \ No newline at end of file diff --git a/conf/modules.config b/conf/modules.config index 81083f5..04e952c 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -10,50 +10,213 @@ ---------------------------------------------------------------------------------------- */ +/* +Processes are ordered alphabetically + +Each process's parameters are ordered as well: +first the custom parameters (alphabetic order), +followed by the compute resources. + +*/ process { + withName: FASTQC { - ext.args = '--quiet' - publishDir = [ - [path: "$params.outdir/QC/", mode: params.publish_dir_mode, pattern: '*_{fastqc.html}'] + clusterOptions = "${params.cluster_options}" + ext.args = '--quiet' + cpus = { 2 } + memory = { 5.GB * task.attempt } + time = { (1.ms * reads.sum { it.size() } / reads.count { it } / 1000) * task.attempt } + } + + withName: MULTIQC { + clusterOptions = "${params.cluster_options}" + ext.args = { params.multiqc_title ? "--title \"${params.multiqc_title}\"" : "--title \"${params.analysis_id}\"" } + ext.prefix = { $ { params.analysis_id } } + publishDir = [ + [ + path: "${params.outdir}/QC", + mode: params.publish_dir_mode + ] ] - cpus = 2 - memory = { 8.GB * task.attempt } - time = { 15.m * task.attempt } + cpus = { 2 } + memory = { 2.GB * task.attempt } + time = { 15.m * task.attempt } } - withName: SAMTOOLS_MERGE { - cpus = { 8 * task.attempt } - memory = { 8.GB * task.attempt } - time = { 1.h * task.attempt } + // TODO: retrieve dynamic resources collectrnaseqmetrics + withName: PICARD_COLLECTRNASEQMETRICS { + clusterOptions = "${params.cluster_options} --gres=tmpspace:10G" + ext.args = '--STRAND_SPECIFICITY NONE' + publishDir = [ + [ + path: "${params.outdir}/QC/Picard", + mode: params.publish_dir_mode + ] + ] + cpus = { 2 } + memory = { 5.GB * task.attempt } + time = { 2.h * task.attempt } } - withName: STAR_ALIGN { - cpus = { 12 * task.attempt } - memory = { 40.GB * task.attempt } - time = { 1.h * task.attempt } - ext.args = '--readFilesCommand zcat --outSAMtype BAM SortedByCoordinate' + withName: PRESEQ_LCEXTRAP { + clusterOptions = "${params.cluster_options}" + ext.args = { + [ + '-verbose', + '-bam', + '-defects', + '-seed 1', + '-seg_len 10000000' + ].join(' ').trim() + } + publishDir = [ + [ + path: "${params.outdir}/QC/lcextrap", + mode: params.publish_dir_mode + ] + ] + cpus = { 2 * task.attempt } + memory = { 5.GB * task.attempt } + time = { (1.ms * bam.size() / 3000) * task.attempt } } - withName: MULTIQC { - publishDir = [ - path: { "${params.outdir}/QC" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + withName: 'RSEQC_(BAMSTAT|INNERDISTANCE|INFEREXPERIMENT|JUNCTIONANNOTATION|JUNCTIONSATURATION|READDISTRIBUTION|READDUPLICATION)' { + clusterOptions = "${params.cluster_options}" + publishDir = [ + [ + path: "${params.outdir}/QC/rseqc", + mode: params.publish_dir_mode + ] ] + cpus = { 2 * task.attempt } + memory = { (4.B * bam.size()) * task.attempt } + time = { (1.ms * bam.size() / 1600) * task.attempt } } - withName: UMITOOLS_DEDUP { - ext.args = "--umi-separator=:" - ext.prefix = { "${meta.id}.markdup" } - publishDir = [ - [path: "$params.outdir/bam_files/", mode: params.publish_dir_mode, pattern: '*.ba*'] + // TODO: retrieve dynamic resources samtools + withName: SAMTOOLS_CONVERT { + clusterOptions = "${params.cluster_options}" + publishDir = [ + [ + path: "${params.outdir}/cram_files/", + mode: params.publish_dir_mode, + pattern: '*.cra?' + ] + ] + cpus = { 2 } + memory = { 16.GB * task.attempt } + time = { 4.h * task.attempt } + } + + withName: SAMTOOLS_FLAGSTAT { + clusterOptions = "${params.cluster_options}" + cpus = { 2 } + memory = { 5.GB * task.attempt } + time = { 1.h * task.attempt } + } + + withName: SAMTOOLS_IDXSTATS { + clusterOptions = "${params.cluster_options}" + cpus = { 2 } + memory = { 5.GB * task.attempt } + time = { 1.h * task.attempt } + } + + withName: SAMTOOLS_INDEX { + clusterOptions = "${params.cluster_options}" + cpus = { 2 } + memory = { 5.GB * task.attempt } + time = { 15.m * task.attempt } + } + + withName: SAMTOOLS_MERGE { + clusterOptions = "${params.cluster_options}" + ext.args = '--write-index' + cpus = { 2 } + memory = { 5.GB * task.attempt } + time = { (1.ms * bam.size() / 8000) * task.attempt } + } + + withName: SAMTOOLS_STATS { + clusterOptions = "${params.cluster_options}" + } + + // TODO: retrieve dynamic resources sortmerna + withName: SORTMERNA_INDEX { + clusterOptions = "${params.cluster_options}" + container = 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/15/15b50826336c394eab2bf6d731f00729921598ffc6463278da6316e768b16b8b/data' + ext.args = '--index 1' + publishDir = [ + path: "${params.outdir}/genome/sortmerna", + mode: params.publish_dir_mode ] - clusterOptions = "--gres=tmpspace:50G" + cpus = { 2 * task.attempt } + memory = { 64.GB * task.attempt } + time = { 4.h * task.attempt } } - withName: SUBREAD_FEATURECOUNTS { - publishDir = [ - [path: "$params.outdir/feature_counts/", mode: params.publish_dir_mode, pattern: '*featureCounts.txt*'], + // TODO: retrieve dynamic resources sortmerna + withName: SORTMERNA_READS { + clusterOptions = "${params.cluster_options}" + container = 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/15/15b50826336c394eab2bf6d731f00729921598ffc6463278da6316e768b16b8b/data' + ext.args = '--num_alignments 1 -v --index 0' + publishDir = [ + [ + path: { "${params.outdir}/sortmerna" }, + mode: params.publish_dir_mode, + pattern: "*.log" + ], + [ + path: { params.save_non_ribo_reads ? "${params.outdir}/sortmerna" : params.outdir }, + mode: params.publish_dir_mode, + pattern: "*.fastq.gz", + saveAs: { params.save_non_ribo_reads ? it : null } + ] ] + cpus = { 12 * task.attempt } + memory = { 64.GB * task.attempt } + time = { 16.h * task.attempt } + } + + withName: STAR_ALIGN { + clusterOptions = "${params.cluster_options}" + ext.args = { + [ + '--readFilesCommand zcat', + '--outSAMtype BAM SortedByCoordinate', + '--outSAMunmapped Within KeepPairs', + '--twopassMode Basic', + '--outReadsUnmapped Fastx', + '--alignSJDBoverhangMin 10', + '--chimJunctionOverhangMin 12', + '--chimSegmentMin 12', + '--outWigType bedGraph', + '--outWigStrand Unstranded' + ].join(' ').trim() + } + cpus = { 6 * task.attempt } + memory = { 40.GB * task.attempt } + time = { (1.ms * reads.sum { it.size() } / reads.count { it } / 300) * task.attempt } + } + + withName: TRIMGALORE { + clusterOptions = "${params.cluster_options}" + cpus = { 12 * task.attempt } + memory = { 5.GB * task.attempt } + time = { (1.ms * reads.sum { it.size() } / reads.count { it } / 170) * task.attempt } + } + + withName: UMITOOLS_DEDUP { + clusterOptions = "${params.cluster_options} --gres=tmpspace:40G" + ext.args = { + [ + "--umi-separator=:", + "--temp-dir \${TMPDIR}" + ].join(' ').trim() + } + ext.prefix = { "${meta.id}.dedup" } + cpus = { 6 * task.attempt } + memory = { 32.GB * task.attempt } + time = { 8.h * task.attempt } } -} \ No newline at end of file +} diff --git a/conf/test.config b/conf/test.config new file mode 100644 index 0000000..4f058fb --- /dev/null +++ b/conf/test.config @@ -0,0 +1,32 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. + + Use as follows: + nextflow run umcugenetics/dxnextflowrna -profile test, --outdir + +---------------------------------------------------------------------------------------- +*/ + +process { + resourceLimits = [ + cpus: 4, + memory: '15.GB', + time: '1.h' + ] +} + +params { + config_profile_name = 'Test profile' + config_profile_description = 'Minimal test dataset to check pipeline function' + + // Input data + // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets + // TODO nf-core: Give any required params for the test so that command line flags are not needed + input = params.pipelines_testdata_base_path + 'viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv' + + // Genome references + genome = 'R64-1-1' +} diff --git a/conf/test_full.config b/conf/test_full.config new file mode 100644 index 0000000..9f6411c --- /dev/null +++ b/conf/test_full.config @@ -0,0 +1,24 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running full-size tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a full size pipeline test. + + Use as follows: + nextflow run umcugenetics/dxnextflowrna -profile test_full, --outdir + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'Full test profile' + config_profile_description = 'Full test dataset to check pipeline function' + + // Input data for full size test + // TODO nf-core: Specify the paths to your full test data ( on nf-core/test-datasets or directly in repositories, e.g. SRA) + // TODO nf-core: Give any required params for the test so that command line flags are not needed + input = params.pipelines_testdata_base_path + 'viralrecon/samplesheet/samplesheet_full_illumina_amplicon.csv' + + // Genome references + genome = 'R64-1-1' +} diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 0000000..46e952a --- /dev/null +++ b/docs/README.md @@ -0,0 +1,12 @@ +# umcugenetics/dxnextflowrna: Documentation + +The umcugenetics/dxnextflowrna documentation is split into the following pages: + +- [Usage](usage.md) + - An overview of how the pipeline works, how to run it and a description of all of the different command-line flags. +- [Output](output.md) + - An overview of the different results produced by the pipeline and how to interpret them. +- [Parameters](parameters.md) + - An overview of the parameters of the pipeline. + +You can find a lot more documentation about installing, configuring and running nf-core pipelines on the website: [https://nf-co.re](https://nf-co.re) diff --git a/docs/images/umcugenetics-dxnextflowrna_metro_map.png b/docs/images/umcugenetics-dxnextflowrna_metro_map.png new file mode 100644 index 0000000..9d10bdb Binary files /dev/null and b/docs/images/umcugenetics-dxnextflowrna_metro_map.png differ diff --git a/docs/output.md b/docs/output.md new file mode 100644 index 0000000..ae3e158 --- /dev/null +++ b/docs/output.md @@ -0,0 +1,61 @@ +# umcugenetics/dxnextflowrna: Output + +## Introduction + +This document describes the output produced by the pipeline. Most of the plots are taken from the MultiQC report, which summarises results at the end of the pipeline. + +The directories listed below will be created in the results directory after the pipeline has finished. All paths are relative to the top-level results directory. + + + +## Pipeline overview + +The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps: + +- [FastQC](#fastqc) - Raw read QC +- [MultiQC](#multiqc) - Aggregate report describing results and QC from the whole pipeline +- [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution + +### FastQC + +
+Output files + +- `fastqc/` + - `*_fastqc.html`: FastQC report containing quality metrics. + - `*_fastqc.zip`: Zip archive containing the FastQC report, tab-delimited data file and plot images. + +
+ +[FastQC](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/) gives general quality metrics about your sequenced reads. It provides information about the quality score distribution across your reads, per base sequence content (%A/T/G/C), adapter contamination and overrepresented sequences. For further reading and documentation see the [FastQC help pages](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/). + +### MultiQC + +
+Output files + +- `multiqc/` + - `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser. + - `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline. + - `multiqc_plots/`: directory containing static images from the report in various formats. + +
+ +[MultiQC](http://multiqc.info) is a visualization tool that generates a single HTML report summarising all samples in your project. Most of the pipeline QC results are visualised in the report and further statistics are available in the report data directory. + +Results generated by MultiQC collate pipeline QC from supported tools e.g. FastQC. The pipeline has special steps which also allow the software versions to be reported in the MultiQC output for future traceability. For more information about how to use MultiQC reports, see . + +### Pipeline information + +
+Output files + +- `pipeline_info/` + - Reports generated by Nextflow: `execution_report.html`, `execution_timeline.html`, `execution_trace.txt` and `pipeline_dag.dot`/`pipeline_dag.svg`. + - Reports generated by the pipeline: `pipeline_report.html`, `pipeline_report.txt` and `software_versions.yml`. The `pipeline_report*` files will only be present if the `--email` / `--email_on_fail` parameter's are used when running the pipeline. + - Reformatted samplesheet files used as input to the pipeline: `samplesheet.valid.csv`. + - Parameters used by the pipeline run: `params.json`. + +
+ +[Nextflow](https://www.nextflow.io/docs/latest/tracing.html) provides excellent functionality for generating various reports relevant to the running and execution of the pipeline. This will allow you to troubleshoot errors with the running of the pipeline, and also provide you with other information such as launch commands, run times and resource usage. diff --git a/docs/parameters.md b/docs/parameters.md new file mode 100644 index 0000000..38829d7 --- /dev/null +++ b/docs/parameters.md @@ -0,0 +1,114 @@ +# umcugenetics/dxnextflowrna pipeline parameters +UMCU Genetics RNA seq Workflow + +## Input/output options +Define where the pipeline should find input data and save output data. + + +| Parameter | Description | Type | Default | Required | Hidden | +|-----------|-----------|-----------|-----------|-----------|-----------| +| `analysis_id` | | `string` | | True | | +| `input` | Path to input data. | `string` | | True | | +| `outdir` | The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure. | `string` | | True | | +| `email` | Email address for completion summary.
HelpSet this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits. If set in your +user config file (`~/.nextflow/config`) then you don't need to specify this on the command line for every run.
| `string` | | | | +| `multiqc_title` | MultiQC report title. Printed as page header, used for filename if not otherwise specified. | `string` | | | | + + +## Reference genome options + + +Reference genome related files and options required for the workflow. + + +| Parameter | Description | Type | Default | Required | Hidden | +|-----------|-----------|-----------|-----------|-----------|-----------| +| `fasta` | Path to FASTA genome file. | `string` | | | | +| `fai` | Path to FASTA genome index file. | `string` | | | | +| `gencode_version_name` | | `string` | GRCh38_gencode_v22_CTAT_lib_Mar012021 | | | +| `genome_base` | | `string` | | | | +| `gene_bed` | | `string` | | | | +| `gtf` | Path to GTF annotation file. | `string` | | | | +| `ref_flat` | | `string` | | | | +| `rrna_database_manifest` | | `string` | | | | +| `rrna_intervals` | | `string` | | | | +| `sortmerna_index` | | `string` | | | | +| `sortmerna_index_versions` | | `string` | | | | +| `star_index` | Path to directory or tar.gz archive for pre-built STAR index. | `string` | | | | + + + +### sortmerna_index and sortmerna_index_versions + +Runtimes of SortMeRNA are optimized by creating a sortmerna index first. +This is done by executing the tool with settings as configured in the modules.config with name SORTMERNA_INDEX. +The [`rrna_database_manifest`](assets/sortmerna-db-default.txtl) can be used as input. +Down below an example of nextflow workflow to run SORTMERNA_INDEX. + +```nextflow +nextflow.enable.dsl = 2 + +include { SORTMERNA as SORTMERNA_INDEX } from './modules/local/sortmerna/main' + +workflow { + SORTMERNA_INDEX( + [[], []], + Channel.from(file(params.rrna_database_manifest).readLines()) + .map { row -> file(row, checkIfExists: true) } + .collect().map { [[id: 'rrna_refs'], it] }, + [[], []] + ) +} +``` + +## Institutional config options + + +Parameters used to describe centralised config profiles. These should not be edited. + + +| Parameter | Description | Type | Default | Required | Hidden | +|-----------|-----------|-----------|-----------|-----------|-----------| +| `custom_config_version` | Git commit id for Institutional configs. | `string` | master | | True | +| `custom_config_base` | Base directory for Institutional configs.
HelpIf you're running offline, Nextflow will not be able to fetch the institutional config files from the internet. If you don't +need them, then this is not a problem. If you do need them, you should download the files from the repo and tell Nextflow where to find them with this parameter.
| `string` | +https://raw.githubusercontent.com/nf-core/configs/master | | True | +| `config_profile_name` | Institutional config name. | `string` | | | True | +| `config_profile_description` | Institutional config description. | `string` | | | True | +| `config_profile_contact` | Institutional config contact information. | `string` | | | True | +| `config_profile_url` | Institutional config URL link. | `string` | | | True | + + +## Generic options + + +Less common options for the pipeline, typically set in a config file. + + +| Parameter | Description | Type | Default | Required | Hidden | +|-----------|-----------|-----------|-----------|-----------|-----------| +| `cluster_options` | | `string` | --mail-user null --mail-type FAIL --account=diaggen | | | +| `version` | Display version and exit. | `boolean` | | | True | +| `publish_dir_mode` | Method used to save pipeline results to output directory.
HelpThe Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. +This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.
| `string` | link | | True | +| `email_on_fail` | Email address for completion summary, only when pipeline fails.
HelpAn email address to send a summary email to when the pipeline is completed - ONLY sent if the pipeline does +not exit successfully.
| `string` | | | True | +| `plaintext_email` | Send plain-text email instead of HTML. | `boolean` | | | True | +| `max_multiqc_email_size` | File size limit when attaching MultiQC reports to summary emails. | `string` | 25.MB | | True | +| `monochrome_logs` | Do not use coloured log outputs. | `boolean` | | | True | +| `hook_url` | Incoming hook URL for messaging service
HelpIncoming hook URL for messaging service. Currently, MS Teams and Slack are supported.
| `string` | | | True | +| `multiqc_config` | Custom config file to supply to MultiQC. | `string` | /home/cog/edejong2/repos/DxNextflowRNA_create_pipeline/assets/multiqc_config.yml | | True | +| `multiqc_logo` | Custom logo file to supply to MultiQC. File name must also be set in the MultiQC config file | `string` | | | True | +| `multiqc_methods_description` | Custom MultiQC yaml file containing HTML including a methods description. | `string` | | | | +| `validate_params` | Boolean whether to validate parameters against the schema at runtime | `boolean` | True | | True | +| `pipelines_testdata_base_path` | Base URL or local path to location of pipeline test dataset files | `string` | https://raw.githubusercontent.com/nf-core/test-datasets/ | | True | +| `rseqc_modules` | | `string` | bam_stat,infer_experiment,inner_distance,junction_annotation,junction_saturation,read_distribution,read_duplication | | | +| `save_non_ribo_reads` | | `boolean` | | | | +| `seq_platform` | Sequencing platform | `string` | Illumina | | | +| `seq_center` | Sequencing center | `string` | UMCU Genetics | | | + +## Other parameters + +| Parameter | Description | Type | Default | Required | Hidden | +|-----------|-----------|-----------|-----------|-----------|-----------| +| `dx_tracks_path` | | `string` | /hpc/diaggen/software/development/Dx_tracks/ | | | diff --git a/docs/usage.md b/docs/usage.md new file mode 100644 index 0000000..75c505b --- /dev/null +++ b/docs/usage.md @@ -0,0 +1,150 @@ +# umcugenetics/dxnextflowrna: Usage +## Introduction + + +## Running the pipeline +1. Create an input directory with sample's fastq files (paired, multiple lanes allowed.) +2. Clone the github repository and all dependencies if required. +3. Install +```bash +sh install.sh +``` +1. Now, you can run the pipeline using: + +```bash +/tools/nextflow/nextflow run \ +/main.nf \ +-c /nextflow.config \ +--input \ +--outdir \ +--analysis_id \ +--email \ +-resume \ +-ansi-log false \ +-profile singularity \ +``` + +This will launch the pipeline with the `singularity` configuration profile. See below for more information about profiles. + +Note that the pipeline will create the following files in your working directory: + +```bash +work # Directory containing the nextflow working files + # Finished results in specified location (defined with --outdir) +.nextflow_log # Log file from Nextflow +# Other nextflow hidden files, eg. history of pipeline runs and old logs. +``` + + +### Reproducibility + +It is a good idea to specify a pipeline version when running the pipeline on your data. This ensures that a specific version of the pipeline code and software are used when you run your pipeline. If you keep using the same tag, you'll be running the same version of the pipeline, even if there have been changes to the code since. + +First, go to the [umcugenetics/dxnextflowrna releases page](https://github.com/umcugenetics/dxnextflowrna/releases) and find the latest pipeline version - numeric only (eg. `1.3.1`). Then specify this when running the pipeline with `-r` (one hyphen) - eg. `-r 1.3.1`. Of course, you can switch to another version by changing the number after the `-r` flag. + +This version number will be logged in reports when you run the pipeline, so that you'll know what you used when you look back in the future. For example, at the bottom of the MultiQC reports. + +To further assist in reproducbility, you can use share and re-use [parameter files](#running-the-pipeline) to repeat pipeline runs with the same settings without having to write out a command with every single parameter. + +:::tip +If you wish to share such profile (such as upload as supplementary material for academic publications), make sure to NOT include cluster specific paths to files, nor institutional specific profiles. +::: + +## Core Nextflow arguments + +:::note +These options are part of Nextflow and use a _single_ hyphen (pipeline parameters use a double-hyphen). +::: + +### `-profile` + +Use this parameter to choose a configuration profile. Profiles can give configuration presets for different compute environments. + +Several generic profiles are bundled with the pipeline which instruct the pipeline to use software packaged using different methods (Docker, Singularity, Podman, Shifter, Charliecloud, Apptainer, Conda) - see below. + +:::info +We highly recommend the use of Docker or Singularity containers for full pipeline reproducibility, however when this is not possible, Conda is also supported. +::: + +The pipeline also dynamically loads configurations from [https://github.com/nf-core/configs](https://github.com/nf-core/configs) when it runs, making multiple config profiles for various institutional clusters available at run time. For more information and to see if your system is available in these configs please see the [nf-core/configs documentation](https://github.com/nf-core/configs#documentation). + +Note that multiple profiles can be loaded, for example: `-profile test,docker` - the order of arguments is important! +They are loaded in sequence, so later profiles can overwrite earlier profiles. + +If `-profile` is not specified, the pipeline will run locally and expect all software to be installed and available on the `PATH`. This is _not_ recommended, since it can lead to different results on different machines dependent on the computer enviroment. + +- `test` + - A profile with a complete configuration for automated testing + - Includes links to test data so needs no other parameters +- `docker` + - A generic configuration profile to be used with [Docker](https://docker.com/) +- `singularity` + - A generic configuration profile to be used with [Singularity](https://sylabs.io/docs/) +- `podman` + - A generic configuration profile to be used with [Podman](https://podman.io/) +- `shifter` + - A generic configuration profile to be used with [Shifter](https://nersc.gitlab.io/development/shifter/how-to-use/) +- `charliecloud` + - A generic configuration profile to be used with [Charliecloud](https://hpc.github.io/charliecloud/) +- `apptainer` + - A generic configuration profile to be used with [Apptainer](https://apptainer.org/) +- `wave` + - A generic configuration profile to enable [Wave](https://seqera.io/wave/) containers. Use together with one of the above (requires Nextflow ` 24.03.0-edge` or later). +- `conda` + - A generic configuration profile to be used with [Conda](https://conda.io/docs/). Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter, Charliecloud, or Apptainer. + +### `-resume` + +Specify this when restarting a pipeline. Nextflow will use cached results from any pipeline steps where the inputs are the same, continuing from where it got to previously. For input to be considered the same, not only the names must be identical but the files' contents as well. For more info about this parameter, see [this blog post](https://www.nextflow.io/blog/2019/demystifying-nextflow-resume.html). + +You can also supply a run name to resume a specific run: `-resume [run-name]`. Use the `nextflow log` command to show previous run names. + +### `-c` + +Specify the path to a specific config file (this is a core Nextflow command). See the [nf-core website documentation](https://nf-co.re/usage/configuration) for more information. + +## Custom configuration + +### Resource requests + +Whilst the default requirements set within the pipeline will hopefully work for most people and with most input data, you may find that you want to customise the compute resources that the pipeline requests. Each step in the pipeline has a default set of requirements for number of CPUs, memory and time. For most of the steps in the pipeline, if the job exits with any of the error codes specified [here](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/conf/base.config#L18) it will automatically be resubmitted with higher requests (2 x original, then 3 x original). If it still fails after the third attempt then the pipeline execution is stopped. + +To change the resource requests, please see the [max resources](https://nf-co.re/docs/usage/configuration#max-resources) and [tuning workflow resources](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources) section of the nf-core website. + +### Custom Containers + +In some cases you may wish to change which container or conda environment a step of the pipeline uses for a particular tool. By default nf-core pipelines use containers and software from the [biocontainers](https://biocontainers.pro/) or [bioconda](https://bioconda.github.io/) projects. However in some cases the pipeline specified version maybe out of date. + +To use a different container from the default container or conda environment specified in a pipeline, please see the [updating tool versions](https://nf-co.re/docs/usage/configuration#updating-tool-versions) section of the nf-core website. + +### Custom Tool Arguments + +A pipeline might not always support every possible argument or option of a particular tool used in pipeline. Fortunately, nf-core pipelines provide some freedom to users to insert additional parameters that the pipeline does not include by default. + +To learn how to provide additional arguments to a particular tool of the pipeline, please see the [customising tool arguments](https://nf-co.re/docs/usage/configuration#customising-tool-arguments) section of the nf-core website. + +### nf-core/configs + +In most cases, you will only need to create a custom config as a one-off but if you and others within your organisation are likely to be running nf-core pipelines regularly and need to use the same settings regularly it may be a good idea to request that your custom config file is uploaded to the `nf-core/configs` git repository. Before you do this please can you test that the config file works with your pipeline of choice using the `-c` parameter. You can then create a pull request to the `nf-core/configs` repository with the addition of your config file, associated documentation file (see examples in [`nf-core/configs/docs`](https://github.com/nf-core/configs/tree/master/docs)), and amending [`nfcore_custom.config`](https://github.com/nf-core/configs/blob/master/nfcore_custom.config) to include your custom profile. + +See the main [Nextflow documentation](https://www.nextflow.io/docs/latest/config.html) for more information about creating your own configuration files. + +If you have any questions or issues please send us a message on [Slack](https://nf-co.re/join/slack) on the [`#configs` channel](https://nfcore.slack.com/channels/configs). + +## Running in the background + +Nextflow handles job submissions and supervises the running jobs. The Nextflow process must run until the pipeline is finished. + +The Nextflow `-bg` flag launches Nextflow in the background, detached from your terminal so that the workflow does not stop if you log out of your session. The logs are saved to a file. + +Alternatively, you can use `screen` / `tmux` or similar tool to create a detached session which you can log back into at a later time. +Some HPC setups also allow you to run nextflow within a cluster job submitted your job scheduler (from where it submits more jobs). + +## Nextflow memory requirements + +In some cases, the Nextflow Java virtual machines can start to request a large amount of memory. +We recommend adding the following line to your environment to limit this (typically in `~/.bashrc` or `~./bash_profile`): + +```bash +NXF_OPTS='-Xms1g -Xmx4g' +``` diff --git a/install.sh b/install.sh new file mode 100644 index 0000000..c4c2cf2 --- /dev/null +++ b/install.sh @@ -0,0 +1,62 @@ +#!/bin/bash + +# Set parameters +java_download_url='https://download.java.net/java/GA/jdk21.0.2/f2283984656d49d69e91c558476027ac/13/GPL/openjdk-21.0.2_linux-x64_bin.tar.gz' +nextflow_version=24.10.2 + +# Remember the cwd +repo_dir=$PWD + +echo "Updating Git submodules" + +# Update submodules +git submodule update --init --recursive + +# Create tools directories +mkdir -p ${repo_dir}/tools/java +mkdir -p ${repo_dir}/tools/nextflow + +cd ${repo_dir}/tools/java + +echo "Downloading and installing OpenJDK" +# Download OpenJDK and untar +wget ${java_download_url} -O - | tar -xz + +# Remove the version number of the Java exec directory, since it's listed as a version independent folder in the run script +# Only one file should be there, but still a for loop because the last part of the file is unknown +for file in ${repo_dir}/tools/java/*; do + # Use sed to remove trailing bash, "-" and numbers + new_name=${file%-*} + echo "renaming" ${file} "to" ${new_name} + # Rename the file + mv -- ${file} ${new_name} +done + +# Log the downloaded OpenJDK version + arch +java_filename=$(basename ${java_download_url}) +java_base=${java_filename%%.*} +echo ${java_base} > java_version.txt + +chmod +x ${repo_dir}/tools/java/jdk + +${repo_dir}/tools/java/jdk/bin/java -version + +echo "OpenJDK ${java_base} installed to ${repo_dir}/tools/java/jdk" + +echo "Downloading and installing Nextflow" + +cd ${repo_dir}/tools/nextflow + +export NXF_JAVA_HOME=${repo_dir}'/tools/java/jdk' +export NXF_VER=${nextflow_version} + +# Download and install nextflow +curl -s https://get.nextflow.io | bash + +chmod +x ${repo_dir}/tools/nextflow/nextflow + +${repo_dir}/tools/nextflow/nextflow -v + +echo "Nextflow ${nextflow_version} installed on ${repo_dir}/tools/nextflow/nextflow" + +echo "All done!" diff --git a/main.nf b/main.nf index 93238a4..c2c6b69 100644 --- a/main.nf +++ b/main.nf @@ -1,115 +1,75 @@ #!/usr/bin/env nextflow /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - UMCUGenetics/DxNextflowRNA + umcugenetics/dxnextflowrna ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Github : https://github.com/UMCUGenetics/DxNextflowRNA + Github : https://github.com/umcugenetics/dxnextflowrna ---------------------------------------------------------------------------------------- */ -nextflow.enable.dsl = 2 - /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Validate parameters + IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS / WORKFLOWS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { validateParameters; } from 'plugin/nf-validation' -validateParameters() - +include { DXNEXTFLOWRNA } from './workflows/dxnextflowrna' +include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_umcugenetics_dxnextflowrna_pipeline' +include { PIPELINE_COMPLETION } from './subworkflows/local/utils_umcugenetics_dxnextflowrna_pipeline' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Import modules/subworkflows -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ -include { BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS } from './subworkflows/nf-core/bam_dedup_stats_samtools_umitools/main' - -include { FASTQC } from './modules/nf-core/fastqc/main' -include { MULTIQC } from './modules/nf-core/multiqc/main' -include { SAMTOOLS_INDEX } from './modules/nf-core/samtools/index/main' -include { SAMTOOLS_MERGE } from './modules/nf-core/samtools/merge/main' -include { STAR_ALIGN } from './modules/nf-core/star/align/main' -include { SUBREAD_FEATURECOUNTS } from './modules/nf-core/subread/featurecounts/main' - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Main workflow + RUN MAIN WORKFLOW ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ workflow { - // Reference file channels - ch_star_index = Channel.fromPath(params.star_index).map {star_index -> [star_index.getSimpleName(), star_index] } - ch_gtf = Channel.fromPath(params.gtf).map { gtf -> [gtf.getSimpleName(), gtf] } - - // Input channel - ch_fastq = Channel.fromFilePairs("$params.input/*_R{1,2}_001.fastq.gz") - .map { - meta, fastq -> - def fmeta = [:] - // Set meta.id - fmeta.id = meta - // Set meta.single_end - if (fastq.size() == 1) { - fmeta.single_end = true - } else { - fmeta.single_end = false - } - [ fmeta, fastq ] - } - - // Trim, Alignment, FeatureCounts - // TRIMGALORE - - STAR_ALIGN( - ch_fastq, - ch_star_index.first(), - ch_gtf.first(), - false, - 'illumina', - 'UMCU Genetics' + // + // SUBWORKFLOW: Run initialisation tasks + // + PIPELINE_INITIALISATION( + params.version, + params.validate_params, + params.monochrome_logs, + args, + params.outdir, ) - SAMTOOLS_MERGE( - STAR_ALIGN.out.bam_sorted.map { - meta, bam -> - new_id = meta.id.split('_')[0] - [ meta + [id: new_id], bam ] - }.groupTuple(), - [ [ id:'null' ], []], - [ [ id:'null' ], []], - ) - - SAMTOOLS_INDEX ( SAMTOOLS_MERGE.out.bam ) - - SAMTOOLS_MERGE.out.bam - .join(SAMTOOLS_INDEX.out.bai) - .set { ch_bam_bai } - - BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS( - ch_bam_bai, - true + // + // WORKFLOW: Run main workflow + // + UMCUGENETICS_DXNEXTFLOWRNA() + + // + // SUBWORKFLOW: Run completion tasks + // + PIPELINE_COMPLETION( + params.email, + params.email_on_fail, + params.plaintext_email, + params.outdir, + params.monochrome_logs, + params.hook_url, + UMCUGENETICS_DXNEXTFLOWRNA.out.multiqc_report, ) +} - SUBREAD_FEATURECOUNTS( - BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS.out.bam.map{ - meta, bam -> [ meta, bam, params.gtf ] - } - ) +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + NAMED WORKFLOWS FOR PIPELINE +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ - // QC - FASTQC(ch_fastq) +// +// WORKFLOW: Run main analysis pipeline depending on type of input +// +workflow UMCUGENETICS_DXNEXTFLOWRNA { + main: - // MultiQC - ch_multiqc_files = Channel.empty() - ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([])) - ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) - MULTIQC( - ch_multiqc_files.collect(), - ch_multiqc_config.toList(), - Channel.empty().toList(), - Channel.empty().toList() - ) + // + // WORKFLOW: Run pipeline + // + DXNEXTFLOWRNA() + emit: + multiqc_report = DXNEXTFLOWRNA.out.multiqc_report // channel: /path/to/multiqc_report.html } diff --git a/modules.json b/modules.json index f2424a6..1295090 100644 --- a/modules.json +++ b/modules.json @@ -1,76 +1,233 @@ { - "name": "UMCUGenetics/DxNextflowRNA", - "homePage": "https://github.com/UMCUGenetics/DxNextflowRNA", - "repos": { - "https://github.com/nf-core/modules.git": { - "modules": { - "nf-core": { - "fastqc": { - "branch": "master", - "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", - "installed_by": ["modules"] - }, - "multiqc": { - "branch": "master", - "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", - "installed_by": ["modules"] - }, - "samtools/flagstat": { - "branch": "master", - "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", - "installed_by": ["bam_stats_samtools"] - }, - "samtools/idxstats": { - "branch": "master", - "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", - "installed_by": ["bam_stats_samtools"] - }, - "samtools/index": { - "branch": "master", - "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", - "installed_by": ["bam_dedup_stats_samtools_umitools"] - }, - "samtools/merge": { - "branch": "master", - "git_sha": "e7ce60acc8a33fa17429e966364657a63016e870", - "installed_by": ["modules"] - }, - "samtools/stats": { - "branch": "master", - "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", - "installed_by": ["bam_stats_samtools"] - }, - "star/align": { - "branch": "master", - "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", - "installed_by": ["modules"] - }, - "subread/featurecounts": { - "branch": "master", - "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", - "installed_by": ["modules"] - }, - "umitools/dedup": { - "branch": "master", - "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", - "installed_by": ["bam_dedup_stats_samtools_umitools"] - } + "name": "umcugenetics/dxnextflowrna", + "homePage": "https://github.com/umcugenetics/dxnextflowrna", + "repos": { + "https://github.com/nf-core/modules.git": { + "modules": { + "nf-core": { + "fastqc": { + "branch": "master", + "git_sha": "dc94b6ee04a05ddb9f7ae050712ff30a13149164", + "installed_by": [ + "modules" + ] + }, + "multiqc": { + "branch": "master", + "git_sha": "cf17ca47590cc578dfb47db1c2a44ef86f89976d", + "installed_by": [ + "modules" + ] + }, + "picard/collectrnaseqmetrics": { + "branch": "master", + "git_sha": "49f4e50534fe4b64101e62ea41d5dc43b1324358", + "installed_by": [ + "modules" + ] + }, + "preseq/lcextrap": { + "branch": "master", + "git_sha": "a1abf90966a2a4016d3c3e41e228bfcbd4811ccc", + "installed_by": [ + "modules" + ] + }, + "rseqc/bamstat": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": [ + "bam_rseqc", + "modules" + ] + }, + "rseqc/inferexperiment": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": [ + "bam_rseqc", + "modules" + ] + }, + "rseqc/innerdistance": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": [ + "bam_rseqc", + "modules" + ] + }, + "rseqc/junctionannotation": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": [ + "bam_rseqc", + "modules" + ] + }, + "rseqc/junctionsaturation": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": [ + "bam_rseqc", + "modules" + ] + }, + "rseqc/readdistribution": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": [ + "bam_rseqc", + "modules" + ] + }, + "rseqc/readduplication": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": [ + "bam_rseqc", + "modules" + ] + }, + "rseqc/tin": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": [ + "bam_rseqc", + "modules" + ] + }, + "samtools/convert": { + "branch": "master", + "git_sha": "b13f07be4c508d6ff6312d354d09f2493243e208", + "installed_by": [ + "modules" + ] + }, + "samtools/flagstat": { + "branch": "master", + "git_sha": "2d20463181b1c38981a02e90d3084b5f9fa8d540", + "installed_by": [ + "bam_stats_samtools", + "modules" + ] + }, + "samtools/idxstats": { + "branch": "master", + "git_sha": "2d20463181b1c38981a02e90d3084b5f9fa8d540", + "installed_by": [ + "bam_stats_samtools", + "modules" + ] + }, + "samtools/index": { + "branch": "master", + "git_sha": "b13f07be4c508d6ff6312d354d09f2493243e208", + "installed_by": [ + "bam_dedup_stats_samtools_umitools", + "modules" + ] + }, + "samtools/merge": { + "branch": "master", + "git_sha": "b13f07be4c508d6ff6312d354d09f2493243e208", + "installed_by": [ + "modules" + ] + }, + "samtools/stats": { + "branch": "master", + "git_sha": "2d20463181b1c38981a02e90d3084b5f9fa8d540", + "installed_by": [ + "bam_stats_samtools", + "modules" + ] + }, + "sortmerna": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": [ + "modules" + ] + }, + "star/align": { + "branch": "master", + "git_sha": "a5ad53288c79fa52c5ae708c317e09ec2dd149ab", + "installed_by": [ + "modules" + ] + }, + "star/genomegenerate": { + "branch": "master", + "git_sha": "a5ad53288c79fa52c5ae708c317e09ec2dd149ab", + "installed_by": [ + "modules" + ] + }, + "trimgalore": { + "branch": "master", + "git_sha": "4c2d06a5e79abf08ba7f04c58e39c7dad75f094d", + "installed_by": [ + "modules" + ] + }, + "umitools/dedup": { + "branch": "master", + "git_sha": "0b27602842d3d79fd0e8db79f4afa764967fc3d1", + "installed_by": [ + "bam_dedup_stats_samtools_umitools", + "modules" + ] + } + } + }, + "subworkflows": { + "nf-core": { + "bam_dedup_stats_samtools_umitools": { + "branch": "master", + "git_sha": "0b27602842d3d79fd0e8db79f4afa764967fc3d1", + "installed_by": [ + "subworkflows" + ] + }, + "bam_rseqc": { + "branch": "master", + "git_sha": "0eacd714effe5aac1c1de26593873960b3346cab", + "installed_by": [ + "subworkflows" + ] + }, + "bam_stats_samtools": { + "branch": "master", + "git_sha": "763d4b5c05ffda3ac1ac969dc67f7458cfb2eb1d", + "installed_by": [ + "bam_dedup_stats_samtools_umitools", + "subworkflows" + ] + }, + "utils_nextflow_pipeline": { + "branch": "master", + "git_sha": "3aa0aec1d52d492fe241919f0c6100ebf0074082", + "installed_by": [ + "subworkflows" + ] + }, + "utils_nfcore_pipeline": { + "branch": "master", + "git_sha": "1b6b9a3338d011367137808b49b923515080e3ba", + "installed_by": [ + "subworkflows" + ] + }, + "utils_nfschema_plugin": { + "branch": "master", + "git_sha": "bbd5a41f4535a8defafe6080e00ea74c45f4f96c", + "installed_by": [ + "subworkflows" + ] + } + } + } } - }, - "subworkflows": { - "nf-core": { - "bam_dedup_stats_samtools_umitools": { - "branch": "master", - "git_sha": "cfd937a668919d948f6fcbf4218e79de50c2f36f", - "installed_by": ["subworkflows"] - }, - "bam_stats_samtools": { - "branch": "master", - "git_sha": "cfd937a668919d948f6fcbf4218e79de50c2f36f", - "installed_by": ["bam_dedup_stats_samtools_umitools"] - } - } - } } - } } diff --git a/modules/nf-core/fastqc/environment.yml b/modules/nf-core/fastqc/environment.yml index f52a53a..691d4c7 100644 --- a/modules/nf-core/fastqc/environment.yml +++ b/modules/nf-core/fastqc/environment.yml @@ -1,6 +1,5 @@ channels: - conda-forge - bioconda - - defaults dependencies: - bioconda::fastqc=0.12.1 diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf index 50e59f2..752c3a1 100644 --- a/modules/nf-core/fastqc/main.nf +++ b/modules/nf-core/fastqc/main.nf @@ -24,7 +24,15 @@ process FASTQC { // Make list of old name and new name pairs to use for renaming in the bash while loop def old_new_pairs = reads instanceof Path || reads.size() == 1 ? [[ reads, "${prefix}.${reads.extension}" ]] : reads.withIndex().collect { entry, index -> [ entry, "${prefix}_${index + 1}.${entry.extension}" ] } def rename_to = old_new_pairs*.join(' ').join(' ') - def renamed_files = old_new_pairs.collect{ old_name, new_name -> new_name }.join(' ') + def renamed_files = old_new_pairs.collect{ _old_name, new_name -> new_name }.join(' ') + + // The total amount of allocated RAM by FastQC is equal to the number of threads defined (--threads) time the amount of RAM defined (--memory) + // https://github.com/s-andrews/FastQC/blob/1faeea0412093224d7f6a07f777fad60a5650795/fastqc#L211-L222 + // Dividing the task.memory by task.cpu allows to stick to requested amount of RAM in the label + def memory_in_mb = MemoryUnit.of("${task.memory}").toUnit('MB') / task.cpus + // FastQC memory value allowed range (100 - 10000) + def fastqc_memory = memory_in_mb > 10000 ? 10000 : (memory_in_mb < 100 ? 100 : memory_in_mb) + """ printf "%s %s\\n" $rename_to | while read old_name new_name; do [ -f "\${new_name}" ] || ln -s \$old_name \$new_name @@ -33,11 +41,12 @@ process FASTQC { fastqc \\ $args \\ --threads $task.cpus \\ + --memory $fastqc_memory \\ $renamed_files cat <<-END_VERSIONS > versions.yml "${task.process}": - fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) + fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' ) END_VERSIONS """ @@ -49,7 +58,7 @@ process FASTQC { cat <<-END_VERSIONS > versions.yml "${task.process}": - fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) + fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' ) END_VERSIONS """ } diff --git a/modules/nf-core/fastqc/meta.yml b/modules/nf-core/fastqc/meta.yml index ee5507e..2b2e62b 100644 --- a/modules/nf-core/fastqc/meta.yml +++ b/modules/nf-core/fastqc/meta.yml @@ -11,40 +11,50 @@ tools: FastQC gives general quality metrics about your reads. It provides information about the quality score distribution across your reads, the per base sequence content (%A/C/G/T). + You get information about adapter contamination and other overrepresented sequences. homepage: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/ documentation: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/ licence: ["GPL-2.0-only"] + identifier: biotools:fastqc input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - reads: - type: file - description: | - List of input FastQ files of size 1 and 2 for single-end and paired-end data, - respectively. + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - html: - type: file - description: FastQC report - pattern: "*_{fastqc.html}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.html": + type: file + description: FastQC report + pattern: "*_{fastqc.html}" - zip: - type: file - description: FastQC report archive - pattern: "*_{fastqc.zip}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.zip": + type: file + description: FastQC report archive + pattern: "*_{fastqc.zip}" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@drpatelh" - "@grst" diff --git a/modules/nf-core/fastqc/tests/main.nf.test b/modules/nf-core/fastqc/tests/main.nf.test index 6437a14..e9d79a0 100644 --- a/modules/nf-core/fastqc/tests/main.nf.test +++ b/modules/nf-core/fastqc/tests/main.nf.test @@ -3,38 +3,306 @@ nextflow_process { name "Test Process FASTQC" script "../main.nf" process "FASTQC" + tag "modules" tag "modules_nfcore" tag "fastqc" - test("Single-Read") { + test("sarscov2 single-end [fastq]") { when { - params { - outdir = "$outputDir" + process { + """ + input[0] = Channel.of([ + [ id: 'test', single_end:true ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ]) + """ } + } + + then { + assertAll ( + { assert process.success }, + // NOTE The report contains the date inside it, which means that the md5sum is stable per day, but not longer than that. So you can't md5sum it. + // looks like this:
Mon 2 Oct 2023
test.gz
+ // https://github.com/nf-core/modules/pull/3903#issuecomment-1743620039 + { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, + { assert snapshot(process.out.versions).match() } + ) + } + } + + test("sarscov2 paired-end [fastq]") { + + when { process { """ - input[0] = [ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert process.out.html[0][1][0] ==~ ".*/test_1_fastqc.html" }, + { assert process.out.html[0][1][1] ==~ ".*/test_2_fastqc.html" }, + { assert process.out.zip[0][1][0] ==~ ".*/test_1_fastqc.zip" }, + { assert process.out.zip[0][1][1] ==~ ".*/test_2_fastqc.zip" }, + { assert path(process.out.html[0][1][0]).text.contains("File typeConventional base calls") }, + { assert path(process.out.html[0][1][1]).text.contains("File typeConventional base calls") }, + { assert snapshot(process.out.versions).match() } + ) + } + } + + test("sarscov2 interleaved [fastq]") { + + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, + { assert snapshot(process.out.versions).match() } + ) + } + } + + test("sarscov2 paired-end [bam]") { + + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, + { assert snapshot(process.out.versions).match() } + ) + } + } + + test("sarscov2 multiple [fastq]") { + + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_2.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert process.out.html[0][1][0] ==~ ".*/test_1_fastqc.html" }, + { assert process.out.html[0][1][1] ==~ ".*/test_2_fastqc.html" }, + { assert process.out.html[0][1][2] ==~ ".*/test_3_fastqc.html" }, + { assert process.out.html[0][1][3] ==~ ".*/test_4_fastqc.html" }, + { assert process.out.zip[0][1][0] ==~ ".*/test_1_fastqc.zip" }, + { assert process.out.zip[0][1][1] ==~ ".*/test_2_fastqc.zip" }, + { assert process.out.zip[0][1][2] ==~ ".*/test_3_fastqc.zip" }, + { assert process.out.zip[0][1][3] ==~ ".*/test_4_fastqc.zip" }, + { assert path(process.out.html[0][1][0]).text.contains("File typeConventional base calls") }, + { assert path(process.out.html[0][1][1]).text.contains("File typeConventional base calls") }, + { assert path(process.out.html[0][1][2]).text.contains("File typeConventional base calls") }, + { assert path(process.out.html[0][1][3]).text.contains("File typeConventional base calls") }, + { assert snapshot(process.out.versions).match() } + ) + } + } + + test("sarscov2 custom_prefix") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'mysample', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert process.out.html[0][1] ==~ ".*/mysample_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/mysample_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, + { assert snapshot(process.out.versions).match() } + ) + } + } + + test("sarscov2 single-end [fastq] - stub") { + + options "-stub" + when { + process { + """ + input[0] = Channel.of([ [ id: 'test', single_end:true ], - [ - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) - ] - ] + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 paired-end [fastq] - stub") { + + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 interleaved [fastq] - stub") { + + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 paired-end [bam] - stub") { + + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 multiple [fastq] - stub") { + + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_2.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 custom_prefix - stub") { + + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [ id:'mysample', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ]) """ } } then { assertAll ( - { assert process.success }, - // NOTE The report contains the date inside it, which means that the md5sum is stable per day, but not longer than that. So you can't md5sum it. - // looks like this:
Mon 2 Oct 2023
test.gz
- // https://github.com/nf-core/modules/pull/3903#issuecomment-1743620039 - { assert process.out.html.get(0).get(1) ==~ ".*/test_fastqc.html" }, - { assert path(process.out.html.get(0).get(1)).getText().contains("File typeConventional base calls") }, - { assert snapshot(process.out.versions).match("versions") }, - { assert process.out.zip.get(0).get(1) ==~ ".*/test_fastqc.zip" } + { assert process.success }, + { assert snapshot(process.out).match() } ) } } diff --git a/modules/nf-core/fastqc/tests/main.nf.test.snap b/modules/nf-core/fastqc/tests/main.nf.test.snap index 636a32c..d5db309 100644 --- a/modules/nf-core/fastqc/tests/main.nf.test.snap +++ b/modules/nf-core/fastqc/tests/main.nf.test.snap @@ -1,10 +1,392 @@ { - "versions": { + "sarscov2 custom_prefix": { "content": [ [ "versions.yml:md5,e1cc25ca8af856014824abd842e93978" ] ], - "timestamp": "2023-10-09T23:40:54+0000" + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:02:16.374038" + }, + "sarscov2 single-end [fastq] - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "html": [ + [ + { + "id": "test", + "single_end": true + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "zip": [ + [ + { + "id": "test", + "single_end": true + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:02:24.993809" + }, + "sarscov2 custom_prefix - stub": { + "content": [ + { + "0": [ + [ + { + "id": "mysample", + "single_end": true + }, + "mysample.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "mysample", + "single_end": true + }, + "mysample.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "html": [ + [ + { + "id": "mysample", + "single_end": true + }, + "mysample.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "zip": [ + [ + { + "id": "mysample", + "single_end": true + }, + "mysample.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:03:10.93942" + }, + "sarscov2 interleaved [fastq]": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:01:42.355718" + }, + "sarscov2 paired-end [bam]": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:01:53.276274" + }, + "sarscov2 multiple [fastq]": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:02:05.527626" + }, + "sarscov2 paired-end [fastq]": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:01:31.188871" + }, + "sarscov2 paired-end [fastq] - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "zip": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:02:34.273566" + }, + "sarscov2 multiple [fastq] - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "zip": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:03:02.304411" + }, + "sarscov2 single-end [fastq]": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:01:19.095607" + }, + "sarscov2 interleaved [fastq] - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "zip": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:02:44.640184" + }, + "sarscov2 paired-end [bam] - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "zip": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:02:53.550742" } } \ No newline at end of file diff --git a/modules/nf-core/multiqc/environment.yml b/modules/nf-core/multiqc/environment.yml index 9d0e6b2..6f5b867 100644 --- a/modules/nf-core/multiqc/environment.yml +++ b/modules/nf-core/multiqc/environment.yml @@ -1,6 +1,5 @@ channels: - conda-forge - bioconda - - defaults dependencies: - - bioconda::multiqc=1.17 + - bioconda::multiqc=1.25.1 diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf index 2bbc398..cc0643e 100644 --- a/modules/nf-core/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -3,14 +3,16 @@ process MULTIQC { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.17--pyhdfd78af_0' : - 'biocontainers/multiqc:1.17--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.25.1--pyhdfd78af_0' : + 'biocontainers/multiqc:1.25.1--pyhdfd78af_0' }" input: path multiqc_files, stageAs: "?/*" path(multiqc_config) path(extra_multiqc_config) path(multiqc_logo) + path(replace_names) + path(sample_names) output: path "*multiqc_report.html", emit: report @@ -23,14 +25,22 @@ process MULTIQC { script: def args = task.ext.args ?: '' + def prefix = task.ext.prefix ? "--filename ${task.ext.prefix}.html" : '' def config = multiqc_config ? "--config $multiqc_config" : '' def extra_config = extra_multiqc_config ? "--config $extra_multiqc_config" : '' + def logo = multiqc_logo ? "--cl-config 'custom_logo: \"${multiqc_logo}\"'" : '' + def replace = replace_names ? "--replace-names ${replace_names}" : '' + def samples = sample_names ? "--sample-names ${sample_names}" : '' """ multiqc \\ --force \\ $args \\ $config \\ + $prefix \\ $extra_config \\ + $logo \\ + $replace \\ + $samples \\ . cat <<-END_VERSIONS > versions.yml @@ -41,8 +51,8 @@ process MULTIQC { stub: """ - touch multiqc_data - touch multiqc_plots + mkdir multiqc_data + mkdir multiqc_plots touch multiqc_report.html cat <<-END_VERSIONS > versions.yml diff --git a/modules/nf-core/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml index a61223e..b16c187 100644 --- a/modules/nf-core/multiqc/meta.yml +++ b/modules/nf-core/multiqc/meta.yml @@ -1,6 +1,6 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json -name: MultiQC -description: Aggregate results from bioinformatics analyses across many samples into a single report +name: multiqc +description: Aggregate results from bioinformatics analyses across many samples into + a single report keywords: - QC - bioinformatics tools @@ -13,40 +13,59 @@ tools: homepage: https://multiqc.info/ documentation: https://multiqc.info/docs/ licence: ["GPL-3.0-or-later"] + identifier: biotools:multiqc input: - - multiqc_files: - type: file - description: | - List of reports / files recognised by MultiQC, for example the html and zip output of FastQC - - multiqc_config: - type: file - description: Optional config yml for MultiQC - pattern: "*.{yml,yaml}" - - extra_multiqc_config: - type: file - description: Second optional config yml for MultiQC. Will override common sections in multiqc_config. - pattern: "*.{yml,yaml}" - - multiqc_logo: - type: file - description: Optional logo file for MultiQC - pattern: "*.{png}" + - - multiqc_files: + type: file + description: | + List of reports / files recognised by MultiQC, for example the html and zip output of FastQC + - - multiqc_config: + type: file + description: Optional config yml for MultiQC + pattern: "*.{yml,yaml}" + - - extra_multiqc_config: + type: file + description: Second optional config yml for MultiQC. Will override common sections + in multiqc_config. + pattern: "*.{yml,yaml}" + - - multiqc_logo: + type: file + description: Optional logo file for MultiQC + pattern: "*.{png}" + - - replace_names: + type: file + description: | + Optional two-column sample renaming file. First column a set of + patterns, second column a set of corresponding replacements. Passed via + MultiQC's `--replace-names` option. + pattern: "*.{tsv}" + - - sample_names: + type: file + description: | + Optional TSV file with headers, passed to the MultiQC --sample_names + argument. + pattern: "*.{tsv}" output: - report: - type: file - description: MultiQC report file - pattern: "multiqc_report.html" + - "*multiqc_report.html": + type: file + description: MultiQC report file + pattern: "multiqc_report.html" - data: - type: directory - description: MultiQC data dir - pattern: "multiqc_data" + - "*_data": + type: directory + description: MultiQC data dir + pattern: "multiqc_data" - plots: - type: file - description: Plots created by MultiQC - pattern: "*_data" + - "*_plots": + type: file + description: Plots created by MultiQC + pattern: "*_data" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@abhi18av" - "@bunop" diff --git a/modules/nf-core/multiqc/tests/main.nf.test b/modules/nf-core/multiqc/tests/main.nf.test new file mode 100644 index 0000000..33316a7 --- /dev/null +++ b/modules/nf-core/multiqc/tests/main.nf.test @@ -0,0 +1,92 @@ +nextflow_process { + + name "Test Process MULTIQC" + script "../main.nf" + process "MULTIQC" + + tag "modules" + tag "modules_nfcore" + tag "multiqc" + + config "./nextflow.config" + + test("sarscov2 single-end [fastqc]") { + + when { + process { + """ + input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true)) + input[1] = [] + input[2] = [] + input[3] = [] + input[4] = [] + input[5] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.report[0] ==~ ".*/multiqc_report.html" }, + { assert process.out.data[0] ==~ ".*/multiqc_data" }, + { assert snapshot(process.out.versions).match("multiqc_versions_single") } + ) + } + + } + + test("sarscov2 single-end [fastqc] [config]") { + + when { + process { + """ + input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true)) + input[1] = Channel.of(file("https://github.com/nf-core/tools/raw/dev/nf_core/pipeline-template/assets/multiqc_config.yml", checkIfExists: true)) + input[2] = [] + input[3] = [] + input[4] = [] + input[5] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.report[0] ==~ ".*/multiqc_report.html" }, + { assert process.out.data[0] ==~ ".*/multiqc_data" }, + { assert snapshot(process.out.versions).match("multiqc_versions_config") } + ) + } + } + + test("sarscov2 single-end [fastqc] - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true)) + input[1] = [] + input[2] = [] + input[3] = [] + input[4] = [] + input[5] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.report.collect { file(it).getName() } + + process.out.data.collect { file(it).getName() } + + process.out.plots.collect { file(it).getName() } + + process.out.versions ).match("multiqc_stub") } + ) + } + + } +} diff --git a/modules/nf-core/multiqc/tests/main.nf.test.snap b/modules/nf-core/multiqc/tests/main.nf.test.snap new file mode 100644 index 0000000..2fcbb5f --- /dev/null +++ b/modules/nf-core/multiqc/tests/main.nf.test.snap @@ -0,0 +1,41 @@ +{ + "multiqc_versions_single": { + "content": [ + [ + "versions.yml:md5,41f391dcedce7f93ca188f3a3ffa0916" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-02T17:51:46.317523" + }, + "multiqc_stub": { + "content": [ + [ + "multiqc_report.html", + "multiqc_data", + "multiqc_plots", + "versions.yml:md5,41f391dcedce7f93ca188f3a3ffa0916" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-02T17:52:20.680978" + }, + "multiqc_versions_config": { + "content": [ + [ + "versions.yml:md5,41f391dcedce7f93ca188f3a3ffa0916" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-02T17:52:09.185842" + } +} \ No newline at end of file diff --git a/modules/nf-core/multiqc/tests/nextflow.config b/modules/nf-core/multiqc/tests/nextflow.config new file mode 100644 index 0000000..c537a6a --- /dev/null +++ b/modules/nf-core/multiqc/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: 'MULTIQC' { + ext.prefix = null + } +} diff --git a/modules/nf-core/multiqc/tests/tags.yml b/modules/nf-core/multiqc/tests/tags.yml new file mode 100644 index 0000000..bea6c0d --- /dev/null +++ b/modules/nf-core/multiqc/tests/tags.yml @@ -0,0 +1,2 @@ +multiqc: + - modules/nf-core/multiqc/** diff --git a/modules/nf-core/subread/featurecounts/environment.yml b/modules/nf-core/picard/collectrnaseqmetrics/environment.yml similarity index 56% rename from modules/nf-core/subread/featurecounts/environment.yml rename to modules/nf-core/picard/collectrnaseqmetrics/environment.yml index 962c477..1d715d5 100644 --- a/modules/nf-core/subread/featurecounts/environment.yml +++ b/modules/nf-core/picard/collectrnaseqmetrics/environment.yml @@ -1,6 +1,5 @@ channels: - conda-forge - bioconda - - defaults dependencies: - - bioconda::subread=2.0.1 + - bioconda::picard=3.3.0 diff --git a/modules/nf-core/picard/collectrnaseqmetrics/main.nf b/modules/nf-core/picard/collectrnaseqmetrics/main.nf new file mode 100644 index 0000000..eb80fdc --- /dev/null +++ b/modules/nf-core/picard/collectrnaseqmetrics/main.nf @@ -0,0 +1,62 @@ +process PICARD_COLLECTRNASEQMETRICS { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/picard:3.3.0--hdfd78af_0' : + 'biocontainers/picard:3.3.0--hdfd78af_0' }" + + input: + tuple val(meta), path(bam) + path ref_flat + path fasta + path rrna_intervals + + output: + tuple val(meta), path("*.rna_metrics") , emit: metrics + tuple val(meta), path("*.pdf") , emit: pdf, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def reference = fasta ? "--REFERENCE_SEQUENCE ${fasta}" : "" + def rrna = rrna_intervals ? "--RIBOSOMAL_INTERVALS ${rrna_intervals}" : "" + def avail_mem = 3072 + if (!task.memory) { + log.info '[Picard CollectRnaSeqMetrics] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + picard \\ + -Xmx${avail_mem}M \\ + CollectRnaSeqMetrics \\ + $args \\ + $reference \\ + $rrna \\ + --REF_FLAT $ref_flat \\ + --INPUT $bam \\ + --OUTPUT ${prefix}.rna_metrics + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + picard: \$(echo \$(picard CollectRnaSeqMetrics --version 2>&1) | grep -o 'Version:.*' | cut -f2- -d:) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.rna_metrics + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + picard: \$(echo \$(picard CollectRnaSeqMetrics --version 2>&1) | grep -o 'Version:.*' | cut -f2- -d:) + END_VERSIONS + """ +} diff --git a/modules/nf-core/picard/collectrnaseqmetrics/meta.yml b/modules/nf-core/picard/collectrnaseqmetrics/meta.yml new file mode 100644 index 0000000..15d146b --- /dev/null +++ b/modules/nf-core/picard/collectrnaseqmetrics/meta.yml @@ -0,0 +1,68 @@ +name: "picard_collectrnaseqmetrics" +description: Collect metrics from a RNAseq BAM file +keywords: + - rna + - bam + - metrics + - alignment + - statistics + - quality +tools: + - "picard": + description: | + A set of command line tools (in Java) for manipulating high-throughput sequencing (HTS) + data and formats such as SAM/BAM/CRAM and VCF. + homepage: "https://broadinstitute.github.io/picard/" + documentation: "https://broadinstitute.github.io/picard/" + tool_dev_url: "https://github.com/broadinstitute/picard" + licence: ["MIT"] + identifier: biotools:picard_tools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false, strandedness:true ] + - bam: + type: file + description: BAM/SAM file + pattern: "*.{bam,sam}" + - - ref_flat: + type: file + description: Genome ref_flat file + - - fasta: + type: file + description: Genome fasta file + - - rrna_intervals: + type: file + description: Interval file of ribosomal RNA regions +output: + - metrics: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.rna_metrics": + type: file + description: RNA alignment metrics files generated by picard + pattern: "*.rna_metrics" + - pdf: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.pdf": + type: file + description: Plot normalized position vs. coverage in a pdf file generated by + picard + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@anoronh4" +maintainers: + - "@anoronh4" diff --git a/modules/nf-core/picard/collectrnaseqmetrics/tests/main.nf.test b/modules/nf-core/picard/collectrnaseqmetrics/tests/main.nf.test new file mode 100644 index 0000000..9ab1855 --- /dev/null +++ b/modules/nf-core/picard/collectrnaseqmetrics/tests/main.nf.test @@ -0,0 +1,81 @@ +nextflow_process { + + name "Test Process PICARD_COLLECTRNASEQMETRICS" + script "../main.nf" + process "PICARD_COLLECTRNASEQMETRICS" + config "./nextflow.config" + + tag "modules" + tag "modules_nfcore" + tag "picard" + tag "picard/collectrnaseqmetrics" + tag "ucsc/gtftogenepred" + + setup { + run("UCSC_GTFTOGENEPRED") { + script "../../../ucsc/gtftogenepred/main.nf" + process { + """ + input[0] = Channel.of([ + [ id:'test' ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gtf', checkIfExists: true) ] + ]) + """ + } + } + } + + test("sarscov2 - fasta - gtf") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false, strandedness:'forward' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ] + input[1] = UCSC_GTFTOGENEPRED.out.refflat.map{ it[1] } + input[2] = file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert file(process.out.metrics[0][1]).text.contains('CollectRnaSeqMetrics') }, + { assert snapshot( + process.out.versions, + process.out.pdf + ).match() } + ) + } + } + + test("sarscov2 - fasta - gtf - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false, strandedness:'forward' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ] + input[1] = UCSC_GTFTOGENEPRED.out.refflat.map{ it[1] } + input[2] = file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/picard/collectrnaseqmetrics/tests/main.nf.test.snap b/modules/nf-core/picard/collectrnaseqmetrics/tests/main.nf.test.snap new file mode 100644 index 0000000..ad6503a --- /dev/null +++ b/modules/nf-core/picard/collectrnaseqmetrics/tests/main.nf.test.snap @@ -0,0 +1,60 @@ +{ + "sarscov2 - fasta - gtf": { + "content": [ + [ + "versions.yml:md5,8d0867f89947c081711de09338138c2e" + ], + [ + + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-18T16:10:39.199344417" + }, + "sarscov2 - fasta - gtf - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false, + "strandedness": "forward" + }, + "test.rna_metrics:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,8d0867f89947c081711de09338138c2e" + ], + "metrics": [ + [ + { + "id": "test", + "single_end": false, + "strandedness": "forward" + }, + "test.rna_metrics:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "pdf": [ + + ], + "versions": [ + "versions.yml:md5,8d0867f89947c081711de09338138c2e" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-18T16:10:57.248132065" + } +} \ No newline at end of file diff --git a/modules/nf-core/picard/collectrnaseqmetrics/tests/nextflow.config b/modules/nf-core/picard/collectrnaseqmetrics/tests/nextflow.config new file mode 100644 index 0000000..bc82e10 --- /dev/null +++ b/modules/nf-core/picard/collectrnaseqmetrics/tests/nextflow.config @@ -0,0 +1,13 @@ +process { + withName:UCSC_GTFTOGENEPRED { + ext.args = "-genePredExt -geneNameAsName2 -ignoreGroupsWithoutExons" + } + withName:PICARD_COLLECTRNASEQMETRICS { + ext.args = { ( meta.strandedness == "forward" || meta.single_end ) ? + "--STRAND_SPECIFICITY FIRST_READ_TRANSCRIPTION_STRAND" : + meta.strandedness == "reverse" ? + "--STRAND_SPECIFICITY SECOND_READ_TRANSCRIPTION_STRAND" : + "--STRAND_SPECIFICITY NONE" + } + } +} diff --git a/modules/nf-core/preseq/lcextrap/environment.yml b/modules/nf-core/preseq/lcextrap/environment.yml new file mode 100644 index 0000000..6300d3f --- /dev/null +++ b/modules/nf-core/preseq/lcextrap/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::preseq=3.2.0 diff --git a/modules/nf-core/preseq/lcextrap/main.nf b/modules/nf-core/preseq/lcextrap/main.nf new file mode 100644 index 0000000..540a5fb --- /dev/null +++ b/modules/nf-core/preseq/lcextrap/main.nf @@ -0,0 +1,53 @@ +process PRESEQ_LCEXTRAP { + tag "$meta.id" + label 'process_single' + label 'error_retry' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/preseq:3.2.0--hdcf5f25_6': + 'biocontainers/preseq:3.2.0--hdcf5f25_6' }" + + input: + tuple val(meta), path(bam) + + output: + tuple val(meta), path("*.lc_extrap.txt"), emit: lc_extrap + tuple val(meta), path("*.log") , emit: log + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + args = task.attempt > 1 ? args.join(' -defects') : args // Disable testing for defects + def prefix = task.ext.prefix ?: "${meta.id}" + def paired_end = meta.single_end ? '' : '-pe' + """ + preseq \\ + lc_extrap \\ + $args \\ + $paired_end \\ + -output ${prefix}.lc_extrap.txt \\ + $bam + cp .command.err ${prefix}.command.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + preseq: \$(echo \$(preseq 2>&1) | sed 's/^.*Version: //; s/Usage:.*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.lc_extrap.txt + touch ${prefix}.command.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + preseq: \$(echo \$(preseq 2>&1) | sed 's/^.*Version: //; s/Usage:.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/preseq/lcextrap/meta.yml b/modules/nf-core/preseq/lcextrap/meta.yml new file mode 100644 index 0000000..28fb449 --- /dev/null +++ b/modules/nf-core/preseq/lcextrap/meta.yml @@ -0,0 +1,58 @@ +name: preseq_lcextrap +description: Software for predicting library complexity and genome coverage in high-throughput + sequencing +keywords: + - preseq + - library + - complexity +tools: + - preseq: + description: Software for predicting library complexity and genome coverage in + high-throughput sequencing + homepage: http://smithlabresearch.org/software/preseq/ + documentation: http://smithlabresearch.org/wp-content/uploads/manual.pdf + tool_dev_url: https://github.com/smithlabcode/preseq + licence: ["GPL"] + identifier: biotools:preseq +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" +output: + - lc_extrap: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.lc_extrap.txt": + type: file + description: File containing output of Preseq lcextrap + pattern: "*.{lc_extrap.txt}" + - log: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.log": + type: file + description: Log file containing stderr produced by Preseq + pattern: "*.{log}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@edmundmiller" +maintainers: + - "@drpatelh" + - "@edmundmiller" diff --git a/modules/nf-core/preseq/lcextrap/tests/main.nf.test b/modules/nf-core/preseq/lcextrap/tests/main.nf.test new file mode 100644 index 0000000..d1af1f0 --- /dev/null +++ b/modules/nf-core/preseq/lcextrap/tests/main.nf.test @@ -0,0 +1,100 @@ +nextflow_process { + + name "Test Process PRESEQ_LCEXTRAP" + script "../main.nf" + process "PRESEQ_LCEXTRAP" + tag "modules" + tag "modules_nfcore" + tag "preseq" + tag "preseq/lcextrap" + + test("sarscov2 - single_end") { + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'delete_me/preseq/SRR1003759_5M_subset.mr', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.log[0][1]).name).match("single_end - log") }, + { assert snapshot(process.out.lc_extrap).match("single_end - lc_extrap") }, + { assert snapshot(process.out.versions).match("single_end - versions") } + ) + } + } + + test("sarscov2 - single_end - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'delete_me/preseq/SRR1003759_5M_subset.mr', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - paired_end") { + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'delete_me/preseq/SRR1003759_5M_subset.mr', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.log[0][1]).name).match("paired_end - log") }, + { assert snapshot(process.out.lc_extrap).match("paired_end - lc_extrap") }, + { assert snapshot(process.out.versions).match("paired_end - versions") } + ) + } + } + + test("sarscov2 - paired_end - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'delete_me/preseq/SRR1003759_5M_subset.mr', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} \ No newline at end of file diff --git a/modules/nf-core/preseq/lcextrap/tests/main.nf.test.snap b/modules/nf-core/preseq/lcextrap/tests/main.nf.test.snap new file mode 100644 index 0000000..c725d8e --- /dev/null +++ b/modules/nf-core/preseq/lcextrap/tests/main.nf.test.snap @@ -0,0 +1,188 @@ +{ + "single_end - lc_extrap": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.lc_extrap.txt:md5,1fa5cdd601079329618f61660bee00de" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2023-11-23T17:20:40.735535" + }, + "paired_end - log": { + "content": [ + "test.command.log" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2023-11-23T17:20:51.981746" + }, + "single_end - versions": { + "content": [ + [ + "versions.yml:md5,e099c02ff6a63c1dacc6c79fd3d0223e" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-19T16:55:55.821665692" + }, + "sarscov2 - single_end - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.lc_extrap.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.command.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,e099c02ff6a63c1dacc6c79fd3d0223e" + ], + "lc_extrap": [ + [ + { + "id": "test", + "single_end": true + }, + "test.lc_extrap.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test", + "single_end": true + }, + "test.command.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,e099c02ff6a63c1dacc6c79fd3d0223e" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-19T16:56:11.779385911" + }, + "paired_end - versions": { + "content": [ + [ + "versions.yml:md5,e099c02ff6a63c1dacc6c79fd3d0223e" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-19T16:56:31.353046182" + }, + "single_end - log": { + "content": [ + "test.command.log" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2023-11-23T17:20:40.72985" + }, + "paired_end - lc_extrap": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.lc_extrap.txt:md5,10e5ea860e87fb6f5dc10f4f20c62040" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2023-11-23T17:20:51.998533" + }, + "sarscov2 - paired_end - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.lc_extrap.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.command.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,e099c02ff6a63c1dacc6c79fd3d0223e" + ], + "lc_extrap": [ + [ + { + "id": "test", + "single_end": false + }, + "test.lc_extrap.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test", + "single_end": false + }, + "test.command.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,e099c02ff6a63c1dacc6c79fd3d0223e" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-19T16:56:45.034808313" + } +} \ No newline at end of file diff --git a/modules/nf-core/preseq/lcextrap/tests/tags.yml b/modules/nf-core/preseq/lcextrap/tests/tags.yml new file mode 100644 index 0000000..b9e25ea --- /dev/null +++ b/modules/nf-core/preseq/lcextrap/tests/tags.yml @@ -0,0 +1,2 @@ +preseq/lcextrap: + - modules/nf-core/preseq/lcextrap/** diff --git a/modules/nf-core/rseqc/bamstat/environment.yml b/modules/nf-core/rseqc/bamstat/environment.yml new file mode 100644 index 0000000..304e38f --- /dev/null +++ b/modules/nf-core/rseqc/bamstat/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::rseqc=5.0.3 + - conda-forge::r-base>=3.5 diff --git a/modules/nf-core/rseqc/bamstat/main.nf b/modules/nf-core/rseqc/bamstat/main.nf new file mode 100644 index 0000000..167240c --- /dev/null +++ b/modules/nf-core/rseqc/bamstat/main.nf @@ -0,0 +1,45 @@ +process RSEQC_BAMSTAT { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/rseqc:5.0.3--py39hf95cd2a_0' : + 'biocontainers/rseqc:5.0.3--py39hf95cd2a_0' }" + + input: + tuple val(meta), path(bam) + + output: + tuple val(meta), path("*.bam_stat.txt"), emit: txt + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + bam_stat.py \\ + -i $bam \\ + $args \\ + > ${prefix}.bam_stat.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + rseqc: \$(bam_stat.py --version | sed -e "s/bam_stat.py //g") + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.bam_stat.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + rseqc: \$(bam_stat.py --version | sed -e "s/bam_stat.py //g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/rseqc/bamstat/meta.yml b/modules/nf-core/rseqc/bamstat/meta.yml new file mode 100644 index 0000000..d69f054 --- /dev/null +++ b/modules/nf-core/rseqc/bamstat/meta.yml @@ -0,0 +1,47 @@ +name: rseqc_bamstat +description: Generate statistics from a bam file +keywords: + - bam + - qc + - bamstat +tools: + - rseqc: + description: | + RSeQC package provides a number of useful modules that can comprehensively evaluate + high throughput sequence data especially RNA-seq data. + homepage: http://rseqc.sourceforge.net/ + documentation: http://rseqc.sourceforge.net/ + doi: 10.1093/bioinformatics/bts356 + licence: ["GPL-3.0-or-later"] + identifier: biotools:rseqc +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: the bam file to calculate statistics of + pattern: "*.{bam}" +output: + - txt: + - meta: + type: file + description: bam statistics report + pattern: "*.bam_stat.txt" + - "*.bam_stat.txt": + type: file + description: bam statistics report + pattern: "*.bam_stat.txt" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@kevinmenden" +maintainers: + - "@drpatelh" + - "@kevinmenden" diff --git a/modules/nf-core/rseqc/bamstat/tests/main.nf.test b/modules/nf-core/rseqc/bamstat/tests/main.nf.test new file mode 100644 index 0000000..86f4301 --- /dev/null +++ b/modules/nf-core/rseqc/bamstat/tests/main.nf.test @@ -0,0 +1,53 @@ +nextflow_process { + + name "Test Process RSEQC_BAMSTAT" + script "../main.nf" + process "RSEQC_BAMSTAT" + + tag "modules" + tag "modules_nfcore" + tag "rseqc" + tag "rseqc/bamstat" + + config "./nextflow.config" + + test("sarscov2 - [meta] - bam") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam') + ] + """ + } + } + + then { + assert process.success + assert snapshot(process.out).match() + } + } + + test("sarscov2 - [meta] - bam - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam') + ] + """ + } + } + + then { + assert process.success + assert snapshot(process.out).match() + } + } +} diff --git a/modules/nf-core/rseqc/bamstat/tests/main.nf.test.snap b/modules/nf-core/rseqc/bamstat/tests/main.nf.test.snap new file mode 100644 index 0000000..deae0e8 --- /dev/null +++ b/modules/nf-core/rseqc/bamstat/tests/main.nf.test.snap @@ -0,0 +1,68 @@ +{ + "sarscov2 - [meta] - bam - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.bam_stat.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,58f74a7ff9d2966142c81a4a4735dbf3" + ], + "txt": [ + [ + { + "id": "test" + }, + "test.bam_stat.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,58f74a7ff9d2966142c81a4a4735dbf3" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-21T09:16:13.970299" + }, + "sarscov2 - [meta] - bam": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.bam_stat.txt:md5,2675857864c1d1139b2a19d25dc36b09" + ] + ], + "1": [ + "versions.yml:md5,58f74a7ff9d2966142c81a4a4735dbf3" + ], + "txt": [ + [ + { + "id": "test" + }, + "test.bam_stat.txt:md5,2675857864c1d1139b2a19d25dc36b09" + ] + ], + "versions": [ + "versions.yml:md5,58f74a7ff9d2966142c81a4a4735dbf3" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2023-12-14T17:00:23.830276754" + } +} \ No newline at end of file diff --git a/modules/nf-core/rseqc/bamstat/tests/nextflow.config b/modules/nf-core/rseqc/bamstat/tests/nextflow.config new file mode 100644 index 0000000..8730f1c --- /dev/null +++ b/modules/nf-core/rseqc/bamstat/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + +} diff --git a/modules/nf-core/rseqc/bamstat/tests/tags.yml b/modules/nf-core/rseqc/bamstat/tests/tags.yml new file mode 100644 index 0000000..ed9a41f --- /dev/null +++ b/modules/nf-core/rseqc/bamstat/tests/tags.yml @@ -0,0 +1,2 @@ +rseqc/bamstat: + - modules/nf-core/rseqc/bamstat/** diff --git a/modules/nf-core/rseqc/inferexperiment/environment.yml b/modules/nf-core/rseqc/inferexperiment/environment.yml new file mode 100644 index 0000000..304e38f --- /dev/null +++ b/modules/nf-core/rseqc/inferexperiment/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::rseqc=5.0.3 + - conda-forge::r-base>=3.5 diff --git a/modules/nf-core/rseqc/inferexperiment/main.nf b/modules/nf-core/rseqc/inferexperiment/main.nf new file mode 100644 index 0000000..87c676a --- /dev/null +++ b/modules/nf-core/rseqc/inferexperiment/main.nf @@ -0,0 +1,47 @@ +process RSEQC_INFEREXPERIMENT { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/rseqc:5.0.3--py39hf95cd2a_0' : + 'biocontainers/rseqc:5.0.3--py39hf95cd2a_0' }" + + input: + tuple val(meta), path(bam) + path bed + + output: + tuple val(meta), path("*.infer_experiment.txt"), emit: txt + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + infer_experiment.py \\ + -i $bam \\ + -r $bed \\ + $args \\ + > ${prefix}.infer_experiment.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + rseqc: \$(infer_experiment.py --version | sed -e "s/infer_experiment.py //g") + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.infer_experiment.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + rseqc: \$(infer_experiment.py --version | sed -e "s/infer_experiment.py //g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/rseqc/inferexperiment/meta.yml b/modules/nf-core/rseqc/inferexperiment/meta.yml new file mode 100644 index 0000000..3ac7572 --- /dev/null +++ b/modules/nf-core/rseqc/inferexperiment/meta.yml @@ -0,0 +1,51 @@ +name: rseqc_inferexperiment +description: Infer strandedness from sequencing reads +keywords: + - rnaseq + - strandedness + - experiment +tools: + - rseqc: + description: | + RSeQC package provides a number of useful modules that can comprehensively evaluate + high throughput sequence data especially RNA-seq data. + homepage: http://rseqc.sourceforge.net/ + documentation: http://rseqc.sourceforge.net/ + doi: 10.1093/bioinformatics/bts356 + licence: ["GPL-3.0-or-later"] + identifier: biotools:rseqc +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: the bam file to calculate statistics of + pattern: "*.{bam}" + - - bed: + type: file + description: a bed file for the reference gene model + pattern: "*.{bed}" +output: + - txt: + - meta: + type: file + description: infer_experiment results report + pattern: "*.infer_experiment.txt" + - "*.infer_experiment.txt": + type: file + description: infer_experiment results report + pattern: "*.infer_experiment.txt" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@kevinmenden" +maintainers: + - "@drpatelh" + - "@kevinmenden" diff --git a/modules/nf-core/rseqc/inferexperiment/tests/main.nf.test b/modules/nf-core/rseqc/inferexperiment/tests/main.nf.test new file mode 100644 index 0000000..f073844 --- /dev/null +++ b/modules/nf-core/rseqc/inferexperiment/tests/main.nf.test @@ -0,0 +1,54 @@ +nextflow_process { + + name "Test Process RSEQC_INFEREXPERIMENT" + script "../main.nf" + process "RSEQC_INFEREXPERIMENT" + config "./nextflow.config" + + tag "modules" + tag "modules_nfcore" + tag "rseqc" + tag "rseqc/inferexperiment" + + test("sarscov2 - [[meta] - bam] - bed") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam", checkIfExists: true) + ]) + input[1] = Channel.of(file(params.modules_testdata_base_path + "genomics/sarscov2/genome/bed/test.bed", checkIfExists: true)) + """ + } + } + + then { + assert process.success + assert snapshot(process.out).match() + } + } + + test("sarscov2 - [[meta] - bam] - bed - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam", checkIfExists: true) + ]) + input[1] = Channel.of(file(params.modules_testdata_base_path + "genomics/sarscov2/genome/bed/test.bed", checkIfExists: true)) + """ + } + } + + then { + assert process.success + assert snapshot(process.out).match() + } + } +} diff --git a/modules/nf-core/rseqc/inferexperiment/tests/main.nf.test.snap b/modules/nf-core/rseqc/inferexperiment/tests/main.nf.test.snap new file mode 100644 index 0000000..e31c112 --- /dev/null +++ b/modules/nf-core/rseqc/inferexperiment/tests/main.nf.test.snap @@ -0,0 +1,68 @@ +{ + "sarscov2 - [[meta] - bam] - bed - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.infer_experiment.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,f56ba5c6208ae720dd730f3a432e3ba7" + ], + "txt": [ + [ + { + "id": "test" + }, + "test.infer_experiment.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,f56ba5c6208ae720dd730f3a432e3ba7" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-21T09:34:31.050305" + }, + "sarscov2 - [[meta] - bam] - bed": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.infer_experiment.txt:md5,f9d0bfc239df637cd8aeda40ade3c59a" + ] + ], + "1": [ + "versions.yml:md5,f56ba5c6208ae720dd730f3a432e3ba7" + ], + "txt": [ + [ + { + "id": "test" + }, + "test.infer_experiment.txt:md5,f9d0bfc239df637cd8aeda40ade3c59a" + ] + ], + "versions": [ + "versions.yml:md5,f56ba5c6208ae720dd730f3a432e3ba7" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-26T14:33:19.853327" + } +} \ No newline at end of file diff --git a/modules/nf-core/rseqc/inferexperiment/tests/nextflow.config b/modules/nf-core/rseqc/inferexperiment/tests/nextflow.config new file mode 100644 index 0000000..8730f1c --- /dev/null +++ b/modules/nf-core/rseqc/inferexperiment/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + +} diff --git a/modules/nf-core/rseqc/inferexperiment/tests/tags.yml b/modules/nf-core/rseqc/inferexperiment/tests/tags.yml new file mode 100644 index 0000000..f9ba7e2 --- /dev/null +++ b/modules/nf-core/rseqc/inferexperiment/tests/tags.yml @@ -0,0 +1,2 @@ +rseqc/inferexperiment: + - modules/nf-core/rseqc/inferexperiment/** diff --git a/modules/nf-core/rseqc/innerdistance/environment.yml b/modules/nf-core/rseqc/innerdistance/environment.yml new file mode 100644 index 0000000..304e38f --- /dev/null +++ b/modules/nf-core/rseqc/innerdistance/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::rseqc=5.0.3 + - conda-forge::r-base>=3.5 diff --git a/modules/nf-core/rseqc/innerdistance/main.nf b/modules/nf-core/rseqc/innerdistance/main.nf new file mode 100644 index 0000000..207f5fb --- /dev/null +++ b/modules/nf-core/rseqc/innerdistance/main.nf @@ -0,0 +1,66 @@ +process RSEQC_INNERDISTANCE { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/rseqc:5.0.3--py39hf95cd2a_0' : + 'biocontainers/rseqc:5.0.3--py39hf95cd2a_0' }" + + input: + tuple val(meta), path(bam) + path bed + + output: + tuple val(meta), path("*distance.txt"), optional:true, emit: distance + tuple val(meta), path("*freq.txt") , optional:true, emit: freq + tuple val(meta), path("*mean.txt") , optional:true, emit: mean + tuple val(meta), path("*.pdf") , optional:true, emit: pdf + tuple val(meta), path("*.r") , optional:true, emit: rscript + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + if (!meta.single_end) { + """ + inner_distance.py \\ + -i $bam \\ + -r $bed \\ + -o $prefix \\ + $args \\ + > stdout.txt + head -n 2 stdout.txt > ${prefix}.inner_distance_mean.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + rseqc: \$(inner_distance.py --version | sed -e "s/inner_distance.py //g") + END_VERSIONS + """ + } else { + """ + cat <<-END_VERSIONS > versions.yml + "${task.process}": + rseqc: \$(inner_distance.py --version | sed -e "s/inner_distance.py //g") + END_VERSIONS + """ + } + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.inner_distance.txt + touch ${prefix}.inner_distance_freq.txt + touch ${prefix}.inner_distance_mean.txt + touch ${prefix}.inner_distance_plot.pdf + touch ${prefix}.inner_distance_plot.r + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + rseqc: \$(inner_distance.py --version | sed -e "s/inner_distance.py //g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/rseqc/innerdistance/meta.yml b/modules/nf-core/rseqc/innerdistance/meta.yml new file mode 100644 index 0000000..358e4d1 --- /dev/null +++ b/modules/nf-core/rseqc/innerdistance/meta.yml @@ -0,0 +1,87 @@ +name: rseqc_innerdistance +description: Calculate inner distance between read pairs. +keywords: + - read_pairs + - fragment_size + - inner_distance +tools: + - rseqc: + description: | + RSeQC package provides a number of useful modules that can comprehensively evaluate + high throughput sequence data especially RNA-seq data. + homepage: http://rseqc.sourceforge.net/ + documentation: http://rseqc.sourceforge.net/ + doi: 10.1093/bioinformatics/bts356 + licence: ["GPL-3.0-or-later"] + identifier: biotools:rseqc +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: the alignment in bam format + pattern: "*.{bam}" + - - bed: + type: file + description: a bed file for the reference gene model + pattern: "*.{bed}" +output: + - distance: + - meta: + type: file + description: the inner distances + pattern: "*.inner_distance.txt" + - "*distance.txt": + type: file + description: the inner distances + pattern: "*.inner_distance.txt" + - freq: + - meta: + type: file + description: frequencies of different insert sizes + pattern: "*.inner_distance_freq.txt" + - "*freq.txt": + type: file + description: frequencies of different insert sizes + pattern: "*.inner_distance_freq.txt" + - mean: + - meta: + type: file + description: mean/median values of inner distances + pattern: "*.inner_distance_mean.txt" + - "*mean.txt": + type: file + description: mean/median values of inner distances + pattern: "*.inner_distance_mean.txt" + - pdf: + - meta: + type: file + description: distribution plot of inner distances + pattern: "*.inner_distance_plot.pdf" + - "*.pdf": + type: file + description: distribution plot of inner distances + pattern: "*.inner_distance_plot.pdf" + - rscript: + - meta: + type: file + description: script to reproduce the plot + pattern: "*.inner_distance_plot.R" + - "*.r": + type: file + description: script to reproduce the plot + pattern: "*.inner_distance_plot.R" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@kevinmenden" +maintainers: + - "@drpatelh" + - "@kevinmenden" diff --git a/modules/nf-core/rseqc/innerdistance/tests/main.nf.test b/modules/nf-core/rseqc/innerdistance/tests/main.nf.test new file mode 100644 index 0000000..cb19fe1 --- /dev/null +++ b/modules/nf-core/rseqc/innerdistance/tests/main.nf.test @@ -0,0 +1,63 @@ +nextflow_process { + + name "Test Process RSEQC_INNERDISTANCE" + script "../main.nf" + process "RSEQC_INNERDISTANCE" + config "./nextflow.config" + + tag "modules" + tag "modules_nfcore" + tag "rseqc" + tag "rseqc/innerdistance" + + test("sarscov2 - [[meta] - bam] - bed") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam", checkIfExists: true) + ]) + input[1] = Channel.of(file(params.modules_testdata_base_path + "genomics/sarscov2/genome/bed/test.bed12", checkIfExists: true)) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.pdf[0][1]).name).match("pdf") }, + { assert snapshot(process.out.distance).match("distance") }, + { assert snapshot(process.out.freq).match("freq") }, + { assert snapshot(process.out.freq).match("rscript") }, + { assert snapshot(process.out.mean).match("mean") }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("sarscov2 - [[meta] - bam] - bed - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam", checkIfExists: true) + ]) + input[1] = Channel.of(file(params.modules_testdata_base_path + "genomics/sarscov2/genome/bed/test.bed12", checkIfExists: true)) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/rseqc/innerdistance/tests/main.nf.test.snap b/modules/nf-core/rseqc/innerdistance/tests/main.nf.test.snap new file mode 100644 index 0000000..83f3336 --- /dev/null +++ b/modules/nf-core/rseqc/innerdistance/tests/main.nf.test.snap @@ -0,0 +1,189 @@ +{ + "rscript": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.inner_distance_freq.txt:md5,3fc037501f5899b5da009c8ce02fc25e" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-26T14:33:42.467401" + }, + "pdf": { + "content": [ + "test.inner_distance_plot.pdf" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-26T14:33:42.461959" + }, + "distance": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.inner_distance.txt:md5,a1acc9def0f64a5500d4c4cb47cbe32b" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-26T14:33:42.463083" + }, + "versions": { + "content": [ + [ + "versions.yml:md5,3ca19644c7f02a53db3ffe50c7706797" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-26T14:33:42.474576" + }, + "mean": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.inner_distance_mean.txt:md5,58398b7d5a29a5e564f9e3c50b55996c" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-26T14:33:42.470638" + }, + "sarscov2 - [[meta] - bam] - bed - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.inner_distance.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.inner_distance_freq.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test" + }, + "test.inner_distance_mean.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test" + }, + "test.inner_distance_plot.pdf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + { + "id": "test" + }, + "test.inner_distance_plot.r:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "5": [ + "versions.yml:md5,3ca19644c7f02a53db3ffe50c7706797" + ], + "distance": [ + [ + { + "id": "test" + }, + "test.inner_distance.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "freq": [ + [ + { + "id": "test" + }, + "test.inner_distance_freq.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "mean": [ + [ + { + "id": "test" + }, + "test.inner_distance_mean.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "pdf": [ + [ + { + "id": "test" + }, + "test.inner_distance_plot.pdf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "rscript": [ + [ + { + "id": "test" + }, + "test.inner_distance_plot.r:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,3ca19644c7f02a53db3ffe50c7706797" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-21T09:22:20.844642" + }, + "freq": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.inner_distance_freq.txt:md5,3fc037501f5899b5da009c8ce02fc25e" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-26T14:33:42.464983" + } +} \ No newline at end of file diff --git a/modules/nf-core/rseqc/innerdistance/tests/nextflow.config b/modules/nf-core/rseqc/innerdistance/tests/nextflow.config new file mode 100644 index 0000000..8730f1c --- /dev/null +++ b/modules/nf-core/rseqc/innerdistance/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + +} diff --git a/modules/nf-core/rseqc/innerdistance/tests/tags.yml b/modules/nf-core/rseqc/innerdistance/tests/tags.yml new file mode 100644 index 0000000..4a9d0ac --- /dev/null +++ b/modules/nf-core/rseqc/innerdistance/tests/tags.yml @@ -0,0 +1,2 @@ +rseqc/innerdistance: + - modules/nf-core/rseqc/innerdistance/** diff --git a/modules/nf-core/rseqc/junctionannotation/environment.yml b/modules/nf-core/rseqc/junctionannotation/environment.yml new file mode 100644 index 0000000..304e38f --- /dev/null +++ b/modules/nf-core/rseqc/junctionannotation/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::rseqc=5.0.3 + - conda-forge::r-base>=3.5 diff --git a/modules/nf-core/rseqc/junctionannotation/main.nf b/modules/nf-core/rseqc/junctionannotation/main.nf new file mode 100644 index 0000000..9a8a465 --- /dev/null +++ b/modules/nf-core/rseqc/junctionannotation/main.nf @@ -0,0 +1,60 @@ +process RSEQC_JUNCTIONANNOTATION { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/rseqc:5.0.3--py39hf95cd2a_0' : + 'biocontainers/rseqc:5.0.3--py39hf95cd2a_0' }" + + input: + tuple val(meta), path(bam) + path bed + + output: + tuple val(meta), path("*.xls") , emit: xls + tuple val(meta), path("*.r") , emit: rscript + tuple val(meta), path("*.log") , emit: log + tuple val(meta), path("*.junction.bed"), optional:true, emit: bed + tuple val(meta), path("*.Interact.bed"), optional:true, emit: interact_bed + tuple val(meta), path("*junction.pdf") , optional:true, emit: pdf + tuple val(meta), path("*events.pdf") , optional:true, emit: events_pdf + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + junction_annotation.py \\ + -i $bam \\ + -r $bed \\ + -o $prefix \\ + $args \\ + 2> >(grep -v 'E::idx_find_and_load' | tee ${prefix}.junction_annotation.log >&2) + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + rseqc: \$(junction_annotation.py --version | sed -e "s/junction_annotation.py //g") + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.junction.xls + touch ${prefix}.junction_plot.r + touch ${prefix}.junction_annotation.log + touch ${prefix}.junction.bed + touch ${prefix}.Interact.bed + touch ${prefix}.junction.pdf + touch ${prefix}.events.pdf + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + rseqc: \$(junction_annotation.py --version | sed -e "s/junction_annotation.py //g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/rseqc/junctionannotation/meta.yml b/modules/nf-core/rseqc/junctionannotation/meta.yml new file mode 100644 index 0000000..0622ee4 --- /dev/null +++ b/modules/nf-core/rseqc/junctionannotation/meta.yml @@ -0,0 +1,105 @@ +name: rseqc_junctionannotation +description: compare detected splice junctions to reference gene model +keywords: + - junctions + - splicing + - rnaseq +tools: + - rseqc: + description: | + RSeQC package provides a number of useful modules that can comprehensively evaluate + high throughput sequence data especially RNA-seq data. + homepage: http://rseqc.sourceforge.net/ + documentation: http://rseqc.sourceforge.net/ + doi: 10.1093/bioinformatics/bts356 + licence: ["GPL-3.0-or-later"] + identifier: biotools:rseqc +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: the alignment in bam format + pattern: "*.{bam}" + - - bed: + type: file + description: a bed file for the reference gene model + pattern: "*.{bed}" +output: + - xls: + - meta: + type: file + description: xls file with junction information + pattern: "*.xls" + - "*.xls": + type: file + description: xls file with junction information + pattern: "*.xls" + - rscript: + - meta: + type: file + description: Rscript to reproduce the plots + pattern: "*.r" + - "*.r": + type: file + description: Rscript to reproduce the plots + pattern: "*.r" + - log: + - meta: + type: file + description: Log file of execution + pattern: "*.junction_annotation.log" + - "*.log": + type: file + description: Log file of execution + pattern: "*.junction_annotation.log" + - bed: + - meta: + type: file + description: bed file of annotated junctions + pattern: "*.junction.bed" + - "*.junction.bed": + type: file + description: bed file of annotated junctions + pattern: "*.junction.bed" + - interact_bed: + - meta: + type: file + description: Interact bed file + pattern: "*.Interact.bed" + - "*.Interact.bed": + type: file + description: Interact bed file + pattern: "*.Interact.bed" + - pdf: + - meta: + type: file + description: junction plot + pattern: "*.junction.pdf" + - "*junction.pdf": + type: file + description: junction plot + pattern: "*.junction.pdf" + - events_pdf: + - meta: + type: file + description: events plot + pattern: "*.events.pdf" + - "*events.pdf": + type: file + description: events plot + pattern: "*.events.pdf" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@kevinmenden" +maintainers: + - "@drpatelh" + - "@kevinmenden" diff --git a/modules/nf-core/rseqc/junctionannotation/tests/main.nf.test b/modules/nf-core/rseqc/junctionannotation/tests/main.nf.test new file mode 100644 index 0000000..36d5f6e --- /dev/null +++ b/modules/nf-core/rseqc/junctionannotation/tests/main.nf.test @@ -0,0 +1,60 @@ +nextflow_process { + + name "Test Process RSEQC_JUNCTIONANNOTATION" + script "../main.nf" + process "RSEQC_JUNCTIONANNOTATION" + + tag "modules" + tag "modules_nfcore" + tag "rseqc" + tag "rseqc/junctionannotation" + + test("sarscov2 - paired end [bam]") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end: false ], // meta map + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam", checkIfExists: true) + ]) + input[1] = Channel.of(file(params.modules_testdata_base_path + "genomics/sarscov2/genome/bed/test.bed", checkIfExists: true)) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.rscript.get(0).get(1) ==~ ".*/test.junction_plot.r" }, + { assert process.out.xls.get(0).get(1) ==~ ".*/test.junction.xls" }, + { assert snapshot(process.out.log).match("log") }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("sarscov2 - paired end [bam] - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end: false ], // meta map + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam", checkIfExists: true) + ]) + input[1] = Channel.of(file(params.modules_testdata_base_path + "genomics/sarscov2/genome/bed/test.bed", checkIfExists: true)) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/rseqc/junctionannotation/tests/main.nf.test.snap b/modules/nf-core/rseqc/junctionannotation/tests/main.nf.test.snap new file mode 100644 index 0000000..e87594f --- /dev/null +++ b/modules/nf-core/rseqc/junctionannotation/tests/main.nf.test.snap @@ -0,0 +1,175 @@ +{ + "log": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.junction_annotation.log:md5,ef37d06d169a1adbeec23fddf82aee2b" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-26T14:32:52.732937" + }, + "versions": { + "content": [ + [ + "versions.yml:md5,091865c0ebd6de262c9b0968b5771091" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-26T14:32:52.734515" + }, + "sarscov2 - paired end [bam] - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.junction.xls:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.junction_plot.r:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.junction_annotation.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.junction.bed:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Interact.bed:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": false + }, + "test.junction.pdf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "6": [ + [ + { + "id": "test", + "single_end": false + }, + "test.events.pdf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "7": [ + "versions.yml:md5,091865c0ebd6de262c9b0968b5771091" + ], + "bed": [ + [ + { + "id": "test", + "single_end": false + }, + "test.junction.bed:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "events_pdf": [ + [ + { + "id": "test", + "single_end": false + }, + "test.events.pdf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "interact_bed": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Interact.bed:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test", + "single_end": false + }, + "test.junction_annotation.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "pdf": [ + [ + { + "id": "test", + "single_end": false + }, + "test.junction.pdf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "rscript": [ + [ + { + "id": "test", + "single_end": false + }, + "test.junction_plot.r:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,091865c0ebd6de262c9b0968b5771091" + ], + "xls": [ + [ + { + "id": "test", + "single_end": false + }, + "test.junction.xls:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-21T09:39:10.213511" + } +} \ No newline at end of file diff --git a/modules/nf-core/rseqc/junctionannotation/tests/tags.yml b/modules/nf-core/rseqc/junctionannotation/tests/tags.yml new file mode 100644 index 0000000..5f719fb --- /dev/null +++ b/modules/nf-core/rseqc/junctionannotation/tests/tags.yml @@ -0,0 +1,2 @@ +rseqc/junctionannotation: + - modules/nf-core/rseqc/junctionannotation/** diff --git a/modules/nf-core/rseqc/junctionsaturation/environment.yml b/modules/nf-core/rseqc/junctionsaturation/environment.yml new file mode 100644 index 0000000..304e38f --- /dev/null +++ b/modules/nf-core/rseqc/junctionsaturation/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::rseqc=5.0.3 + - conda-forge::r-base>=3.5 diff --git a/modules/nf-core/rseqc/junctionsaturation/main.nf b/modules/nf-core/rseqc/junctionsaturation/main.nf new file mode 100644 index 0000000..18f063f --- /dev/null +++ b/modules/nf-core/rseqc/junctionsaturation/main.nf @@ -0,0 +1,49 @@ +process RSEQC_JUNCTIONSATURATION { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/rseqc:5.0.3--py39hf95cd2a_0' : + 'biocontainers/rseqc:5.0.3--py39hf95cd2a_0' }" + + input: + tuple val(meta), path(bam) + path bed + + output: + tuple val(meta), path("*.pdf"), emit: pdf + tuple val(meta), path("*.r") , emit: rscript + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + junction_saturation.py \\ + -i $bam \\ + -r $bed \\ + -o $prefix \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + rseqc: \$(junction_saturation.py --version | sed -e "s/junction_saturation.py //g") + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.junctionSaturation_plot.pdf + touch ${prefix}.junctionSaturation_plot.r + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + rseqc: \$(junction_saturation.py --version | sed -e "s/junction_saturation.py //g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/rseqc/junctionsaturation/meta.yml b/modules/nf-core/rseqc/junctionsaturation/meta.yml new file mode 100644 index 0000000..b6f422d --- /dev/null +++ b/modules/nf-core/rseqc/junctionsaturation/meta.yml @@ -0,0 +1,60 @@ +name: rseqc_junctionsaturation +description: compare detected splice junctions to reference gene model +keywords: + - junctions + - splicing + - rnaseq +tools: + - rseqc: + description: | + RSeQC package provides a number of useful modules that can comprehensively evaluate + high throughput sequence data especially RNA-seq data. + homepage: http://rseqc.sourceforge.net/ + documentation: http://rseqc.sourceforge.net/ + doi: 10.1093/bioinformatics/bts356 + licence: ["GPL-3.0-or-later"] + identifier: biotools:rseqc +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: the alignment in bam format + pattern: "*.{bam}" + - - bed: + type: file + description: a bed file for the reference gene model + pattern: "*.{bed}" +output: + - pdf: + - meta: + type: file + description: Junction saturation report + pattern: "*.pdf" + - "*.pdf": + type: file + description: Junction saturation report + pattern: "*.pdf" + - rscript: + - meta: + type: file + description: Junction saturation R-script + pattern: "*.r" + - "*.r": + type: file + description: Junction saturation R-script + pattern: "*.r" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@kevinmenden" +maintainers: + - "@drpatelh" + - "@kevinmenden" diff --git a/modules/nf-core/rseqc/junctionsaturation/tests/main.nf.test b/modules/nf-core/rseqc/junctionsaturation/tests/main.nf.test new file mode 100644 index 0000000..7bd3cfc --- /dev/null +++ b/modules/nf-core/rseqc/junctionsaturation/tests/main.nf.test @@ -0,0 +1,59 @@ +nextflow_process { + + name "Test Process RSEQC_JUNCTIONSATURATION" + script "../main.nf" + process "RSEQC_JUNCTIONSATURATION" + + tag "modules" + tag "modules_nfcore" + tag "rseqc" + tag "rseqc/junctionsaturation" + + test("sarscov2 paired-end [bam]") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end: false ], // meta map + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam", checkIfExists: true) + ]) + input[1] = file(params.modules_testdata_base_path + "genomics/sarscov2/genome/bed/test.bed", checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.pdf[0][1]).name).match("pdf") }, + { assert snapshot(process.out.rscript).match("rscript") }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("sarscov2 paired-end [bam] - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end: false ], // meta map + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam", checkIfExists: true) + ]) + input[1] = file(params.modules_testdata_base_path + "genomics/sarscov2/genome/bed/test.bed", checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/rseqc/junctionsaturation/tests/main.nf.test.snap b/modules/nf-core/rseqc/junctionsaturation/tests/main.nf.test.snap new file mode 100644 index 0000000..8d702a4 --- /dev/null +++ b/modules/nf-core/rseqc/junctionsaturation/tests/main.nf.test.snap @@ -0,0 +1,95 @@ +{ + "rscript": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.junctionSaturation_plot.r:md5,caa6e63dcb477aabb169882b2f30dadd" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-26T14:32:24.018924" + }, + "pdf": { + "content": [ + "test.junctionSaturation_plot.pdf" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-26T14:32:24.011942" + }, + "versions": { + "content": [ + [ + "versions.yml:md5,cd4638541a825dbd44e3cb65d2980aa1" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-26T14:32:24.021984" + }, + "sarscov2 paired-end [bam] - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.junctionSaturation_plot.pdf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.junctionSaturation_plot.r:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,cd4638541a825dbd44e3cb65d2980aa1" + ], + "pdf": [ + [ + { + "id": "test", + "single_end": false + }, + "test.junctionSaturation_plot.pdf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "rscript": [ + [ + { + "id": "test", + "single_end": false + }, + "test.junctionSaturation_plot.r:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,cd4638541a825dbd44e3cb65d2980aa1" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-21T09:43:55.853893" + } +} \ No newline at end of file diff --git a/modules/nf-core/rseqc/junctionsaturation/tests/tags.yml b/modules/nf-core/rseqc/junctionsaturation/tests/tags.yml new file mode 100644 index 0000000..7022b59 --- /dev/null +++ b/modules/nf-core/rseqc/junctionsaturation/tests/tags.yml @@ -0,0 +1,2 @@ +rseqc/junctionsaturation: + - modules/nf-core/rseqc/junctionsaturation/** diff --git a/modules/nf-core/rseqc/readdistribution/environment.yml b/modules/nf-core/rseqc/readdistribution/environment.yml new file mode 100644 index 0000000..304e38f --- /dev/null +++ b/modules/nf-core/rseqc/readdistribution/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::rseqc=5.0.3 + - conda-forge::r-base>=3.5 diff --git a/modules/nf-core/rseqc/readdistribution/main.nf b/modules/nf-core/rseqc/readdistribution/main.nf new file mode 100644 index 0000000..1d94064 --- /dev/null +++ b/modules/nf-core/rseqc/readdistribution/main.nf @@ -0,0 +1,46 @@ +process RSEQC_READDISTRIBUTION { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/rseqc:5.0.3--py39hf95cd2a_0' : + 'biocontainers/rseqc:5.0.3--py39hf95cd2a_0' }" + + input: + tuple val(meta), path(bam) + path bed + + output: + tuple val(meta), path("*.read_distribution.txt"), emit: txt + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + read_distribution.py \\ + -i $bam \\ + -r $bed \\ + > ${prefix}.read_distribution.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + rseqc: \$(read_distribution.py --version | sed -e "s/read_distribution.py //g") + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.read_distribution.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + rseqc: \$(read_distribution.py --version | sed -e "s/read_distribution.py //g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/rseqc/readdistribution/meta.yml b/modules/nf-core/rseqc/readdistribution/meta.yml new file mode 100644 index 0000000..578200a --- /dev/null +++ b/modules/nf-core/rseqc/readdistribution/meta.yml @@ -0,0 +1,51 @@ +name: rseqc_readdistribution +description: Calculate how mapped reads are distributed over genomic features +keywords: + - read distribution + - genomics + - rnaseq +tools: + - rseqc: + description: | + RSeQC package provides a number of useful modules that can comprehensively evaluate + high throughput sequence data especially RNA-seq data. + homepage: http://rseqc.sourceforge.net/ + documentation: http://rseqc.sourceforge.net/ + doi: 10.1093/bioinformatics/bts356 + licence: ["GPL-3.0-or-later"] + identifier: biotools:rseqc +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: the alignment in bam format + pattern: "*.{bam}" + - - bed: + type: file + description: a bed file for the reference gene model + pattern: "*.{bed}" +output: + - txt: + - meta: + type: file + description: the read distribution report + pattern: "*.read_distribution.txt" + - "*.read_distribution.txt": + type: file + description: the read distribution report + pattern: "*.read_distribution.txt" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@kevinmenden" +maintainers: + - "@drpatelh" + - "@kevinmenden" diff --git a/modules/nf-core/rseqc/readdistribution/tests/main.nf.test b/modules/nf-core/rseqc/readdistribution/tests/main.nf.test new file mode 100644 index 0000000..5fa7695 --- /dev/null +++ b/modules/nf-core/rseqc/readdistribution/tests/main.nf.test @@ -0,0 +1,56 @@ +nextflow_process { + + name "Test Process RSEQC_READDISTRIBUTION" + script "../main.nf" + process "RSEQC_READDISTRIBUTION" + tag "modules" + tag "modules_nfcore" + tag "rseqc" + tag "rseqc/readdistribution" + + test("sarscov2 paired-end [bam]") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end: false ], // meta map + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam", checkIfExists: true) + ]) + input[1] = Channel.of(file(params.modules_testdata_base_path + "genomics/sarscov2/genome/bed/test.bed12", checkIfExists: true)) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 paired-end [bam] - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end: false ], // meta map + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam", checkIfExists: true) + ]) + input[1] = Channel.of(file(params.modules_testdata_base_path + "genomics/sarscov2/genome/bed/test.bed12", checkIfExists: true)) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/rseqc/readdistribution/tests/main.nf.test.snap b/modules/nf-core/rseqc/readdistribution/tests/main.nf.test.snap new file mode 100644 index 0000000..02e5e61 --- /dev/null +++ b/modules/nf-core/rseqc/readdistribution/tests/main.nf.test.snap @@ -0,0 +1,72 @@ +{ + "sarscov2 paired-end [bam]": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.read_distribution.txt:md5,56893fdc0809d968629a363551a1655f" + ] + ], + "1": [ + "versions.yml:md5,fc68a268ab32a0d72a66f270d6c7925e" + ], + "txt": [ + [ + { + "id": "test", + "single_end": false + }, + "test.read_distribution.txt:md5,56893fdc0809d968629a363551a1655f" + ] + ], + "versions": [ + "versions.yml:md5,fc68a268ab32a0d72a66f270d6c7925e" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-26T15:38:36.441841" + }, + "sarscov2 paired-end [bam] - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.read_distribution.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,fc68a268ab32a0d72a66f270d6c7925e" + ], + "txt": [ + [ + { + "id": "test", + "single_end": false + }, + "test.read_distribution.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,fc68a268ab32a0d72a66f270d6c7925e" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-21T09:50:29.345905" + } +} \ No newline at end of file diff --git a/modules/nf-core/rseqc/readdistribution/tests/tags.yml b/modules/nf-core/rseqc/readdistribution/tests/tags.yml new file mode 100644 index 0000000..c0c477b --- /dev/null +++ b/modules/nf-core/rseqc/readdistribution/tests/tags.yml @@ -0,0 +1,2 @@ +rseqc/readdistribution: + - modules/nf-core/rseqc/readdistribution/** diff --git a/modules/nf-core/rseqc/readduplication/environment.yml b/modules/nf-core/rseqc/readduplication/environment.yml new file mode 100644 index 0000000..304e38f --- /dev/null +++ b/modules/nf-core/rseqc/readduplication/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::rseqc=5.0.3 + - conda-forge::r-base>=3.5 diff --git a/modules/nf-core/rseqc/readduplication/main.nf b/modules/nf-core/rseqc/readduplication/main.nf new file mode 100644 index 0000000..3543380 --- /dev/null +++ b/modules/nf-core/rseqc/readduplication/main.nf @@ -0,0 +1,51 @@ +process RSEQC_READDUPLICATION { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/rseqc:5.0.3--py39hf95cd2a_0' : + 'biocontainers/rseqc:5.0.3--py39hf95cd2a_0' }" + + input: + tuple val(meta), path(bam) + + output: + tuple val(meta), path("*seq.DupRate.xls"), emit: seq_xls + tuple val(meta), path("*pos.DupRate.xls"), emit: pos_xls + tuple val(meta), path("*.pdf") , emit: pdf + tuple val(meta), path("*.r") , emit: rscript + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + read_duplication.py \\ + -i $bam \\ + -o $prefix \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + rseqc: \$(read_duplication.py --version | sed -e "s/read_duplication.py //g") + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.seq.DupRate.xls + touch ${prefix}.pos.DupRate.xls + touch ${prefix}.DupRate_plot.pdf + touch ${prefix}.DupRate_plot.r + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + rseqc: \$(read_duplication.py --version | sed -e "s/read_duplication.py //g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/rseqc/readduplication/meta.yml b/modules/nf-core/rseqc/readduplication/meta.yml new file mode 100644 index 0000000..ef94122 --- /dev/null +++ b/modules/nf-core/rseqc/readduplication/meta.yml @@ -0,0 +1,75 @@ +name: rseqc_readduplication +description: Calculate read duplication rate +keywords: + - rnaseq + - duplication + - sequence-based + - mapping-based +tools: + - rseqc: + description: | + RSeQC package provides a number of useful modules that can comprehensively evaluate + high throughput sequence data especially RNA-seq data. + homepage: http://rseqc.sourceforge.net/ + documentation: http://rseqc.sourceforge.net/ + doi: 10.1093/bioinformatics/bts356 + licence: ["GPL-3.0-or-later"] + identifier: biotools:rseqc +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: the alignment in bam format + pattern: "*.{bam}" +output: + - seq_xls: + - meta: + type: file + description: Read duplication rate determined from mapping position of read + pattern: "*seq.DupRate.xls" + - "*seq.DupRate.xls": + type: file + description: Read duplication rate determined from mapping position of read + pattern: "*seq.DupRate.xls" + - pos_xls: + - meta: + type: file + description: Read duplication rate determined from sequence of read + pattern: "*pos.DupRate.xls" + - "*pos.DupRate.xls": + type: file + description: Read duplication rate determined from sequence of read + pattern: "*pos.DupRate.xls" + - pdf: + - meta: + type: file + description: plot of duplication rate + pattern: "*.pdf" + - "*.pdf": + type: file + description: plot of duplication rate + pattern: "*.pdf" + - rscript: + - meta: + type: file + description: script to reproduce the plot + pattern: "*.R" + - "*.r": + type: file + description: script to reproduce the plot + pattern: "*.R" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@kevinmenden" +maintainers: + - "@drpatelh" + - "@kevinmenden" diff --git a/modules/nf-core/rseqc/readduplication/tests/main.nf.test b/modules/nf-core/rseqc/readduplication/tests/main.nf.test new file mode 100644 index 0000000..334a4e6 --- /dev/null +++ b/modules/nf-core/rseqc/readduplication/tests/main.nf.test @@ -0,0 +1,58 @@ +nextflow_process { + + name "Test Process RSEQC_READDUPLICATION" + script "../main.nf" + process "RSEQC_READDUPLICATION" + tag "modules" + tag "modules_nfcore" + tag "rseqc" + tag "rseqc/readduplication" + + test("sarscov2 paired-end [bam]") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end: false ], // meta map + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam", checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.pdf[0][1]).name).match("pdf") }, + { assert snapshot(process.out.pos_xls).match("pos_xls") }, + { assert snapshot(process.out.rscript).match("rscript") }, + { assert snapshot(process.out.seq_xls).match("seq_xls") }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("sarscov2 paired-end [bam] - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end: false ], // meta map + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam", checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/rseqc/readduplication/tests/main.nf.test.snap b/modules/nf-core/rseqc/readduplication/tests/main.nf.test.snap new file mode 100644 index 0000000..143a29d --- /dev/null +++ b/modules/nf-core/rseqc/readduplication/tests/main.nf.test.snap @@ -0,0 +1,167 @@ +{ + "pos_xls": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.pos.DupRate.xls:md5,a859bc2031d46bf1cc4336205847caa3" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-26T15:43:14.989834" + }, + "rscript": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.DupRate_plot.r:md5,3c0325095cee4835b921e57d61c23dca" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-26T15:43:15.045525" + }, + "pdf": { + "content": [ + "test.DupRate_plot.pdf" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-26T14:33:06.857254" + }, + "seq_xls": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.seq.DupRate.xls:md5,ee8783399eec5a18522a6f08bece338b" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-26T15:43:15.101953" + }, + "versions": { + "content": [ + [ + "versions.yml:md5,8a0721f3247f97135eadb8eecbe142a1" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-26T14:33:06.877127" + }, + "sarscov2 paired-end [bam] - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.seq.DupRate.xls:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.pos.DupRate.xls:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.DupRate_plot.pdf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.DupRate_plot.r:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + "versions.yml:md5,8a0721f3247f97135eadb8eecbe142a1" + ], + "pdf": [ + [ + { + "id": "test", + "single_end": false + }, + "test.DupRate_plot.pdf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "pos_xls": [ + [ + { + "id": "test", + "single_end": false + }, + "test.pos.DupRate.xls:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "rscript": [ + [ + { + "id": "test", + "single_end": false + }, + "test.DupRate_plot.r:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "seq_xls": [ + [ + { + "id": "test", + "single_end": false + }, + "test.seq.DupRate.xls:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,8a0721f3247f97135eadb8eecbe142a1" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-21T09:53:03.817194" + } +} \ No newline at end of file diff --git a/modules/nf-core/rseqc/readduplication/tests/tags.yml b/modules/nf-core/rseqc/readduplication/tests/tags.yml new file mode 100644 index 0000000..fce3d35 --- /dev/null +++ b/modules/nf-core/rseqc/readduplication/tests/tags.yml @@ -0,0 +1,2 @@ +rseqc/readduplication: + - modules/nf-core/rseqc/readduplication/** diff --git a/modules/nf-core/rseqc/tin/environment.yml b/modules/nf-core/rseqc/tin/environment.yml new file mode 100644 index 0000000..304e38f --- /dev/null +++ b/modules/nf-core/rseqc/tin/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::rseqc=5.0.3 + - conda-forge::r-base>=3.5 diff --git a/modules/nf-core/rseqc/tin/main.nf b/modules/nf-core/rseqc/tin/main.nf new file mode 100644 index 0000000..397442f --- /dev/null +++ b/modules/nf-core/rseqc/tin/main.nf @@ -0,0 +1,48 @@ +process RSEQC_TIN { + tag "$meta.id" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/rseqc:5.0.3--py39hf95cd2a_0' : + 'biocontainers/rseqc:5.0.3--py39hf95cd2a_0' }" + + input: + tuple val(meta), path(bam), path(bai) + path bed + + output: + tuple val(meta), path("*.txt"), emit: txt + tuple val(meta), path("*.xls"), emit: xls + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + tin.py \\ + -i $bam \\ + -r $bed \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + rseqc: \$(tin.py --version | sed -e "s/tin.py //g") + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${bam.fileName}.summary.txt + touch ${bam.fileName}.tin.xls + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + rseqc: \$(tin.py --version | sed -e "s/tin.py //g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/rseqc/tin/meta.yml b/modules/nf-core/rseqc/tin/meta.yml new file mode 100644 index 0000000..6e363ec --- /dev/null +++ b/modules/nf-core/rseqc/tin/meta.yml @@ -0,0 +1,62 @@ +name: rseqc_tin +description: Calculte TIN (transcript integrity number) from RNA-seq reads +keywords: + - rnaseq + - transcript + - integrity +tools: + - rseqc: + description: | + RSeQC package provides a number of useful modules that can comprehensively evaluate + high throughput sequence data especially RNA-seq data. + homepage: http://rseqc.sourceforge.net/ + documentation: http://rseqc.sourceforge.net/ + doi: 10.1093/bioinformatics/bts356 + licence: ["GPL-3.0-or-later"] + identifier: biotools:rseqc +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: Input BAM file + pattern: "*.{bam}" + - bai: + type: file + description: Index for input BAM file + pattern: "*.{bai}" + - - bed: + type: file + description: BED file containing the reference gene model + pattern: "*.{bed}" +output: + - txt: + - meta: + type: file + description: TXT file containing tin.py results summary + pattern: "*.txt" + - "*.txt": + type: file + description: TXT file containing tin.py results summary + pattern: "*.txt" + - xls: + - meta: + type: file + description: XLS file containing tin.py results + pattern: "*.xls" + - "*.xls": + type: file + description: XLS file containing tin.py results + pattern: "*.xls" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" +maintainers: + - "@drpatelh" diff --git a/modules/nf-core/rseqc/tin/tests/main.nf.test b/modules/nf-core/rseqc/tin/tests/main.nf.test new file mode 100644 index 0000000..b11eba8 --- /dev/null +++ b/modules/nf-core/rseqc/tin/tests/main.nf.test @@ -0,0 +1,58 @@ +nextflow_process { + + name "Test Process RSEQC_TIN" + script "../main.nf" + process "RSEQC_TIN" + tag "modules" + tag "modules_nfcore" + tag "rseqc" + tag "rseqc/tin" + + test("sarscov2 paired-end [bam]") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai", checkIfExists: true) + ]) + input[1] = Channel.of(file(params.modules_testdata_base_path + "genomics/sarscov2/genome/bed/test.bed12", checkIfExists: true)) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 paired-end [bam] - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai", checkIfExists: true) + ]) + input[1] = Channel.of(file(params.modules_testdata_base_path + "genomics/sarscov2/genome/bed/test.bed12", checkIfExists: true)) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/rseqc/tin/tests/main.nf.test.snap b/modules/nf-core/rseqc/tin/tests/main.nf.test.snap new file mode 100644 index 0000000..caab3a0 --- /dev/null +++ b/modules/nf-core/rseqc/tin/tests/main.nf.test.snap @@ -0,0 +1,108 @@ +{ + "sarscov2 paired-end [bam]": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.summary.txt:md5,9d98447e178b89a89f6f5aba7a772fe6" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.tin.xls:md5,abdbb6d51a5cd7038cd862ce241ecef1" + ] + ], + "2": [ + "versions.yml:md5,79670dedaa11a978951c16ead7f49c24" + ], + "txt": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.summary.txt:md5,9d98447e178b89a89f6f5aba7a772fe6" + ] + ], + "versions": [ + "versions.yml:md5,79670dedaa11a978951c16ead7f49c24" + ], + "xls": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.tin.xls:md5,abdbb6d51a5cd7038cd862ce241ecef1" + ] + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-26T14:32:42.823857" + }, + "sarscov2 paired-end [bam] - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.tin.xls:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,79670dedaa11a978951c16ead7f49c24" + ], + "txt": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,79670dedaa11a978951c16ead7f49c24" + ], + "xls": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.tin.xls:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-21T09:55:57.905927" + } +} \ No newline at end of file diff --git a/modules/nf-core/rseqc/tin/tests/tags.yml b/modules/nf-core/rseqc/tin/tests/tags.yml new file mode 100644 index 0000000..741bbd0 --- /dev/null +++ b/modules/nf-core/rseqc/tin/tests/tags.yml @@ -0,0 +1,2 @@ +rseqc/tin: + - modules/nf-core/rseqc/tin/** diff --git a/modules/nf-core/samtools/convert/environment.yml b/modules/nf-core/samtools/convert/environment.yml new file mode 100644 index 0000000..62054fc --- /dev/null +++ b/modules/nf-core/samtools/convert/environment.yml @@ -0,0 +1,8 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::htslib=1.21 + - bioconda::samtools=1.21 diff --git a/modules/nf-core/samtools/convert/main.nf b/modules/nf-core/samtools/convert/main.nf new file mode 100644 index 0000000..cf9253d --- /dev/null +++ b/modules/nf-core/samtools/convert/main.nf @@ -0,0 +1,60 @@ +process SAMTOOLS_CONVERT { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.21--h50ea8bc_0' : + 'biocontainers/samtools:1.21--h50ea8bc_0' }" + + input: + tuple val(meta), path(input), path(index) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + + output: + tuple val(meta), path("*.bam") , emit: bam , optional: true + tuple val(meta), path("*.cram") , emit: cram, optional: true + tuple val(meta), path("*.bai") , emit: bai , optional: true + tuple val(meta), path("*.crai") , emit: crai, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def output_extension = input.getExtension() == "bam" ? "cram" : "bam" + + """ + samtools view \\ + --threads ${task.cpus} \\ + --reference ${fasta} \\ + $args \\ + $input \\ + -o ${prefix}.${output_extension} + + samtools index -@${task.cpus} ${prefix}.${output_extension} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def output_extension = input.getExtension() == "bam" ? "cram" : "bam" + def index_extension = output_extension == "bam" ? "bai" : "crai" + + """ + touch ${prefix}.${output_extension} + touch ${prefix}.${index_extension} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/convert/meta.yml b/modules/nf-core/samtools/convert/meta.yml new file mode 100644 index 0000000..d5bfa16 --- /dev/null +++ b/modules/nf-core/samtools/convert/meta.yml @@ -0,0 +1,103 @@ +name: samtools_convert +description: convert and then index CRAM -> BAM or BAM -> CRAM file +keywords: + - view + - index + - bam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:samtools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: BAM/CRAM file + pattern: "*.{bam,cram}" + - index: + type: file + description: BAM/CRAM index file + pattern: "*.{bai,crai}" + - - meta2: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: Reference file to create the CRAM file + pattern: "*.{fasta,fa}" + - - meta3: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fai: + type: file + description: Reference index file to create the CRAM file + pattern: "*.{fai}" +output: + - bam: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bam": + type: file + description: filtered/converted BAM file + pattern: "*{.bam}" + - cram: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.cram": + type: file + description: filtered/converted CRAM file + pattern: "*{cram}" + - bai: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bai": + type: file + description: filtered/converted BAM index + pattern: "*{.bai}" + - crai: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.crai": + type: file + description: filtered/converted CRAM index + pattern: "*{.crai}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@FriederikeHanssen" + - "@maxulysse" +maintainers: + - "@FriederikeHanssen" + - "@maxulysse" + - "@matthdsm" diff --git a/modules/nf-core/samtools/convert/tests/main.nf.test b/modules/nf-core/samtools/convert/tests/main.nf.test new file mode 100644 index 0000000..91a0c69 --- /dev/null +++ b/modules/nf-core/samtools/convert/tests/main.nf.test @@ -0,0 +1,107 @@ +nextflow_process { + + name "Test Process SAMTOOLS_CONVERT" + script "../main.nf" + process "SAMTOOLS_CONVERT" + + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/convert" + + test("sarscov2 - [bam, bai], fasta, fai") { + + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = Channel.of([ + [ id:'fai' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.cram[0][1]).name).match("bam_to_cram_alignment") }, + { assert snapshot(file(process.out.crai[0][1]).name).match("bam_to_cram_index") }, + { assert snapshot(process.out.versions).match("bam_to_cram_versions") } + ) + } + } + + test("homo_sapiens - [cram, crai], fasta, fai") { + + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = Channel.of([ + [ id:'fai' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.bam[0][1]).name).match("cram_to_bam_alignment") }, + { assert snapshot(file(process.out.bai[0][1]).name).match("cram_to_bam_alignment_index") }, + { assert snapshot(process.out.versions).match("cram_to_bam_versions") } + ) + } + } + + test("sarscov2 - [bam, bai], fasta, fai - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = Channel.of([ + [ id:'fai' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match("stub") } + ) + } + } +} diff --git a/modules/nf-core/samtools/convert/tests/main.nf.test.snap b/modules/nf-core/samtools/convert/tests/main.nf.test.snap new file mode 100644 index 0000000..a021254 --- /dev/null +++ b/modules/nf-core/samtools/convert/tests/main.nf.test.snap @@ -0,0 +1,131 @@ +{ + "cram_to_bam_alignment": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-06T11:14:51.300147176" + }, + "bam_to_cram_alignment": { + "content": [ + "test.cram" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-06T11:14:36.625470184" + }, + "cram_to_bam_versions": { + "content": [ + [ + "versions.yml:md5,5bc6eb42ab2a1ea6661f8ee998467ad6" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T07:52:35.516411351" + }, + "bam_to_cram_versions": { + "content": [ + [ + "versions.yml:md5,5bc6eb42ab2a1ea6661f8ee998467ad6" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T07:52:24.694454205" + }, + "stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.cram:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.crai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + "versions.yml:md5,5bc6eb42ab2a1ea6661f8ee998467ad6" + ], + "bai": [ + + ], + "bam": [ + + ], + "crai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.crai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "cram": [ + [ + { + "id": "test", + "single_end": false + }, + "test.cram:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,5bc6eb42ab2a1ea6661f8ee998467ad6" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T07:52:45.799885099" + }, + "bam_to_cram_index": { + "content": [ + "test.cram.crai" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-06T11:14:36.640009334" + }, + "cram_to_bam_alignment_index": { + "content": [ + "test.bam.bai" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-06T11:14:51.304477426" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/convert/tests/tags.yml b/modules/nf-core/samtools/convert/tests/tags.yml new file mode 100644 index 0000000..030d5eb --- /dev/null +++ b/modules/nf-core/samtools/convert/tests/tags.yml @@ -0,0 +1,2 @@ +samtools/convert: + - "modules/nf-core/samtools/convert/**" diff --git a/modules/nf-core/samtools/flagstat/environment.yml b/modules/nf-core/samtools/flagstat/environment.yml index 04c82f1..62054fc 100644 --- a/modules/nf-core/samtools/flagstat/environment.yml +++ b/modules/nf-core/samtools/flagstat/environment.yml @@ -1,6 +1,8 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda - - defaults dependencies: - - bioconda::samtools=1.17 + - bioconda::htslib=1.21 + - bioconda::samtools=1.21 diff --git a/modules/nf-core/samtools/flagstat/main.nf b/modules/nf-core/samtools/flagstat/main.nf index 9dee35a..c23f3a5 100644 --- a/modules/nf-core/samtools/flagstat/main.nf +++ b/modules/nf-core/samtools/flagstat/main.nf @@ -4,8 +4,8 @@ process SAMTOOLS_FLAGSTAT { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : - 'biocontainers/samtools:1.17--h00cdaf9_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.21--h50ea8bc_0' : + 'biocontainers/samtools:1.21--h50ea8bc_0' }" input: tuple val(meta), path(bam), path(bai) @@ -18,7 +18,6 @@ process SAMTOOLS_FLAGSTAT { task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" """ samtools \\ diff --git a/modules/nf-core/samtools/flagstat/meta.yml b/modules/nf-core/samtools/flagstat/meta.yml index 9799135..cdc4c25 100644 --- a/modules/nf-core/samtools/flagstat/meta.yml +++ b/modules/nf-core/samtools/flagstat/meta.yml @@ -1,5 +1,6 @@ name: samtools_flagstat -description: Counts the number of alignments in a BAM/CRAM/SAM file for each FLAG type +description: Counts the number of alignments in a BAM/CRAM/SAM file for each FLAG + type keywords: - stats - mapping @@ -17,34 +18,37 @@ tools: documentation: http://www.htslib.org/doc/samtools.html doi: 10.1093/bioinformatics/btp352 licence: ["MIT"] + identifier: biotools:samtools input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - bam: - type: file - description: BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" - - bai: - type: file - description: Index for BAM/CRAM/SAM file - pattern: "*.{bai,crai,sai}" + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - bai: + type: file + description: Index for BAM/CRAM/SAM file + pattern: "*.{bai,crai,sai}" output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - flagstat: - type: file - description: File containing samtools flagstat output - pattern: "*.{flagstat}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.flagstat": + type: file + description: File containing samtools flagstat output + pattern: "*.{flagstat}" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@drpatelh" maintainers: diff --git a/modules/nf-core/samtools/flagstat/tests/main.nf.test b/modules/nf-core/samtools/flagstat/tests/main.nf.test new file mode 100644 index 0000000..3b648a3 --- /dev/null +++ b/modules/nf-core/samtools/flagstat/tests/main.nf.test @@ -0,0 +1,56 @@ +nextflow_process { + + name "Test Process SAMTOOLS_FLAGSTAT" + script "../main.nf" + process "SAMTOOLS_FLAGSTAT" + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/flagstat" + + test("BAM") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("BAM - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/samtools/flagstat/tests/main.nf.test.snap b/modules/nf-core/samtools/flagstat/tests/main.nf.test.snap new file mode 100644 index 0000000..04c3852 --- /dev/null +++ b/modules/nf-core/samtools/flagstat/tests/main.nf.test.snap @@ -0,0 +1,72 @@ +{ + "BAM - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.flagstat:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,108a155f2d4a99f50bf3176904208d27" + ], + "flagstat": [ + [ + { + "id": "test", + "single_end": false + }, + "test.flagstat:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,108a155f2d4a99f50bf3176904208d27" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T08:02:58.866491759" + }, + "BAM": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.flagstat:md5,4f7ffd1e6a5e85524d443209ac97d783" + ] + ], + "1": [ + "versions.yml:md5,108a155f2d4a99f50bf3176904208d27" + ], + "flagstat": [ + [ + { + "id": "test", + "single_end": false + }, + "test.flagstat:md5,4f7ffd1e6a5e85524d443209ac97d783" + ] + ], + "versions": [ + "versions.yml:md5,108a155f2d4a99f50bf3176904208d27" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T08:02:47.383332837" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/flagstat/tests/tags.yml b/modules/nf-core/samtools/flagstat/tests/tags.yml new file mode 100644 index 0000000..2d2b725 --- /dev/null +++ b/modules/nf-core/samtools/flagstat/tests/tags.yml @@ -0,0 +1,2 @@ +samtools/flagstat: + - modules/nf-core/samtools/flagstat/** diff --git a/modules/nf-core/samtools/idxstats/environment.yml b/modules/nf-core/samtools/idxstats/environment.yml index 04c82f1..62054fc 100644 --- a/modules/nf-core/samtools/idxstats/environment.yml +++ b/modules/nf-core/samtools/idxstats/environment.yml @@ -1,6 +1,8 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda - - defaults dependencies: - - bioconda::samtools=1.17 + - bioconda::htslib=1.21 + - bioconda::samtools=1.21 diff --git a/modules/nf-core/samtools/idxstats/main.nf b/modules/nf-core/samtools/idxstats/main.nf index b22d084..e2bb6b2 100644 --- a/modules/nf-core/samtools/idxstats/main.nf +++ b/modules/nf-core/samtools/idxstats/main.nf @@ -4,8 +4,8 @@ process SAMTOOLS_IDXSTATS { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : - 'biocontainers/samtools:1.17--h00cdaf9_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.21--h50ea8bc_0' : + 'biocontainers/samtools:1.21--h50ea8bc_0' }" input: tuple val(meta), path(bam), path(bai) @@ -18,7 +18,6 @@ process SAMTOOLS_IDXSTATS { task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" """ diff --git a/modules/nf-core/samtools/idxstats/meta.yml b/modules/nf-core/samtools/idxstats/meta.yml index 344e92a..f0a6bcb 100644 --- a/modules/nf-core/samtools/idxstats/meta.yml +++ b/modules/nf-core/samtools/idxstats/meta.yml @@ -18,34 +18,37 @@ tools: documentation: http://www.htslib.org/doc/samtools.html doi: 10.1093/bioinformatics/btp352 licence: ["MIT"] + identifier: biotools:samtools input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - bam: - type: file - description: BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" - - bai: - type: file - description: Index for BAM/CRAM/SAM file - pattern: "*.{bai,crai,sai}" + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - bai: + type: file + description: Index for BAM/CRAM/SAM file + pattern: "*.{bai,crai,sai}" output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - idxstats: - type: file - description: File containing samtools idxstats output - pattern: "*.{idxstats}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.idxstats": + type: file + description: File containing samtools idxstats output + pattern: "*.{idxstats}" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@drpatelh" maintainers: diff --git a/modules/nf-core/samtools/idxstats/tests/main.nf.test b/modules/nf-core/samtools/idxstats/tests/main.nf.test new file mode 100644 index 0000000..5fd1fc7 --- /dev/null +++ b/modules/nf-core/samtools/idxstats/tests/main.nf.test @@ -0,0 +1,53 @@ +nextflow_process { + + name "Test Process SAMTOOLS_IDXSTATS" + script "../main.nf" + process "SAMTOOLS_IDXSTATS" + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/idxstats" + + test("bam") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("bam - stub") { + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + }} diff --git a/modules/nf-core/samtools/idxstats/tests/main.nf.test.snap b/modules/nf-core/samtools/idxstats/tests/main.nf.test.snap new file mode 100644 index 0000000..2cc89a3 --- /dev/null +++ b/modules/nf-core/samtools/idxstats/tests/main.nf.test.snap @@ -0,0 +1,72 @@ +{ + "bam - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.idxstats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,c8d7394830c3c1e5be150589571534fb" + ], + "idxstats": [ + [ + { + "id": "test", + "single_end": false + }, + "test.idxstats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,c8d7394830c3c1e5be150589571534fb" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T08:11:56.466856235" + }, + "bam": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.idxstats:md5,df60a8c8d6621100d05178c93fb053a2" + ] + ], + "1": [ + "versions.yml:md5,c8d7394830c3c1e5be150589571534fb" + ], + "idxstats": [ + [ + { + "id": "test", + "single_end": false + }, + "test.idxstats:md5,df60a8c8d6621100d05178c93fb053a2" + ] + ], + "versions": [ + "versions.yml:md5,c8d7394830c3c1e5be150589571534fb" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T08:11:46.311550359" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/idxstats/tests/tags.yml b/modules/nf-core/samtools/idxstats/tests/tags.yml new file mode 100644 index 0000000..d3057c6 --- /dev/null +++ b/modules/nf-core/samtools/idxstats/tests/tags.yml @@ -0,0 +1,2 @@ +samtools/idxstats: + - modules/nf-core/samtools/idxstats/** diff --git a/modules/nf-core/samtools/index/environment.yml b/modules/nf-core/samtools/index/environment.yml index 04c82f1..62054fc 100644 --- a/modules/nf-core/samtools/index/environment.yml +++ b/modules/nf-core/samtools/index/environment.yml @@ -1,6 +1,8 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda - - defaults dependencies: - - bioconda::samtools=1.17 + - bioconda::htslib=1.21 + - bioconda::samtools=1.21 diff --git a/modules/nf-core/samtools/index/main.nf b/modules/nf-core/samtools/index/main.nf index 256bd7c..3117561 100644 --- a/modules/nf-core/samtools/index/main.nf +++ b/modules/nf-core/samtools/index/main.nf @@ -4,8 +4,8 @@ process SAMTOOLS_INDEX { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : - 'biocontainers/samtools:1.17--h00cdaf9_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.21--h50ea8bc_0' : + 'biocontainers/samtools:1.21--h50ea8bc_0' }" input: tuple val(meta), path(input) @@ -35,10 +35,11 @@ process SAMTOOLS_INDEX { """ stub: + def args = task.ext.args ?: '' + def extension = file(input).getExtension() == 'cram' ? + "crai" : args.contains("-c") ? "csi" : "bai" """ - touch ${input}.bai - touch ${input}.crai - touch ${input}.csi + touch ${input}.${extension} cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/samtools/index/meta.yml b/modules/nf-core/samtools/index/meta.yml index 01a4ee0..db8df0d 100644 --- a/modules/nf-core/samtools/index/meta.yml +++ b/modules/nf-core/samtools/index/meta.yml @@ -15,38 +15,52 @@ tools: documentation: http://www.htslib.org/doc/samtools.html doi: 10.1093/bioinformatics/btp352 licence: ["MIT"] + identifier: biotools:samtools input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - bam: - type: file - description: BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: input file output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - bai: - type: file - description: BAM/CRAM/SAM index file - pattern: "*.{bai,crai,sai}" - - crai: - type: file - description: BAM/CRAM/SAM index file - pattern: "*.{bai,crai,sai}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bai": + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" - csi: - type: file - description: CSI index file - pattern: "*.{csi}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.csi": + type: file + description: CSI index file + pattern: "*.{csi}" + - crai: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.crai": + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@drpatelh" - "@ewels" diff --git a/modules/nf-core/samtools/index/tests/csi.nextflow.config b/modules/nf-core/samtools/index/tests/csi.nextflow.config new file mode 100644 index 0000000..0ed260e --- /dev/null +++ b/modules/nf-core/samtools/index/tests/csi.nextflow.config @@ -0,0 +1,7 @@ +process { + + withName: SAMTOOLS_INDEX { + ext.args = '-c' + } + +} diff --git a/modules/nf-core/samtools/index/tests/main.nf.test b/modules/nf-core/samtools/index/tests/main.nf.test new file mode 100644 index 0000000..ca34fb5 --- /dev/null +++ b/modules/nf-core/samtools/index/tests/main.nf.test @@ -0,0 +1,140 @@ +nextflow_process { + + name "Test Process SAMTOOLS_INDEX" + script "../main.nf" + process "SAMTOOLS_INDEX" + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/index" + + test("bai") { + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("crai") { + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("csi") { + config "./csi.nextflow.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.csi[0][1]).name, + process.out.versions + ).match() } + ) + } + } + + test("bai - stub") { + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("crai - stub") { + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("csi - stub") { + options "-stub" + config "./csi.nextflow.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/samtools/index/tests/main.nf.test.snap b/modules/nf-core/samtools/index/tests/main.nf.test.snap new file mode 100644 index 0000000..72d65e8 --- /dev/null +++ b/modules/nf-core/samtools/index/tests/main.nf.test.snap @@ -0,0 +1,250 @@ +{ + "csi - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ], + "bai": [ + + ], + "crai": [ + + ], + "csi": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T08:21:25.261127166" + }, + "crai - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.recalibrated.sorted.cram.crai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ], + "bai": [ + + ], + "crai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.recalibrated.sorted.cram.crai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "csi": [ + + ], + "versions": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T08:21:12.653194876" + }, + "bai - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ], + "bai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "crai": [ + + ], + "csi": [ + + ], + "versions": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T08:21:01.854932651" + }, + "csi": { + "content": [ + "test.paired_end.sorted.bam.csi", + [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T08:20:51.485364222" + }, + "crai": { + "content": [ + { + "0": [ + + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.recalibrated.sorted.cram.crai:md5,14bc3bd5c89cacc8f4541f9062429029" + ] + ], + "3": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ], + "bai": [ + + ], + "crai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.recalibrated.sorted.cram.crai:md5,14bc3bd5c89cacc8f4541f9062429029" + ] + ], + "csi": [ + + ], + "versions": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T08:20:40.518873972" + }, + "bai": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.bai:md5,704c10dd1326482448ca3073fdebc2f4" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ], + "bai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.bai:md5,704c10dd1326482448ca3073fdebc2f4" + ] + ], + "crai": [ + + ], + "csi": [ + + ], + "versions": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T08:20:21.184050361" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/index/tests/tags.yml b/modules/nf-core/samtools/index/tests/tags.yml new file mode 100644 index 0000000..e0f58a7 --- /dev/null +++ b/modules/nf-core/samtools/index/tests/tags.yml @@ -0,0 +1,2 @@ +samtools/index: + - modules/nf-core/samtools/index/** diff --git a/modules/nf-core/samtools/merge/environment.yml b/modules/nf-core/samtools/merge/environment.yml index 04c82f1..62054fc 100644 --- a/modules/nf-core/samtools/merge/environment.yml +++ b/modules/nf-core/samtools/merge/environment.yml @@ -1,6 +1,8 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda - - defaults dependencies: - - bioconda::samtools=1.17 + - bioconda::htslib=1.21 + - bioconda::samtools=1.21 diff --git a/modules/nf-core/samtools/merge/main.nf b/modules/nf-core/samtools/merge/main.nf index 21f785c..34da4c7 100644 --- a/modules/nf-core/samtools/merge/main.nf +++ b/modules/nf-core/samtools/merge/main.nf @@ -4,8 +4,8 @@ process SAMTOOLS_MERGE { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : - 'biocontainers/samtools:1.17--h00cdaf9_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.21--h50ea8bc_0' : + 'biocontainers/samtools:1.21--h50ea8bc_0' }" input: tuple val(meta), path(input_files, stageAs: "?/*") @@ -44,10 +44,14 @@ process SAMTOOLS_MERGE { """ stub: + def args = task.ext.args ?: '' prefix = task.ext.suffix ? "${meta.id}${task.ext.suffix}" : "${meta.id}" def file_type = input_files instanceof List ? input_files[0].getExtension() : input_files.getExtension() + def index_type = file_type == "bam" ? "csi" : "crai" + def index = args.contains("--write-index") ? "touch ${prefix}.${index_type}" : "" """ touch ${prefix}.${file_type} + ${index} cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/samtools/merge/meta.yml b/modules/nf-core/samtools/merge/meta.yml index 2e8f3db..235aa21 100644 --- a/modules/nf-core/samtools/merge/meta.yml +++ b/modules/nf-core/samtools/merge/meta.yml @@ -15,60 +15,81 @@ tools: documentation: http://www.htslib.org/doc/samtools.html doi: 10.1093/bioinformatics/btp352 licence: ["MIT"] + identifier: biotools:samtools input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - input_files: - type: file - description: BAM/CRAM file - pattern: "*.{bam,cram,sam}" - - meta2: - type: map - description: | - Groovy Map containing reference information - e.g. [ id:'genome' ] - - fasta: - type: file - description: Reference file the CRAM was created with (optional) - pattern: "*.{fasta,fa}" - - meta3: - type: map - description: | - Groovy Map containing reference information - e.g. [ id:'genome' ] - - fai: - type: file - description: Index of the reference file the CRAM was created with (optional) - pattern: "*.fai" + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input_files: + type: file + description: BAM/CRAM file + pattern: "*.{bam,cram,sam}" + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fasta: + type: file + description: Reference file the CRAM was created with (optional) + pattern: "*.{fasta,fa}" + - - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fai: + type: file + description: Index of the reference file the CRAM was created with (optional) + pattern: "*.fai" output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - bam: - type: file - description: BAM file - pattern: "*.{bam}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.bam: + type: file + description: BAM file + pattern: "*.{bam}" - cram: - type: file - description: CRAM file - pattern: "*.{cram}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.cram: + type: file + description: CRAM file + pattern: "*.{cram}" - csi: - type: file - description: BAM index file (optional) - pattern: "*.csi" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.csi": + type: file + description: BAM index file (optional) + pattern: "*.csi" - crai: - type: file - description: CRAM index file (optional) - pattern: "*.crai" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.crai": + type: file + description: CRAM index file (optional) + pattern: "*.crai" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@drpatelh" - "@yuukiiwa " diff --git a/modules/nf-core/samtools/merge/tests/index.config b/modules/nf-core/samtools/merge/tests/index.config new file mode 100644 index 0000000..8c5668c --- /dev/null +++ b/modules/nf-core/samtools/merge/tests/index.config @@ -0,0 +1,3 @@ +process { + ext.args = "--write-index" +} \ No newline at end of file diff --git a/modules/nf-core/samtools/merge/tests/main.nf.test b/modules/nf-core/samtools/merge/tests/main.nf.test new file mode 100644 index 0000000..40b36e8 --- /dev/null +++ b/modules/nf-core/samtools/merge/tests/main.nf.test @@ -0,0 +1,137 @@ +nextflow_process { + + name "Test Process SAMTOOLS_MERGE" + script "../main.nf" + process "SAMTOOLS_MERGE" + + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/merge" + + test("bams") { + + config "./index.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.methylated.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.sorted.bam', checkIfExists: true) ] + ]) + input[1] = [[],[]] + input[2] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.bam[0][1]).name).match("bams_bam") }, + { assert snapshot(process.out.cram).match("bams_cram") }, + { assert snapshot(file(process.out.csi[0][1]).name).match("bams_csi") }, + { assert snapshot(process.out.crai).match("bams_crai") }, + { assert snapshot(process.out.versions).match("bams_versions") } + ) + } + } + + test("crams") { + + config "./index.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test2.paired_end.recalibrated.sorted.cram', checkIfExists: true) ] + ]) + input[1] = Channel.of([ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = Channel.of([ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.cram[0][1]).name).match("crams_cram") }, + { assert snapshot(process.out.bam).match("crams_bam") }, + { assert snapshot(file(process.out.crai[0][1]).name).match("crams_crai") }, + { assert snapshot(process.out.csi).match("crams_csi") }, + { assert snapshot(process.out.versions).match("crams_versions") } + ) + } + } + + test("bam") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.methylated.sorted.bam', checkIfExists: true) ] + ]) + input[1] = [[],[]] + input[2] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.bam[0][1]).name).match("bam_bam") }, + { assert snapshot(process.out.cram).match("bam_cram") }, + { assert snapshot(process.out.crai).match("bam_crai") }, + { assert snapshot(process.out.csi).match("bam_csi") }, + { assert snapshot(process.out.versions).match("bam_versions") } + ) + } + } + + test("bams_stub") { + + config "./index.config" + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.methylated.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.sorted.bam', checkIfExists: true) ] + ]) + input[1] = [[],[]] + input[2] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.bam[0][1]).name).match("bams_stub_bam") }, + { assert snapshot(process.out.cram).match("bams_stub_cram") }, + { assert snapshot(file(process.out.csi[0][1]).name).match("bams_stub_csi") }, + { assert snapshot(process.out.crai).match("bams_stub_crai") }, + { assert snapshot(process.out.versions).match("bams_stub_versions") } + ) + } + } +} diff --git a/modules/nf-core/samtools/merge/tests/main.nf.test.snap b/modules/nf-core/samtools/merge/tests/main.nf.test.snap new file mode 100644 index 0000000..0a41e01 --- /dev/null +++ b/modules/nf-core/samtools/merge/tests/main.nf.test.snap @@ -0,0 +1,228 @@ +{ + "crams_cram": { + "content": [ + "test.cram" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:50:00.647389" + }, + "bams_stub_cram": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:50:19.937013" + }, + "bams_crai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:49:24.928616" + }, + "bams_bam": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:49:24.923289" + }, + "bams_cram": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:49:24.925716" + }, + "crams_csi": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:50:00.655959" + }, + "bam_bam": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:50:10.319539" + }, + "bam_versions": { + "content": [ + [ + "versions.yml:md5,d51d18a97513e370e43f0c891c51dfc4" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T09:16:30.476887194" + }, + "bams_csi": { + "content": [ + "test.bam.csi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:49:24.92719" + }, + "bams_stub_csi": { + "content": [ + "test.csi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:50:19.940498" + }, + "bam_crai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:50:10.328852" + }, + "bams_stub_versions": { + "content": [ + [ + "versions.yml:md5,d51d18a97513e370e43f0c891c51dfc4" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T09:16:52.203823961" + }, + "bam_cram": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:50:10.324219" + }, + "bams_stub_bam": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:50:19.933153" + }, + "bams_versions": { + "content": [ + [ + "versions.yml:md5,d51d18a97513e370e43f0c891c51dfc4" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T08:29:57.524363148" + }, + "crams_bam": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:50:00.650652" + }, + "crams_versions": { + "content": [ + [ + "versions.yml:md5,d51d18a97513e370e43f0c891c51dfc4" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T09:16:06.977096207" + }, + "bam_csi": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:50:10.33292" + }, + "crams_crai": { + "content": [ + "test.cram.crai" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:50:00.653512" + }, + "bams_stub_crai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:50:19.943839" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/merge/tests/tags.yml b/modules/nf-core/samtools/merge/tests/tags.yml new file mode 100644 index 0000000..b869abc --- /dev/null +++ b/modules/nf-core/samtools/merge/tests/tags.yml @@ -0,0 +1,2 @@ +samtools/merge: + - "modules/nf-core/samtools/merge/**" diff --git a/modules/nf-core/samtools/stats/environment.yml b/modules/nf-core/samtools/stats/environment.yml index 04c82f1..62054fc 100644 --- a/modules/nf-core/samtools/stats/environment.yml +++ b/modules/nf-core/samtools/stats/environment.yml @@ -1,6 +1,8 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda - - defaults dependencies: - - bioconda::samtools=1.17 + - bioconda::htslib=1.21 + - bioconda::samtools=1.21 diff --git a/modules/nf-core/samtools/stats/main.nf b/modules/nf-core/samtools/stats/main.nf index 07286ef..4443948 100644 --- a/modules/nf-core/samtools/stats/main.nf +++ b/modules/nf-core/samtools/stats/main.nf @@ -4,8 +4,8 @@ process SAMTOOLS_STATS { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : - 'biocontainers/samtools:1.17--h00cdaf9_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.21--h50ea8bc_0' : + 'biocontainers/samtools:1.21--h50ea8bc_0' }" input: tuple val(meta), path(input), path(input_index) @@ -19,7 +19,6 @@ process SAMTOOLS_STATS { task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def reference = fasta ? "--reference ${fasta}" : "" """ diff --git a/modules/nf-core/samtools/stats/meta.yml b/modules/nf-core/samtools/stats/meta.yml index 735ff81..77b020f 100644 --- a/modules/nf-core/samtools/stats/meta.yml +++ b/modules/nf-core/samtools/stats/meta.yml @@ -16,43 +16,46 @@ tools: documentation: http://www.htslib.org/doc/samtools.html doi: 10.1093/bioinformatics/btp352 licence: ["MIT"] + identifier: biotools:samtools input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - input: - type: file - description: BAM/CRAM file from alignment - pattern: "*.{bam,cram}" - - input_index: - type: file - description: BAI/CRAI file from alignment - pattern: "*.{bai,crai}" - - meta2: - type: map - description: | - Groovy Map containing reference information - e.g. [ id:'genome' ] - - fasta: - type: file - description: Reference file the CRAM was created with (optional) - pattern: "*.{fasta,fa}" + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: BAM/CRAM file from alignment + pattern: "*.{bam,cram}" + - input_index: + type: file + description: BAI/CRAI file from alignment + pattern: "*.{bai,crai}" + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fasta: + type: file + description: Reference file the CRAM was created with (optional) + pattern: "*.{fasta,fa}" output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - stats: - type: file - description: File containing samtools stats output - pattern: "*.{stats}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.stats": + type: file + description: File containing samtools stats output + pattern: "*.{stats}" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@drpatelh" - "@FriederikeHanssen" diff --git a/modules/nf-core/samtools/stats/tests/main.nf.test b/modules/nf-core/samtools/stats/tests/main.nf.test index e037132..5bc8930 100644 --- a/modules/nf-core/samtools/stats/tests/main.nf.test +++ b/modules/nf-core/samtools/stats/tests/main.nf.test @@ -3,76 +3,111 @@ nextflow_process { name "Test Process SAMTOOLS_STATS" script "../main.nf" process "SAMTOOLS_STATS" + tag "modules" - tag "modules/nf-core" + tag "modules_nfcore" tag "samtools" tag "samtools/stats" - test("SAMTOOLS STATS Should run without failures") { + test("bam") { when { - params { - - outdir = "$outputDir" - } process { """ - // define inputs of the process here. - input[0] = [ - [ id:'test', single_end:false ], // meta map - file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true) - - ] + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + ]) input[1] = [[],[]] """ - } } then { assertAll( - {assert process.success}, - {assert snapshot(process.out).match()} + {assert process.success}, + {assert snapshot(process.out).match()} ) } - } - test("SAMTOOLS CRAM Should run without failures") { + test("cram") { when { - params { - - outdir = "$outputDir" - } process { """ - // define inputs of the process here - input[0] = [ - [ id:'test', single_end:false ], // meta map - file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_cram'], checkIfExists: true), - file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_cram_crai'], checkIfExists: true) - - ] - input[1] = [ - [ id:'genome' ], - file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) - ] + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram.crai', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', checkIfExists: true) + ]) """ } - - } then { assertAll( - {assert process.success}, - {assert snapshot(process.out).match()} + {assert process.success}, + {assert snapshot(process.out).match()} ) } + } + + test("bam - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + ]) + input[1] = [[],[]] + """ + } + } + then { + assertAll( + {assert process.success}, + {assert snapshot(process.out).match()} + ) + } } + test("cram - stub") { + + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram.crai', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + {assert process.success}, + {assert snapshot(process.out).match()} + ) + } + } } diff --git a/modules/nf-core/samtools/stats/tests/main.nf.test.snap b/modules/nf-core/samtools/stats/tests/main.nf.test.snap index 516b2b0..df507be 100644 --- a/modules/nf-core/samtools/stats/tests/main.nf.test.snap +++ b/modules/nf-core/samtools/stats/tests/main.nf.test.snap @@ -1,5 +1,5 @@ { - "SAMTOOLS STATS Should run without failures": { + "cram": { "content": [ { "0": [ @@ -8,11 +8,11 @@ "id": "test", "single_end": false }, - "test.stats:md5,6e768486d5df0257351c5419a79f9c9b" + "test.stats:md5,a27fe55e49a341f92379bb20a65c6a06" ] ], "1": [ - "versions.yml:md5,08035f3409d934d47a416150884bb0df" + "versions.yml:md5,15b91d8c0e0440332e0fe4df80957043" ], "stats": [ [ @@ -20,17 +20,21 @@ "id": "test", "single_end": false }, - "test.stats:md5,6e768486d5df0257351c5419a79f9c9b" + "test.stats:md5,a27fe55e49a341f92379bb20a65c6a06" ] ], "versions": [ - "versions.yml:md5,08035f3409d934d47a416150884bb0df" + "versions.yml:md5,15b91d8c0e0440332e0fe4df80957043" ] } ], - "timestamp": "2023-10-18T12:12:42.998746" + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T09:29:16.767396182" }, - "SAMTOOLS CRAM Should run without failures": { + "bam - stub": { "content": [ { "0": [ @@ -39,11 +43,11 @@ "id": "test", "single_end": false }, - "test.stats:md5,7c9ee5747793cceb9d6f4d733345641a" + "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "1": [ - "versions.yml:md5,08035f3409d934d47a416150884bb0df" + "versions.yml:md5,15b91d8c0e0440332e0fe4df80957043" ], "stats": [ [ @@ -51,14 +55,88 @@ "id": "test", "single_end": false }, - "test.stats:md5,7c9ee5747793cceb9d6f4d733345641a" + "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "versions": [ - "versions.yml:md5,08035f3409d934d47a416150884bb0df" + "versions.yml:md5,15b91d8c0e0440332e0fe4df80957043" ] } ], - "timestamp": "2023-10-18T12:13:30.747222" + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T09:29:29.721580274" + }, + "cram - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,15b91d8c0e0440332e0fe4df80957043" + ], + "stats": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,15b91d8c0e0440332e0fe4df80957043" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T09:29:53.567964304" + }, + "bam": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,d53a2584376d78942839e9933a34d11b" + ] + ], + "1": [ + "versions.yml:md5,15b91d8c0e0440332e0fe4df80957043" + ], + "stats": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,d53a2584376d78942839e9933a34d11b" + ] + ], + "versions": [ + "versions.yml:md5,15b91d8c0e0440332e0fe4df80957043" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T09:28:50.73610604" } } \ No newline at end of file diff --git a/modules/nf-core/sortmerna/environment.yml b/modules/nf-core/sortmerna/environment.yml new file mode 100644 index 0000000..fab4408 --- /dev/null +++ b/modules/nf-core/sortmerna/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::sortmerna=4.3.6 diff --git a/modules/nf-core/sortmerna/main.nf b/modules/nf-core/sortmerna/main.nf new file mode 100644 index 0000000..7c17e50 --- /dev/null +++ b/modules/nf-core/sortmerna/main.nf @@ -0,0 +1,110 @@ +process SORTMERNA { + tag "$meta.id" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/sortmerna:4.3.6--h9ee0642_0' : + 'biocontainers/sortmerna:4.3.6--h9ee0642_0' }" + + input: + tuple val(meta), path(reads) + tuple val(meta2), path(fastas) + tuple val(meta3), path(index) + + output: + tuple val(meta), path("*non_rRNA.fastq.gz"), emit: reads, optional: true + tuple val(meta), path("*.log") , emit: log, optional: true + tuple val(meta2), path("idx") , emit: index, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + def index_only = args.contains('--index 1')? true : false + def skip_index = args.contains('--index 0')? true : false + def paired_end = reads instanceof List + def paired_cmd = '' + def reads_args = '' + def out2_cmd = '' + def mv_cmd = '' + def reads_input = '' + def refs_input = '' + + if (! index_only){ + reads_args = '--aligned rRNA_reads --fastx --other non_rRNA_reads' + reads_input = paired_end ? reads.collect{"--reads $it"}.join(' ') : "--reads $reads" + def n_fastq = paired_end ? reads.size() : 1 + if ( n_fastq == 1 ) { + mv_cmd = """ + mv non_rRNA_reads.f*q.gz ${prefix}.non_rRNA.fastq.gz + mv rRNA_reads.log ${prefix}.sortmerna.log + """ + } else { + mv_cmd = """ + mv non_rRNA_reads_fwd.f*q.gz ${prefix}_1.non_rRNA.fastq.gz + mv non_rRNA_reads_rev.f*q.gz ${prefix}_2.non_rRNA.fastq.gz + mv rRNA_reads.log ${prefix}.sortmerna.log + """ + paired_cmd = "--paired_in" + out2_cmd = "--out2" + } + } + """ + sortmerna \\ + ${'--ref '+fastas.join(' --ref ')} \\ + $refs_input \\ + $reads_input \\ + --threads $task.cpus \\ + --workdir . \\ + $reads_args \\ + $paired_cmd \\ + $out2_cmd \\ + $args + + $mv_cmd + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + sortmerna: \$(echo \$(sortmerna --version 2>&1) | sed 's/^.*SortMeRNA version //; s/ Build Date.*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + def index_only = args.contains('--index 1')? true : false + def paired_end = reads instanceof List + def paired_cmd = '' + def out2_cmd = '' + def mv_cmd = '' + def reads_input = '' + + if (! index_only){ + reads_input = paired_end ? reads.collect{"--reads $it"}.join(' ') : "--reads $reads" + def n_fastq = paired_end ? reads.size() : 1 + if ( n_fastq == 1 ) { + mv_cmd = "touch ${prefix}.non_rRNA.fastq.gz" + } else { + mv_cmd = """ + touch ${prefix}_1.non_rRNA.fastq.gz + touch ${prefix}_2.non_rRNA.fastq.gz + """ + } + } + """ + $mv_cmd + mkdir -p idx + touch ${prefix}.sortmerna.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + sortmerna: \$(echo \$(sortmerna --version 2>&1) | sed 's/^.*SortMeRNA version //; s/ Build Date.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/sortmerna/meta.yml b/modules/nf-core/sortmerna/meta.yml new file mode 100644 index 0000000..3b43d00 --- /dev/null +++ b/modules/nf-core/sortmerna/meta.yml @@ -0,0 +1,94 @@ +name: sortmerna +description: Local sequence alignment tool for filtering, mapping and clustering. +keywords: + - filtering + - mapping + - clustering + - rRNA + - ribosomal RNA +tools: + - SortMeRNA: + description: The core algorithm is based on approximate seeds and allows for sensitive + analysis of NGS reads. The main application of SortMeRNA is filtering rRNA from + metatranscriptomic data. SortMeRNA takes as input files of reads (fasta, fastq, + fasta.gz, fastq.gz) and one or multiple rRNA database file(s), and sorts apart + aligned and rejected reads into two files. Additional applications include clustering + and taxonomy assignation available through QIIME v1.9.1. SortMeRNA works with + Illumina, Ion Torrent and PacBio data, and can produce SAM and BLAST-like alignments. + homepage: https://hpc.nih.gov/apps/sortmeRNA.html + documentation: https://github.com/biocore/sortmerna/wiki/ + licence: ["GPL-3.0-or-later"] + identifier: biotools:sortmerna +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - fastas: + type: file + description: | + Path to reference file(s) + - - meta3: + type: map + description: | + Groovy Map containing index information + e.g. [ id:'test' ] + - index: + type: directory + description: | + Path to index directory of a previous sortmerna run +output: + - reads: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ], or reference information from an + indexing-only run + - "*non_rRNA.fastq.gz": + type: file + description: The filtered fastq reads + pattern: "*fastq.gz" + - log: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ], or reference information from an + indexing-only run + - "*.log": + type: file + description: SortMeRNA log file + pattern: "*sortmerna.log" + - index: + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - idx: + type: directory + description: | + Path to index directory generated by sortmern + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@mashehu" +maintainers: + - "@drpatelh" + - "@mashehu" diff --git a/modules/nf-core/sortmerna/tests/indexing_only.config b/modules/nf-core/sortmerna/tests/indexing_only.config new file mode 100644 index 0000000..3e74a32 --- /dev/null +++ b/modules/nf-core/sortmerna/tests/indexing_only.config @@ -0,0 +1,5 @@ +process { + withName: 'SORTMERNA' { + ext.args = '--index 1' + } +} diff --git a/modules/nf-core/sortmerna/tests/main.nf.test b/modules/nf-core/sortmerna/tests/main.nf.test new file mode 100644 index 0000000..73bc119 --- /dev/null +++ b/modules/nf-core/sortmerna/tests/main.nf.test @@ -0,0 +1,327 @@ +nextflow_process { + + name "Test Process SORTMERNA" + script "../main.nf" + process "SORTMERNA" + tag "modules" + tag "modules_nfcore" + tag "sortmerna" + + test("sarscov2 indexing only") { + + config './indexing_only.config' + + when { + process { + """ + input[0] = Channel.of([[],[]]) + input[1] = [ [id:'test2'], // meta map + [ file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.fasta", checkIfExists: true) ] + ] + input[2] = Channel.of([[],[]]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert ! process.out.reads }, + { assert snapshot(process.out.index).match("index_index_only") }, + { assert snapshot(process.out.versions).match("versions_index_only") } + ) + } + + } + + test("sarscov2 indexing only stub") { + + options '-stub' + config './indexing_only.config' + + when { + process { + """ + input[0] = Channel.of([[],[]]) + input[1] = [ [id:'test2'], // meta map + [ file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.fasta", checkIfExists: true) ] + ] + input[2] = Channel.of([[],[]]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert ! process.out.reads }, + { assert snapshot(process.out.index).match("index_only_stub") }, + { assert snapshot(process.out.versions).match("versions_index_only_stub") } + ) + } + + } + + test("sarscov2 single_end") { + + when { + process { + """ + input[0] = [ [ id:'test' ], // meta map + [ file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_1.fastq.gz", checkIfExists: true) ] + ] + input[1] = [ [id:'test2'], // meta map + [ file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.fasta", checkIfExists: true) ] + ] + input[2] = Channel.of([[],[]]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.reads }, + { assert file(process.out.log[0][1]).text.contains("Total reads passing E-value threshold = 100 (100.00)") }, + { + assert snapshot( + ( + [process.out.reads[0][0].toString()] + // meta + process.out.reads.collect { file(it[1]).getName() } + + process.out.log.collect { file(it[1]).getName() } + ).sort() + ).match("sarscov2 single_end_match") + }, + { assert snapshot(process.out.index).match("index_single_end") }, + { assert snapshot(process.out.versions).match("versions_single_end") } + ) + } + + } + + test("sarscov2 single_end stub") { + + options '-stub' + + when { + process { + """ + input[0] = [ [ id:'test' ], // meta map + [ file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_1.fastq.gz", checkIfExists: true) ] + ] + input[1] = [ [id:'test2'], // meta map + [ file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.fasta", checkIfExists: true) ] + ] + input[2] = Channel.of([[],[]]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { + assert snapshot( + ( + [process.out.reads[0][0].toString()] + // meta + process.out.reads.collect { file(it[1]).getName() } + + process.out.log.collect { file(it[1]).getName() } + ).sort() + ).match("sarscov2 single_end-for_stub_match") + }, + { assert snapshot(process.out.index).match("index_single_end_stub") }, + { assert snapshot(process.out.versions).match("versions_single_end_stub") } + ) + } + + } + + test("sarscov2 paired_end") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_1.fastq.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_2.fastq.gz", checkIfExists: true) + ] + ] + input[1] = [ [id:'test2'], // meta map + [ file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.fasta", checkIfExists: true) ] + ] + input[2] = Channel.of([[],[]]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.reads }, + { assert file(process.out.log[0][1]).text.contains("Total reads passing E-value threshold = 200 (100.00)") }, + { + assert snapshot( + ( + [process.out.reads[0][0].toString()] + // meta + process.out.reads.collect { it[1].collect { item -> file(item).getName() } } + + process.out.log.collect { file(it[1]).getName() } + ).sort() + ).match("sarscov2 paired_end_match") + }, + { assert snapshot(process.out.index).match("index_paired_end") }, + { assert snapshot(process.out.versions).match("versions_paired_end") } + ) + } + + } + + test("sarscov2 paired_end stub") { + + options '-stub' + + when { + process { + """ + input[0] = [ [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_1.fastq.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_2.fastq.gz", checkIfExists: true) + ] + ] + input[1] = [ [id:'test2'], // meta map + [ file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.fasta", checkIfExists: true) ] + ] + input[2] = Channel.of([[],[]]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { + assert snapshot( + ( + [process.out.reads[0][0].toString()] + // meta + process.out.reads.collect { it[1].collect { item -> file(item).getName() } } + + process.out.log.collect { file(it[1]).getName() } + ).sort() + ).match("sarscov2 paired_end-for_stub_match") + }, + { assert snapshot(process.out.index).match("index_paired_end_stub") }, + { assert snapshot(process.out.versions).match("versions_paired_end_stub") } + ) + } + + } + + test("sarscov2 single_end premade_index") { + + config './premade_index.config' + + setup { + + run("SORTMERNA", alias: "SORTMERNA_INDEX") { + script "../main.nf" + process { + """ + input[0] = Channel.of([[],[]]) + input[1] = [ [id:'test2'], // meta map + [ file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.fasta", checkIfExists: true) ] + ] + input[2] = Channel.of([[],[]]) + """ + } + } + } + + when { + process { + """ + input[0] = [ [ id:'test' ], // meta map + [ file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_1.fastq.gz", checkIfExists: true) ] + ] + input[1] = [ [id:'test2'], // meta map + [ file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.fasta", checkIfExists: true) ] + ] + input[2] = SORTMERNA_INDEX.out.index + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.reads }, + { assert file(process.out.log[0][1]).text.contains("Total reads passing E-value threshold = 100 (100.00)") }, + { + assert snapshot( + ( + [process.out.reads[0][0].toString()] + // meta + process.out.reads.collect { file(it[1]).getName() } + + process.out.log.collect { file(it[1]).getName() } + ).sort() + ).match("sarscov2 single_end_premade_index_match") + }, + { assert snapshot(process.out.index).match("index_single_end_premade_index") }, + { assert snapshot(process.out.versions).match("versions_single_end_premade_index") } + ) + } + } + + test("sarscov2 single_end premade_index stub") { + + config './premade_index.config' + options '-stub' + + setup { + + run("SORTMERNA", alias: "SORTMERNA_INDEX") { + script "../main.nf" + process { + """ + input[0] = Channel.of([[],[]]) + input[1] = [ [id:'test2'], // meta map + [ file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.fasta", checkIfExists: true) ] + ] + input[2] = Channel.of([[],[]]) + """ + } + } + } + + when { + process { + """ + input[0] = [ [ id:'test' ], // meta map + [ file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_1.fastq.gz", checkIfExists: true) ] + ] + input[1] = [ [id:'test2'], // meta map + [ file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.fasta", checkIfExists: true) ] + ] + input[2] = SORTMERNA_INDEX.out.index + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.reads }, + { + assert snapshot( + ( + [process.out.reads[0][0].toString()] + // meta + process.out.reads.collect { file(it[1]).getName() } + + process.out.log.collect { file(it[1]).getName() } + ).sort() + ).match("sarscov2 single_end_premade_index_match_stub") + }, + { assert snapshot(process.out.index).match("index_single_end_premade_index_stub") }, + { assert snapshot(process.out.versions).match("versions_single_end_premade_index_stub") } + ) + } + } +} diff --git a/modules/nf-core/sortmerna/tests/main.nf.test.snap b/modules/nf-core/sortmerna/tests/main.nf.test.snap new file mode 100644 index 0000000..86e8473 --- /dev/null +++ b/modules/nf-core/sortmerna/tests/main.nf.test.snap @@ -0,0 +1,352 @@ +{ + "versions_paired_end_stub": { + "content": [ + [ + "versions.yml:md5,7df9d50209f351e1f75e05a1fad6ba4b" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-07T12:27:11.223149" + }, + "index_paired_end_stub": { + "content": [ + [ + [ + { + "id": "test2" + }, + [ + + ] + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-26T14:24:25.384097178" + }, + "versions_paired_end": { + "content": [ + [ + "versions.yml:md5,7df9d50209f351e1f75e05a1fad6ba4b" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-07T12:27:04.517155" + }, + "versions_single_end_stub": { + "content": [ + [ + "versions.yml:md5,7df9d50209f351e1f75e05a1fad6ba4b" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-01T12:10:35.228450189" + }, + "sarscov2 single_end_match": { + "content": [ + [ + "test.non_rRNA.fastq.gz", + "test.sortmerna.log", + "{id=test}" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-07T12:28:23.20327" + }, + "index_only_stub": { + "content": [ + [ + [ + { + "id": "test2" + }, + [ + + ] + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-26T15:00:47.128504164" + }, + "index_single_end_premade_index": { + "content": [ + [ + [ + { + "id": "test2" + }, + [ + "2415186086593376314.bursttrie_0.dat:md5,74f7f020e8d46e24a8a2e9c5fbcd564a", + "2415186086593376314.kmer_0.dat:md5,4a0bcb71b120f6a6949b7969292ef2e7", + "2415186086593376314.pos_0.dat:md5,bc2875e4cc4017707306565e396839ef", + "2415186086593376314.stats:md5,67c9d4c768f28a450fc82a2b5d43db5c" + ] + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-26T15:01:53.832643452" + }, + "versions_single_end_premade_index": { + "content": [ + [ + "versions.yml:md5,7df9d50209f351e1f75e05a1fad6ba4b" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-26T15:01:53.902154982" + }, + "sarscov2 paired_end-for_stub_match": { + "content": [ + [ + "{id=test}", + [ + "test_1.non_rRNA.fastq.gz", + "test_2.non_rRNA.fastq.gz" + ], + "test.sortmerna.log" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-07T12:28:56.063579" + }, + "index_paired_end": { + "content": [ + [ + [ + { + "id": "test2" + }, + [ + "2415186086593376314.bursttrie_0.dat:md5,74f7f020e8d46e24a8a2e9c5fbcd564a", + "2415186086593376314.kmer_0.dat:md5,4a0bcb71b120f6a6949b7969292ef2e7", + "2415186086593376314.pos_0.dat:md5,bc2875e4cc4017707306565e396839ef", + "2415186086593376314.stats:md5,67c9d4c768f28a450fc82a2b5d43db5c" + ] + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-26T14:24:14.272659781" + }, + "sarscov2 single_end_premade_index_match_stub": { + "content": [ + [ + "test.non_rRNA.fastq.gz", + "test.sortmerna.log", + "{id=test}" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-26T15:05:01.913287272" + }, + "sarscov2 single_end-for_stub_match": { + "content": [ + [ + "test.non_rRNA.fastq.gz", + "test.sortmerna.log", + "{id=test}" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-07T12:28:29.197913" + }, + "sarscov2 paired_end_match": { + "content": [ + [ + "{id=test}", + [ + "test_1.non_rRNA.fastq.gz", + "test_2.non_rRNA.fastq.gz" + ], + "test.sortmerna.log" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-07T12:28:49.914992" + }, + "versions_single_end": { + "content": [ + [ + "versions.yml:md5,7df9d50209f351e1f75e05a1fad6ba4b" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-26T14:36:27.14244294" + }, + "versions_index_only": { + "content": [ + [ + "versions.yml:md5,7df9d50209f351e1f75e05a1fad6ba4b" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-26T15:00:35.609161481" + }, + "versions_single_end_premade_index_stub": { + "content": [ + [ + "versions.yml:md5,7df9d50209f351e1f75e05a1fad6ba4b" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-26T15:05:02.059858431" + }, + "index_single_end_stub": { + "content": [ + [ + [ + { + "id": "test2" + }, + [ + + ] + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-26T14:23:58.068772508" + }, + "versions_index_only_stub": { + "content": [ + [ + "versions.yml:md5,7df9d50209f351e1f75e05a1fad6ba4b" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-26T15:00:47.169402699" + }, + "index_single_end_premade_index_stub": { + "content": [ + [ + [ + { + "id": "test2" + }, + [ + + ] + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-26T15:05:01.953316205" + }, + "index_single_end": { + "content": [ + [ + [ + { + "id": "test2" + }, + [ + "2415186086593376314.bursttrie_0.dat:md5,74f7f020e8d46e24a8a2e9c5fbcd564a", + "2415186086593376314.kmer_0.dat:md5,4a0bcb71b120f6a6949b7969292ef2e7", + "2415186086593376314.pos_0.dat:md5,bc2875e4cc4017707306565e396839ef", + "2415186086593376314.stats:md5,67c9d4c768f28a450fc82a2b5d43db5c" + ] + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-26T14:36:26.88061978" + }, + "index_index_only": { + "content": [ + [ + [ + { + "id": "test2" + }, + [ + "2415186086593376314.bursttrie_0.dat:md5,74f7f020e8d46e24a8a2e9c5fbcd564a", + "2415186086593376314.kmer_0.dat:md5,4a0bcb71b120f6a6949b7969292ef2e7", + "2415186086593376314.pos_0.dat:md5,bc2875e4cc4017707306565e396839ef", + "2415186086593376314.stats:md5,67c9d4c768f28a450fc82a2b5d43db5c" + ] + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-26T15:00:35.34089252" + }, + "sarscov2 single_end_premade_index_match": { + "content": [ + [ + "test.non_rRNA.fastq.gz", + "test.sortmerna.log", + "{id=test}" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-26T15:01:53.797737296" + } +} \ No newline at end of file diff --git a/modules/nf-core/sortmerna/tests/premade_index.config b/modules/nf-core/sortmerna/tests/premade_index.config new file mode 100644 index 0000000..ab86d2e --- /dev/null +++ b/modules/nf-core/sortmerna/tests/premade_index.config @@ -0,0 +1,8 @@ +process { + withName: 'SORTMERNA_INDEX' { + ext.args = '--index 1' + } + withName: 'SORTMERNA' { + ext.args = '--index 0' + } +} diff --git a/modules/nf-core/sortmerna/tests/tags.yml b/modules/nf-core/sortmerna/tests/tags.yml new file mode 100644 index 0000000..e088480 --- /dev/null +++ b/modules/nf-core/sortmerna/tests/tags.yml @@ -0,0 +1,2 @@ +sortmerna: + - modules/nf-core/sortmerna/** diff --git a/modules/nf-core/star/align/environment.yml b/modules/nf-core/star/align/environment.yml index a61d2f6..7c57530 100644 --- a/modules/nf-core/star/align/environment.yml +++ b/modules/nf-core/star/align/environment.yml @@ -1,8 +1,9 @@ channels: - conda-forge - bioconda - - defaults + dependencies: - - bioconda::star=2.7.10a - - bioconda::samtools=1.16.1 + - bioconda::htslib=1.20 + - bioconda::samtools=1.20 + - bioconda::star=2.7.11b - conda-forge::gawk=5.1.0 diff --git a/modules/nf-core/star/align/main.nf b/modules/nf-core/star/align/main.nf index fa645a6..417071b 100644 --- a/modules/nf-core/star/align/main.nf +++ b/modules/nf-core/star/align/main.nf @@ -4,8 +4,8 @@ process STAR_ALIGN { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:1df389393721fc66f3fd8778ad938ac711951107-0' : - 'biocontainers/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:1df389393721fc66f3fd8778ad938ac711951107-0' }" + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b4/b425bc2a95806d878993f9a66dae3ae80ac2dafff4c208c5ae01b7a90a32fa91/data' : + 'community.wave.seqera.io/library/star_samtools_htslib_gawk:10c6e8c834460019' }" input: tuple val(meta), path(reads, stageAs: "input*/*") @@ -21,33 +21,34 @@ process STAR_ALIGN { tuple val(meta), path('*Log.progress.out'), emit: log_progress path "versions.yml" , emit: versions - tuple val(meta), path('*d.out.bam') , optional:true, emit: bam - tuple val(meta), path('*sortedByCoord.out.bam') , optional:true, emit: bam_sorted - tuple val(meta), path('*toTranscriptome.out.bam'), optional:true, emit: bam_transcript - tuple val(meta), path('*Aligned.unsort.out.bam') , optional:true, emit: bam_unsorted - tuple val(meta), path('*fastq.gz') , optional:true, emit: fastq - tuple val(meta), path('*.tab') , optional:true, emit: tab - tuple val(meta), path('*.SJ.out.tab') , optional:true, emit: spl_junc_tab - tuple val(meta), path('*.ReadsPerGene.out.tab') , optional:true, emit: read_per_gene_tab - tuple val(meta), path('*.out.junction') , optional:true, emit: junction - tuple val(meta), path('*.out.sam') , optional:true, emit: sam - tuple val(meta), path('*.wig') , optional:true, emit: wig - tuple val(meta), path('*.bg') , optional:true, emit: bedgraph + tuple val(meta), path('*d.out.bam') , optional:true, emit: bam + tuple val(meta), path("${prefix}.sortedByCoord.out.bam") , optional:true, emit: bam_sorted + tuple val(meta), path("${prefix}.Aligned.sortedByCoord.out.bam") , optional:true, emit: bam_sorted_aligned + tuple val(meta), path('*toTranscriptome.out.bam') , optional:true, emit: bam_transcript + tuple val(meta), path('*Aligned.unsort.out.bam') , optional:true, emit: bam_unsorted + tuple val(meta), path('*fastq.gz') , optional:true, emit: fastq + tuple val(meta), path('*.tab') , optional:true, emit: tab + tuple val(meta), path('*.SJ.out.tab') , optional:true, emit: spl_junc_tab + tuple val(meta), path('*.ReadsPerGene.out.tab') , optional:true, emit: read_per_gene_tab + tuple val(meta), path('*.out.junction') , optional:true, emit: junction + tuple val(meta), path('*.out.sam') , optional:true, emit: sam + tuple val(meta), path('*.wig') , optional:true, emit: wig + tuple val(meta), path('*.bg') , optional:true, emit: bedgraph when: task.ext.when == null || task.ext.when script: def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" + prefix = task.ext.prefix ?: "${meta.id}" def reads1 = [], reads2 = [] meta.single_end ? [reads].flatten().each{reads1 << it} : reads.eachWithIndex{ v, ix -> ( ix & 1 ? reads2 : reads1) << v } def ignore_gtf = star_ignore_sjdbgtf ? '' : "--sjdbGTFfile $gtf" def seq_platform = seq_platform ? "'PL:$seq_platform'" : "" def seq_center = seq_center ? "'CN:$seq_center'" : "" - def attrRG = args.contains("--outSAMattrRGline") ? "" : "--outSAMattrRGline 'ID:$prefix' $seq_center 'SM:$prefix' $seq_platform" + attrRG = args.contains("--outSAMattrRGline") ? "" : "--outSAMattrRGline 'ID:$prefix' $seq_center 'SM:$prefix' $seq_platform" def out_sam_type = (args.contains('--outSAMtype')) ? '' : '--outSAMtype BAM Unsorted' - def mv_unsorted_bam = (args.contains('--outSAMtype BAM Unsorted SortedByCoordinate')) ? "mv ${prefix}.Aligned.out.bam ${prefix}.Aligned.unsort.out.bam" : '' + mv_unsorted_bam = (args.contains('--outSAMtype BAM Unsorted SortedByCoordinate')) ? "mv ${prefix}.Aligned.out.bam ${prefix}.Aligned.unsort.out.bam" : '' """ STAR \\ --genomeDir $index \\ @@ -79,8 +80,10 @@ process STAR_ALIGN { """ stub: - def prefix = task.ext.prefix ?: "${meta.id}" + prefix = task.ext.prefix ?: "${meta.id}" """ + echo "" | gzip > ${prefix}.unmapped_1.fastq.gz + echo "" | gzip > ${prefix}.unmapped_2.fastq.gz touch ${prefix}Xd.out.bam touch ${prefix}.Log.final.out touch ${prefix}.Log.out @@ -89,8 +92,6 @@ process STAR_ALIGN { touch ${prefix}.toTranscriptome.out.bam touch ${prefix}.Aligned.unsort.out.bam touch ${prefix}.Aligned.sortedByCoord.out.bam - touch ${prefix}.unmapped_1.fastq.gz - touch ${prefix}.unmapped_2.fastq.gz touch ${prefix}.tab touch ${prefix}.SJ.out.tab touch ${prefix}.ReadsPerGene.out.tab diff --git a/modules/nf-core/star/align/meta.yml b/modules/nf-core/star/align/meta.yml index e80dbb7..5cfe763 100644 --- a/modules/nf-core/star/align/meta.yml +++ b/modules/nf-core/star/align/meta.yml @@ -14,97 +14,212 @@ tools: manual: https://github.com/alexdobin/STAR/blob/master/doc/STARmanual.pdf doi: 10.1093/bioinformatics/bts635 licence: ["MIT"] + identifier: biotools:star input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - reads: - type: file - description: | - List of input FastQ files of size 1 and 2 for single-end and paired-end data, - respectively. - - meta2: - type: map - description: | - Groovy Map containing reference information - e.g. [ id:'test' ] - - index: - type: directory - description: STAR genome index - pattern: "star" - - meta3: - type: map - description: | - Groovy Map containing reference information - e.g. [ id:'test' ] - - gtf: - type: file - description: Annotation GTF file - pattern: "*.{gtf}" - - star_ignore_sjdbgtf: - type: boolean - description: Ignore annotation GTF file - - seq_platform: - type: string - description: Sequencing platform - - seq_center: - type: string - description: Sequencing center + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - index: + type: directory + description: STAR genome index + pattern: "star" + - - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - gtf: + type: file + description: Annotation GTF file + pattern: "*.{gtf}" + - - star_ignore_sjdbgtf: + type: boolean + description: Ignore annotation GTF file + - - seq_platform: + type: string + description: Sequencing platform + - - seq_center: + type: string + description: Sequencing center output: - - bam: - type: file - description: Output BAM file containing read alignments - pattern: "*.{bam}" - log_final: - type: file - description: STAR final log file - pattern: "*Log.final.out" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*Log.final.out": + type: file + description: STAR final log file + pattern: "*Log.final.out" - log_out: - type: file - description: STAR lot out file - pattern: "*Log.out" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*Log.out": + type: file + description: STAR lot out file + pattern: "*Log.out" - log_progress: - type: file - description: STAR log progress file - pattern: "*Log.progress.out" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*Log.progress.out": + type: file + description: STAR log progress file + pattern: "*Log.progress.out" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + - bam: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*d.out.bam": + type: file + description: Output BAM file containing read alignments + pattern: "*.{bam}" - bam_sorted: - type: file - description: Sorted BAM file of read alignments (optional) - pattern: "*sortedByCoord.out.bam" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.sortedByCoord.out.bam: + type: file + description: Sorted BAM file of read alignments (optional) + pattern: "*sortedByCoord.out.bam" + - bam_sorted_aligned: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.Aligned.sortedByCoord.out.bam: + type: file + description: Sorted BAM file of read alignments (optional) + pattern: "*.Aligned.sortedByCoord.out.bam" - bam_transcript: - type: file - description: Output BAM file of transcriptome alignment (optional) - pattern: "*toTranscriptome.out.bam" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*toTranscriptome.out.bam": + type: file + description: Output BAM file of transcriptome alignment (optional) + pattern: "*toTranscriptome.out.bam" - bam_unsorted: - type: file - description: Unsorted BAM file of read alignments (optional) - pattern: "*Aligned.unsort.out.bam" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*Aligned.unsort.out.bam": + type: file + description: Unsorted BAM file of read alignments (optional) + pattern: "*Aligned.unsort.out.bam" - fastq: - type: file - description: Unmapped FastQ files (optional) - pattern: "*fastq.gz" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*fastq.gz": + type: file + description: Unmapped FastQ files (optional) + pattern: "*fastq.gz" - tab: - type: file - description: STAR output tab file(s) (optional) - pattern: "*.tab" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.tab": + type: file + description: STAR output tab file(s) (optional) + pattern: "*.tab" + - spl_junc_tab: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.SJ.out.tab": + type: file + description: STAR output splice junction tab file + pattern: "*.SJ.out.tab" + - read_per_gene_tab: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.ReadsPerGene.out.tab": + type: file + description: STAR output read per gene tab file + pattern: "*.ReadsPerGene.out.tab" - junction: - type: file - description: STAR chimeric junction output file (optional) - pattern: "*.out.junction" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.out.junction": + type: file + description: STAR chimeric junction output file (optional) + pattern: "*.out.junction" + - sam: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*.out.sam" + - "*.out.sam": + type: file + description: STAR output SAM file(s) (optional) + pattern: "*.out.sam" - wig: - type: file - description: STAR output wiggle format file(s) (optional) - pattern: "*.wig" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.wig": + type: file + description: STAR output wiggle format file(s) (optional) + pattern: "*.wig" - bedgraph: - type: file - description: STAR output bedGraph format file(s) (optional) - pattern: "*.bg" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bg": + type: file + description: STAR output bedGraph format file(s) (optional) + pattern: "*.bg" authors: - "@kevinmenden" - "@drpatelh" diff --git a/modules/nf-core/star/align/tests/main.nf.test b/modules/nf-core/star/align/tests/main.nf.test new file mode 100644 index 0000000..a62c17d --- /dev/null +++ b/modules/nf-core/star/align/tests/main.nf.test @@ -0,0 +1,593 @@ +nextflow_process { + + name "Test Process STAR_ALIGN" + script "../main.nf" + process "STAR_ALIGN" + tag "modules" + tag "modules_nfcore" + tag "star" + tag "star/align" + tag "star/genomegenerate" + + test("homo_sapiens - single_end") { + config "./nextflow.config" + + setup { + run("STAR_GENOMEGENERATE") { + script "../../../star/genomegenerate/main.nf" + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + """ + } + } + } + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true) ] + ]) + input[1] = STAR_GENOMEGENERATE.out.index + input[2] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + input[3] = false + input[4] = 'illumina' + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.log_final[0][1]).name, + file(process.out.log_out[0][1]).name, + file(process.out.log_progress[0][1]).name, + bam(process.out.bam[0][1]).getReadsMD5(), + bam(process.out.bam_sorted_aligned[0][1]).getReadsMD5(), + process.out.bedgraph, + process.out.fastq, + process.out.read_per_gene_tab, + process.out.sam, + process.out.spl_junc_tab, + process.out.tab, + process.out.wig, + process.out.versions + ).match() } + ) + } + } + + test("homo_sapiens - paired_end") { + config "./nextflow.config" + + setup { + run("STAR_GENOMEGENERATE") { + script "../../../star/genomegenerate/main.nf" + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + """ + } + } + } + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = STAR_GENOMEGENERATE.out.index + input[2] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + input[3] = false + input[4] = 'illumina' + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.log_final[0][1]).name, + file(process.out.log_out[0][1]).name, + file(process.out.log_progress[0][1]).name, + bam(process.out.bam[0][1]).getReadsMD5(), + bam(process.out.bam_sorted_aligned[0][1]).getReadsMD5(), + process.out.bedgraph, + process.out.fastq, + process.out.read_per_gene_tab, + process.out.sam, + process.out.spl_junc_tab, + process.out.tab, + process.out.wig, + process.out.versions + ).match() } + ) + } + } + + test("homo_sapiens - paired_end - arriba") { + config "./nextflow.arriba.config" + + setup { + run("STAR_GENOMEGENERATE") { + script "../../../star/genomegenerate/main.nf" + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + """ + } + } + } + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = STAR_GENOMEGENERATE.out.index + input[2] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + input[3] = false + input[4] = 'illumina' + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.log_final[0][1]).name, + file(process.out.log_out[0][1]).name, + file(process.out.log_progress[0][1]).name, + bam(process.out.bam[0][1]).getReadsMD5(), + process.out.bedgraph, + process.out.fastq, + process.out.read_per_gene_tab, + process.out.sam, + process.out.spl_junc_tab, + process.out.tab, + process.out.wig, + process.out.versions + ).match() } + ) + } + } + + test("homo_sapiens - paired_end - starfusion") { + config "./nextflow.starfusion.config" + + setup { + run("STAR_GENOMEGENERATE") { + script "../../../star/genomegenerate/main.nf" + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + """ + } + } + } + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = STAR_GENOMEGENERATE.out.index + input[2] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + input[3] = false + input[4] = 'illumina' + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.log_final[0][1]).name, + file(process.out.log_out[0][1]).name, + file(process.out.log_progress[0][1]).name, + file(process.out.junction[0][1]).name, + bam(process.out.bam[0][1]).getReadsMD5(), + process.out.bedgraph, + process.out.fastq, + process.out.read_per_gene_tab, + process.out.sam, + process.out.spl_junc_tab, + process.out.tab, + process.out.wig, + process.out.versions + ).match() } + ) + } + } + + test("homo_sapiens - paired_end - multiple") { + config "./nextflow.config" + + setup { + run("STAR_GENOMEGENERATE") { + script "../../../star/genomegenerate/main.nf" + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + """ + } + } + } + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = STAR_GENOMEGENERATE.out.index + input[2] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + input[3] = false + input[4] = 'illumina' + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.log_final[0][1]).name, + file(process.out.log_out[0][1]).name, + file(process.out.log_progress[0][1]).name, + bam(process.out.bam[0][1]).getReadsMD5(), + bam(process.out.bam_sorted_aligned[0][1]).getReadsMD5(), + process.out.bedgraph, + process.out.fastq, + process.out.read_per_gene_tab, + process.out.sam, + process.out.spl_junc_tab, + process.out.tab, + process.out.wig, + process.out.versions + ).match() } + ) + } + } + + test("homo_sapiens - single_end - stub") { + options "-stub" + config "./nextflow.config" + + setup { + run("STAR_GENOMEGENERATE") { + script "../../../star/genomegenerate/main.nf" + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + """ + } + } + } + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true) ] + ]) + input[1] = STAR_GENOMEGENERATE.out.index + input[2] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + input[3] = false + input[4] = 'illumina' + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("homo_sapiens - paired_end - stub") { + options "-stub" + config "./nextflow.config" + + setup { + run("STAR_GENOMEGENERATE") { + script "../../../star/genomegenerate/main.nf" + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + """ + } + } + } + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = STAR_GENOMEGENERATE.out.index + input[2] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + input[3] = false + input[4] = 'illumina' + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("homo_sapiens - paired_end - arriba - stub") { + options "-stub" + config "./nextflow.arriba.config" + + setup { + run("STAR_GENOMEGENERATE") { + script "../../../star/genomegenerate/main.nf" + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + """ + } + } + } + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = STAR_GENOMEGENERATE.out.index + input[2] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + input[3] = false + input[4] = 'illumina' + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("homo_sapiens - paired_end - starfusion - stub") { + options "-stub" + config "./nextflow.starfusion.config" + + setup { + run("STAR_GENOMEGENERATE") { + script "../../../star/genomegenerate/main.nf" + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + """ + } + } + } + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = STAR_GENOMEGENERATE.out.index + input[2] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + input[3] = false + input[4] = 'illumina' + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("homo_sapiens - paired_end - multiple - stub") { + options "-stub" + config "./nextflow.config" + + setup { + run("STAR_GENOMEGENERATE") { + script "../../../star/genomegenerate/main.nf" + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + """ + } + } + } + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = STAR_GENOMEGENERATE.out.index + input[2] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + input[3] = false + input[4] = 'illumina' + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/star/align/tests/main.nf.test.snap b/modules/nf-core/star/align/tests/main.nf.test.snap new file mode 100644 index 0000000..b533fb8 --- /dev/null +++ b/modules/nf-core/star/align/tests/main.nf.test.snap @@ -0,0 +1,1913 @@ +{ + "homo_sapiens - single_end - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.Log.final.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.Log.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "10": [ + [ + { + "id": "test", + "single_end": true + }, + [ + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "11": [ + [ + { + "id": "test", + "single_end": true + }, + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "12": [ + [ + { + "id": "test", + "single_end": true + }, + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "13": [ + [ + { + "id": "test", + "single_end": true + }, + "test.Chimeric.out.junction:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "14": [ + [ + { + "id": "test", + "single_end": true + }, + "test.out.sam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "15": [ + [ + { + "id": "test", + "single_end": true + }, + "test.Signal.UniqueMultiple.str1.out.wig:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "16": [ + [ + { + "id": "test", + "single_end": true + }, + "test.Signal.UniqueMultiple.str1.out.bg:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.Log.progress.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,a149bba1dbb5194560abdd813c7848e3" + ], + "4": [ + [ + { + "id": "test", + "single_end": true + }, + [ + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "testXd.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": true + }, + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "6": [ + [ + { + "id": "test", + "single_end": true + }, + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "7": [ + [ + { + "id": "test", + "single_end": true + }, + "test.toTranscriptome.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "8": [ + [ + { + "id": "test", + "single_end": true + }, + "test.Aligned.unsort.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "9": [ + [ + { + "id": "test", + "single_end": true + }, + [ + "test.unmapped_1.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test.unmapped_2.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "bam": [ + [ + { + "id": "test", + "single_end": true + }, + [ + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "testXd.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "bam_sorted": [ + [ + { + "id": "test", + "single_end": true + }, + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bam_sorted_aligned": [ + [ + { + "id": "test", + "single_end": true + }, + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bam_transcript": [ + [ + { + "id": "test", + "single_end": true + }, + "test.toTranscriptome.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bam_unsorted": [ + [ + { + "id": "test", + "single_end": true + }, + "test.Aligned.unsort.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bedgraph": [ + [ + { + "id": "test", + "single_end": true + }, + "test.Signal.UniqueMultiple.str1.out.bg:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "fastq": [ + [ + { + "id": "test", + "single_end": true + }, + [ + "test.unmapped_1.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test.unmapped_2.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "junction": [ + [ + { + "id": "test", + "single_end": true + }, + "test.Chimeric.out.junction:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log_final": [ + [ + { + "id": "test", + "single_end": true + }, + "test.Log.final.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log_out": [ + [ + { + "id": "test", + "single_end": true + }, + "test.Log.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log_progress": [ + [ + { + "id": "test", + "single_end": true + }, + "test.Log.progress.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "read_per_gene_tab": [ + [ + { + "id": "test", + "single_end": true + }, + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "sam": [ + [ + { + "id": "test", + "single_end": true + }, + "test.out.sam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "spl_junc_tab": [ + [ + { + "id": "test", + "single_end": true + }, + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tab": [ + [ + { + "id": "test", + "single_end": true + }, + [ + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions": [ + "versions.yml:md5,a149bba1dbb5194560abdd813c7848e3" + ], + "wig": [ + [ + { + "id": "test", + "single_end": true + }, + "test.Signal.UniqueMultiple.str1.out.wig:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-20T17:09:08.738074176" + }, + "homo_sapiens - paired_end - arriba - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.final.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "10": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "11": [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "12": [ + [ + { + "id": "test", + "single_end": false + }, + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "13": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Chimeric.out.junction:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "14": [ + [ + { + "id": "test", + "single_end": false + }, + "test.out.sam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "15": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Signal.UniqueMultiple.str1.out.wig:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "16": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Signal.UniqueMultiple.str1.out.bg:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.progress.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,a149bba1dbb5194560abdd813c7848e3" + ], + "4": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "testXd.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "6": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "7": [ + [ + { + "id": "test", + "single_end": false + }, + "test.toTranscriptome.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "8": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.unsort.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "9": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.unmapped_1.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test.unmapped_2.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "bam": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "testXd.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "bam_sorted": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bam_sorted_aligned": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bam_transcript": [ + [ + { + "id": "test", + "single_end": false + }, + "test.toTranscriptome.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bam_unsorted": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.unsort.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bedgraph": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Signal.UniqueMultiple.str1.out.bg:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "fastq": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.unmapped_1.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test.unmapped_2.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "junction": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Chimeric.out.junction:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log_final": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.final.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log_out": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log_progress": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.progress.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "read_per_gene_tab": [ + [ + { + "id": "test", + "single_end": false + }, + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "sam": [ + [ + { + "id": "test", + "single_end": false + }, + "test.out.sam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "spl_junc_tab": [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tab": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions": [ + "versions.yml:md5,a149bba1dbb5194560abdd813c7848e3" + ], + "wig": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Signal.UniqueMultiple.str1.out.wig:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-20T17:09:36.122131869" + }, + "homo_sapiens - single_end": { + "content": [ + "test.Log.final.out", + "test.Log.out", + "test.Log.progress.out", + "9f76be49a6607613a64f760101bdddce", + "9f76be49a6607613a64f760101bdddce", + [ + [ + { + "id": "test", + "single_end": true + }, + [ + "test.Signal.Unique.str1.out.bg:md5,c56fc1472776fb927eaf62d973da5f9a", + "test.Signal.UniqueMultiple.str1.out.bg:md5,e93373cf6f2a2a9506e2efdb260cdd4f" + ] + ] + ], + [ + + ], + [ + + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.SJ.out.tab:md5,75a516ab950fb958f40b29996474949c" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.SJ.out.tab:md5,75a516ab950fb958f40b29996474949c" + ] + ], + [ + + ], + [ + "versions.yml:md5,a149bba1dbb5194560abdd813c7848e3" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-20T17:01:22.197991909" + }, + "homo_sapiens - paired_end": { + "content": [ + "test.Log.final.out", + "test.Log.out", + "test.Log.progress.out", + "db9a8324b5163b025bcc0c33e848486", + "db9a8324b5163b025bcc0c33e848486", + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.Signal.Unique.str1.out.bg:md5,d7bf8b70b436ca048a62513e1d0ece3a", + "test.Signal.UniqueMultiple.str1.out.bg:md5,686d58493b9eb445b56ace4d67f76ef6" + ] + ] + ], + [ + + ], + [ + + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,844af19ab0fc8cd9a3f75228445aca0d" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,844af19ab0fc8cd9a3f75228445aca0d" + ] + ], + [ + + ], + [ + "versions.yml:md5,a149bba1dbb5194560abdd813c7848e3" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-20T17:02:06.988663857" + }, + "homo_sapiens - paired_end - multiple - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.final.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "10": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "11": [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "12": [ + [ + { + "id": "test", + "single_end": false + }, + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "13": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Chimeric.out.junction:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "14": [ + [ + { + "id": "test", + "single_end": false + }, + "test.out.sam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "15": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Signal.UniqueMultiple.str1.out.wig:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "16": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Signal.UniqueMultiple.str1.out.bg:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.progress.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,a149bba1dbb5194560abdd813c7848e3" + ], + "4": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "testXd.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "6": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "7": [ + [ + { + "id": "test", + "single_end": false + }, + "test.toTranscriptome.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "8": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.unsort.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "9": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.unmapped_1.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test.unmapped_2.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "bam": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "testXd.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "bam_sorted": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bam_sorted_aligned": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bam_transcript": [ + [ + { + "id": "test", + "single_end": false + }, + "test.toTranscriptome.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bam_unsorted": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.unsort.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bedgraph": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Signal.UniqueMultiple.str1.out.bg:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "fastq": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.unmapped_1.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test.unmapped_2.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "junction": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Chimeric.out.junction:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log_final": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.final.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log_out": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log_progress": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.progress.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "read_per_gene_tab": [ + [ + { + "id": "test", + "single_end": false + }, + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "sam": [ + [ + { + "id": "test", + "single_end": false + }, + "test.out.sam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "spl_junc_tab": [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tab": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions": [ + "versions.yml:md5,a149bba1dbb5194560abdd813c7848e3" + ], + "wig": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Signal.UniqueMultiple.str1.out.wig:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-20T17:10:12.005468781" + }, + "homo_sapiens - paired_end - multiple": { + "content": [ + "test.Log.final.out", + "test.Log.out", + "test.Log.progress.out", + "3e54e45f5dc3e9c1f2fc55bc41531a87", + "3e54e45f5dc3e9c1f2fc55bc41531a87", + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.Signal.Unique.str1.out.bg:md5,d7bf8b70b436ca048a62513e1d0ece3a", + "test.Signal.UniqueMultiple.str1.out.bg:md5,686d58493b9eb445b56ace4d67f76ef6" + ] + ] + ], + [ + + ], + [ + + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,069877e053714e23010fe4e1c003b4a2" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,069877e053714e23010fe4e1c003b4a2" + ] + ], + [ + + ], + [ + "versions.yml:md5,a149bba1dbb5194560abdd813c7848e3" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-20T17:08:54.877286681" + }, + "homo_sapiens - paired_end - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.final.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "10": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "11": [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "12": [ + [ + { + "id": "test", + "single_end": false + }, + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "13": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Chimeric.out.junction:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "14": [ + [ + { + "id": "test", + "single_end": false + }, + "test.out.sam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "15": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Signal.UniqueMultiple.str1.out.wig:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "16": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Signal.UniqueMultiple.str1.out.bg:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.progress.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,a149bba1dbb5194560abdd813c7848e3" + ], + "4": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "testXd.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "6": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "7": [ + [ + { + "id": "test", + "single_end": false + }, + "test.toTranscriptome.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "8": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.unsort.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "9": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.unmapped_1.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test.unmapped_2.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "bam": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "testXd.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "bam_sorted": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bam_sorted_aligned": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bam_transcript": [ + [ + { + "id": "test", + "single_end": false + }, + "test.toTranscriptome.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bam_unsorted": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.unsort.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bedgraph": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Signal.UniqueMultiple.str1.out.bg:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "fastq": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.unmapped_1.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test.unmapped_2.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "junction": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Chimeric.out.junction:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log_final": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.final.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log_out": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log_progress": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.progress.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "read_per_gene_tab": [ + [ + { + "id": "test", + "single_end": false + }, + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "sam": [ + [ + { + "id": "test", + "single_end": false + }, + "test.out.sam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "spl_junc_tab": [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tab": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions": [ + "versions.yml:md5,a149bba1dbb5194560abdd813c7848e3" + ], + "wig": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Signal.UniqueMultiple.str1.out.wig:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-20T17:09:20.911466345" + }, + "homo_sapiens - paired_end - starfusion": { + "content": [ + "test.Log.final.out", + "test.Log.out", + "test.Log.progress.out", + "test.Chimeric.out.junction", + "caee9dcda13882d4913456973c25b57a", + [ + + ], + [ + + ], + [ + + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,19c3faa1bfa9a0cc5e4c45f17065b53a" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,19c3faa1bfa9a0cc5e4c45f17065b53a" + ] + ], + [ + + ], + [ + "versions.yml:md5,a149bba1dbb5194560abdd813c7848e3" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-20T17:07:25.0639914" + }, + "homo_sapiens - paired_end - arriba": { + "content": [ + "test.Log.final.out", + "test.Log.out", + "test.Log.progress.out", + "1a3abe88fb2490589c58497d39921bcc", + [ + + ], + [ + + ], + [ + + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,5155c9fd1f787ad6d7d80987fb06219c" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,5155c9fd1f787ad6d7d80987fb06219c" + ] + ], + [ + + ], + [ + "versions.yml:md5,a149bba1dbb5194560abdd813c7848e3" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-20T17:04:00.685784211" + }, + "homo_sapiens - paired_end - starfusion - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.final.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "10": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "11": [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "12": [ + [ + { + "id": "test", + "single_end": false + }, + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "13": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Chimeric.out.junction:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "14": [ + [ + { + "id": "test", + "single_end": false + }, + "test.out.sam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "15": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Signal.UniqueMultiple.str1.out.wig:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "16": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Signal.UniqueMultiple.str1.out.bg:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.progress.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,a149bba1dbb5194560abdd813c7848e3" + ], + "4": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "testXd.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "6": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "7": [ + [ + { + "id": "test", + "single_end": false + }, + "test.toTranscriptome.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "8": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.unsort.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "9": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.unmapped_1.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test.unmapped_2.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "bam": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "testXd.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "bam_sorted": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bam_sorted_aligned": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bam_transcript": [ + [ + { + "id": "test", + "single_end": false + }, + "test.toTranscriptome.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bam_unsorted": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.unsort.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bedgraph": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Signal.UniqueMultiple.str1.out.bg:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "fastq": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.unmapped_1.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test.unmapped_2.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "junction": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Chimeric.out.junction:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log_final": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.final.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log_out": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log_progress": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.progress.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "read_per_gene_tab": [ + [ + { + "id": "test", + "single_end": false + }, + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "sam": [ + [ + { + "id": "test", + "single_end": false + }, + "test.out.sam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "spl_junc_tab": [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tab": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions": [ + "versions.yml:md5,a149bba1dbb5194560abdd813c7848e3" + ], + "wig": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Signal.UniqueMultiple.str1.out.wig:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-20T17:09:53.173671551" + } +} \ No newline at end of file diff --git a/modules/nf-core/star/align/tests/nextflow.arriba.config b/modules/nf-core/star/align/tests/nextflow.arriba.config new file mode 100644 index 0000000..cf09323 --- /dev/null +++ b/modules/nf-core/star/align/tests/nextflow.arriba.config @@ -0,0 +1,11 @@ +process { + + withName: STAR_GENOMEGENERATE { + ext.args = '--genomeSAindexNbases 9' + } + + withName: STAR_ALIGN { + ext.args = '--readFilesCommand zcat --outSAMtype BAM Unsorted --outSAMunmapped Within --outBAMcompression 0 --outFilterMultimapNmax 50 --peOverlapNbasesMin 10 --alignSplicedMateMapLminOverLmate 0.5 --alignSJstitchMismatchNmax 5 -1 5 5 --chimSegmentMin 10 --chimOutType WithinBAM HardClip --chimJunctionOverhangMin 10 --chimScoreDropMax 30 --chimScoreJunctionNonGTAG 0 --chimScoreSeparation 1 --chimSegmentReadGapMax 3 --chimMultimapNmax 50' + } + +} diff --git a/modules/nf-core/star/align/tests/nextflow.config b/modules/nf-core/star/align/tests/nextflow.config new file mode 100644 index 0000000..18bc2ee --- /dev/null +++ b/modules/nf-core/star/align/tests/nextflow.config @@ -0,0 +1,11 @@ +process { + + withName: STAR_GENOMEGENERATE { + ext.args = '--genomeSAindexNbases 9' + } + + withName: STAR_ALIGN { + ext.args = '--readFilesCommand zcat --outSAMtype BAM SortedByCoordinate --outWigType bedGraph --outWigStrand Unstranded' + } + +} diff --git a/modules/nf-core/star/align/tests/nextflow.starfusion.config b/modules/nf-core/star/align/tests/nextflow.starfusion.config new file mode 100644 index 0000000..7880bfc --- /dev/null +++ b/modules/nf-core/star/align/tests/nextflow.starfusion.config @@ -0,0 +1,11 @@ +process { + + withName: STAR_GENOMEGENERATE { + ext.args = '--genomeSAindexNbases 9' + } + + withName: STAR_ALIGN { + ext.args = '--readFilesCommand zcat --outSAMtype BAM Unsorted --outReadsUnmapped None --twopassMode Basic --outSAMstrandField intronMotif --outSAMunmapped Within --chimSegmentMin 12 --chimJunctionOverhangMin 8 --chimOutJunctionFormat 1 --alignSJDBoverhangMin 10 --alignMatesGapMax 100000 --alignIntronMax 100000 --alignSJstitchMismatchNmax 5 -1 5 5 --chimMultimapScoreRange 3 --chimScoreJunctionNonGTAG -4 --chimMultimapNmax 20 --chimNonchimScoreDropMin 10 --peOverlapNbasesMin 12 --peOverlapMMp 0.1 --alignInsertionFlush Right --alignSplicedMateMapLminOverLmate 0 --alignSplicedMateMapLmin 30' + } + +} diff --git a/modules/nf-core/star/align/tests/tags.yml b/modules/nf-core/star/align/tests/tags.yml new file mode 100644 index 0000000..8beace1 --- /dev/null +++ b/modules/nf-core/star/align/tests/tags.yml @@ -0,0 +1,2 @@ +star/align: + - modules/nf-core/star/align/** diff --git a/modules/nf-core/star/genomegenerate/environment.yml b/modules/nf-core/star/genomegenerate/environment.yml new file mode 100644 index 0000000..7c57530 --- /dev/null +++ b/modules/nf-core/star/genomegenerate/environment.yml @@ -0,0 +1,9 @@ +channels: + - conda-forge + - bioconda + +dependencies: + - bioconda::htslib=1.20 + - bioconda::samtools=1.20 + - bioconda::star=2.7.11b + - conda-forge::gawk=5.1.0 diff --git a/modules/nf-core/star/genomegenerate/main.nf b/modules/nf-core/star/genomegenerate/main.nf new file mode 100644 index 0000000..8f0c67e --- /dev/null +++ b/modules/nf-core/star/genomegenerate/main.nf @@ -0,0 +1,119 @@ +process STAR_GENOMEGENERATE { + tag "$fasta" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b4/b425bc2a95806d878993f9a66dae3ae80ac2dafff4c208c5ae01b7a90a32fa91/data' : + 'community.wave.seqera.io/library/star_samtools_htslib_gawk:10c6e8c834460019' }" + + input: + tuple val(meta), path(fasta) + tuple val(meta2), path(gtf) + + output: + tuple val(meta), path("star") , emit: index + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args_list = args.tokenize() + def memory = task.memory ? "--limitGenomeGenerateRAM ${task.memory.toBytes() - 100000000}" : '' + def include_gtf = gtf ? "--sjdbGTFfile $gtf" : '' + if (args_list.contains('--genomeSAindexNbases')) { + """ + mkdir star + STAR \\ + --runMode genomeGenerate \\ + --genomeDir star/ \\ + --genomeFastaFiles $fasta \\ + $include_gtf \\ + --runThreadN $task.cpus \\ + $memory \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + star: \$(STAR --version | sed -e "s/STAR_//g") + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//') + END_VERSIONS + """ + } else { + """ + samtools faidx $fasta + NUM_BASES=`gawk '{sum = sum + \$2}END{if ((log(sum)/log(2))/2 - 1 > 14) {printf "%.0f", 14} else {printf "%.0f", (log(sum)/log(2))/2 - 1}}' ${fasta}.fai` + + mkdir star + STAR \\ + --runMode genomeGenerate \\ + --genomeDir star/ \\ + --genomeFastaFiles $fasta \\ + $include_gtf \\ + --runThreadN $task.cpus \\ + --genomeSAindexNbases \$NUM_BASES \\ + $memory \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + star: \$(STAR --version | sed -e "s/STAR_//g") + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//') + END_VERSIONS + """ + } + + stub: + if (gtf) { + """ + mkdir star + touch star/Genome + touch star/Log.out + touch star/SA + touch star/SAindex + touch star/chrLength.txt + touch star/chrName.txt + touch star/chrNameLength.txt + touch star/chrStart.txt + touch star/exonGeTrInfo.tab + touch star/exonInfo.tab + touch star/geneInfo.tab + touch star/genomeParameters.txt + touch star/sjdbInfo.txt + touch star/sjdbList.fromGTF.out.tab + touch star/sjdbList.out.tab + touch star/transcriptInfo.tab + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + star: \$(STAR --version | sed -e "s/STAR_//g") + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//') + END_VERSIONS + """ + } else { + """ + mkdir star + touch star/Genome + touch star/Log.out + touch star/SA + touch star/SAindex + touch star/chrLength.txt + touch star/chrName.txt + touch star/chrNameLength.txt + touch star/chrStart.txt + touch star/genomeParameters.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + star: \$(STAR --version | sed -e "s/STAR_//g") + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//') + END_VERSIONS + """ + } +} diff --git a/modules/nf-core/star/genomegenerate/meta.yml b/modules/nf-core/star/genomegenerate/meta.yml new file mode 100644 index 0000000..33c1f65 --- /dev/null +++ b/modules/nf-core/star/genomegenerate/meta.yml @@ -0,0 +1,56 @@ +name: star_genomegenerate +description: Create index for STAR +keywords: + - index + - fasta + - genome + - reference +tools: + - star: + description: | + STAR is a software package for mapping DNA sequences against + a large reference genome, such as the human genome. + homepage: https://github.com/alexdobin/STAR + manual: https://github.com/alexdobin/STAR/blob/master/doc/STARmanual.pdf + doi: 10.1093/bioinformatics/bts635 + licence: ["MIT"] + identifier: biotools:star +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: Fasta file of the reference genome + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - gtf: + type: file + description: GTF file of the reference genome +output: + - index: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - star: + type: directory + description: Folder containing the star index files + pattern: "star" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@kevinmenden" + - "@drpatelh" +maintainers: + - "@kevinmenden" + - "@drpatelh" diff --git a/modules/nf-core/star/genomegenerate/tests/main.nf.test b/modules/nf-core/star/genomegenerate/tests/main.nf.test new file mode 100644 index 0000000..4d619c4 --- /dev/null +++ b/modules/nf-core/star/genomegenerate/tests/main.nf.test @@ -0,0 +1,114 @@ +nextflow_process { + + name "Test Process STAR_GENOMEGENERATE" + script "../main.nf" + process "STAR_GENOMEGENERATE" + tag "modules" + tag "modules_nfcore" + tag "star" + tag "star/genomegenerate" + + test("fasta_gtf") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.index[0][1]).listFiles().collect { it.getName() }.sort().toString(), + process.out.versions) + .match() } + ) + } + } + + test("fasta") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[1] = Channel.of([ [], [] ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.index[0][1]).listFiles().collect { it.getName() }.sort().toString(), + process.out.versions + ).match() } + ) + } + } + + test("fasta_gtf_stub") { + + options '-stub' + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("fasta_stub") { + + options '-stub' + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[1] = Channel.of([ [], [] ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/star/genomegenerate/tests/main.nf.test.snap b/modules/nf-core/star/genomegenerate/tests/main.nf.test.snap new file mode 100644 index 0000000..3db2567 --- /dev/null +++ b/modules/nf-core/star/genomegenerate/tests/main.nf.test.snap @@ -0,0 +1,148 @@ +{ + "fasta_gtf": { + "content": [ + "[Genome, Log.out, SA, SAindex, chrLength.txt, chrName.txt, chrNameLength.txt, chrStart.txt, exonGeTrInfo.tab, exonInfo.tab, geneInfo.tab, genomeParameters.txt, sjdbInfo.txt, sjdbList.fromGTF.out.tab, sjdbList.out.tab, transcriptInfo.tab]", + [ + "versions.yml:md5,14b05d04c9eca568e9ed4888aaf26fa6" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-19T20:37:47.410432728" + }, + "fasta_gtf_stub": { + "content": [ + { + "0": [ + [ + { + "id": "test_fasta" + }, + [ + "Genome:md5,d41d8cd98f00b204e9800998ecf8427e", + "Log.out:md5,d41d8cd98f00b204e9800998ecf8427e", + "SA:md5,d41d8cd98f00b204e9800998ecf8427e", + "SAindex:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrLength.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrName.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrNameLength.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrStart.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "exonGeTrInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "exonInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "geneInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "genomeParameters.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "sjdbInfo.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "sjdbList.fromGTF.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "sjdbList.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "transcriptInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "1": [ + "versions.yml:md5,14b05d04c9eca568e9ed4888aaf26fa6" + ], + "index": [ + [ + { + "id": "test_fasta" + }, + [ + "Genome:md5,d41d8cd98f00b204e9800998ecf8427e", + "Log.out:md5,d41d8cd98f00b204e9800998ecf8427e", + "SA:md5,d41d8cd98f00b204e9800998ecf8427e", + "SAindex:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrLength.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrName.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrNameLength.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrStart.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "exonGeTrInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "exonInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "geneInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "genomeParameters.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "sjdbInfo.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "sjdbList.fromGTF.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "sjdbList.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "transcriptInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions": [ + "versions.yml:md5,14b05d04c9eca568e9ed4888aaf26fa6" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-19T20:38:09.165234795" + }, + "fasta_stub": { + "content": [ + { + "0": [ + [ + { + "id": "test_fasta" + }, + [ + "Genome:md5,d41d8cd98f00b204e9800998ecf8427e", + "Log.out:md5,d41d8cd98f00b204e9800998ecf8427e", + "SA:md5,d41d8cd98f00b204e9800998ecf8427e", + "SAindex:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrLength.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrName.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrNameLength.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrStart.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "genomeParameters.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "1": [ + "versions.yml:md5,14b05d04c9eca568e9ed4888aaf26fa6" + ], + "index": [ + [ + { + "id": "test_fasta" + }, + [ + "Genome:md5,d41d8cd98f00b204e9800998ecf8427e", + "Log.out:md5,d41d8cd98f00b204e9800998ecf8427e", + "SA:md5,d41d8cd98f00b204e9800998ecf8427e", + "SAindex:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrLength.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrName.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrNameLength.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrStart.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "genomeParameters.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions": [ + "versions.yml:md5,14b05d04c9eca568e9ed4888aaf26fa6" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-19T20:38:19.530862664" + }, + "fasta": { + "content": [ + "[Genome, Log.out, SA, SAindex, chrLength.txt, chrName.txt, chrNameLength.txt, chrStart.txt, genomeParameters.txt]", + [ + "versions.yml:md5,14b05d04c9eca568e9ed4888aaf26fa6" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-19T20:37:58.667436398" + } +} \ No newline at end of file diff --git a/modules/nf-core/star/genomegenerate/tests/tags.yml b/modules/nf-core/star/genomegenerate/tests/tags.yml new file mode 100644 index 0000000..79f619b --- /dev/null +++ b/modules/nf-core/star/genomegenerate/tests/tags.yml @@ -0,0 +1,2 @@ +star/genomegenerate: + - modules/nf-core/star/genomegenerate/** diff --git a/modules/nf-core/subread/featurecounts/main.nf b/modules/nf-core/subread/featurecounts/main.nf deleted file mode 100644 index 2097996..0000000 --- a/modules/nf-core/subread/featurecounts/main.nf +++ /dev/null @@ -1,47 +0,0 @@ -process SUBREAD_FEATURECOUNTS { - tag "$meta.id" - label 'process_medium' - - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/subread:2.0.1--hed695b0_0' : - 'biocontainers/subread:2.0.1--hed695b0_0' }" - - input: - tuple val(meta), path(bams), path(annotation) - - output: - tuple val(meta), path("*featureCounts.txt") , emit: counts - tuple val(meta), path("*featureCounts.txt.summary"), emit: summary - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def paired_end = meta.single_end ? '' : '-p' - - def strandedness = 0 - if (meta.strandedness == 'forward') { - strandedness = 1 - } else if (meta.strandedness == 'reverse') { - strandedness = 2 - } - """ - featureCounts \\ - $args \\ - $paired_end \\ - -T $task.cpus \\ - -a $annotation \\ - -s $strandedness \\ - -o ${prefix}.featureCounts.txt \\ - ${bams.join(' ')} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - subread: \$( echo \$(featureCounts -v 2>&1) | sed -e "s/featureCounts v//g") - END_VERSIONS - """ -} diff --git a/modules/nf-core/subread/featurecounts/meta.yml b/modules/nf-core/subread/featurecounts/meta.yml deleted file mode 100644 index 38a3794..0000000 --- a/modules/nf-core/subread/featurecounts/meta.yml +++ /dev/null @@ -1,50 +0,0 @@ -name: subread_featurecounts -description: Count reads that map to genomic features -keywords: - - counts - - fasta - - genome - - reference -tools: - - featurecounts: - description: featureCounts is a highly efficient general-purpose read summarization program that counts mapped reads for genomic features such as genes, exons, promoter, gene bodies, genomic bins and chromosomal locations. It can be used to count both RNA-seq and genomic DNA-seq reads. - homepage: http://bioinf.wehi.edu.au/featureCounts/ - documentation: http://bioinf.wehi.edu.au/subread-package/SubreadUsersGuide.pdf - doi: "10.1093/bioinformatics/btt656" - licence: ["GPL v3"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - bam: - type: file - description: BAM/SAM file containing read alignments - pattern: "*.{bam}" - - annotation: - type: file - description: Genomic features annotation in GTF or SAF - pattern: "*.{gtf,saf}" -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - counts: - type: file - description: Counts of reads mapping to features - pattern: "*featureCounts.txt" - - summary: - type: file - description: Summary log file - pattern: "*.featureCounts.txt.summary" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@ntoda03" -maintainers: - - "@ntoda03" diff --git a/modules/nf-core/trimgalore/environment.yml b/modules/nf-core/trimgalore/environment.yml new file mode 100644 index 0000000..b1efd94 --- /dev/null +++ b/modules/nf-core/trimgalore/environment.yml @@ -0,0 +1,7 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::cutadapt=4.9 + - bioconda::trim-galore=0.6.10 + - conda-forge::pigz=2.8 diff --git a/modules/nf-core/trimgalore/main.nf b/modules/nf-core/trimgalore/main.nf new file mode 100644 index 0000000..e55683c --- /dev/null +++ b/modules/nf-core/trimgalore/main.nf @@ -0,0 +1,107 @@ +process TRIMGALORE { + tag "${meta.id}" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/9b/9becad054093ad4083a961d12733f2a742e11728fe9aa815d678b882b3ede520/data' + : 'community.wave.seqera.io/library/cutadapt_trim-galore_pigz:a98edd405b34582d'}" + + input: + tuple val(meta), path(reads) + + output: + tuple val(meta), path("*{3prime,5prime,trimmed,val}*.fq.gz"), emit: reads + tuple val(meta), path("*report.txt") , emit: log , optional: true + tuple val(meta), path("*unpaired*.fq.gz"), emit: unpaired, optional: true + tuple val(meta), path("*.html") , emit: html , optional: true + tuple val(meta), path("*.zip") , emit: zip , optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + // Calculate number of --cores for TrimGalore based on value of task.cpus + // See: https://github.com/FelixKrueger/TrimGalore/blob/master/CHANGELOG.md#version-060-release-on-1-mar-2019 + // See: https://github.com/nf-core/atacseq/pull/65 + def cores = 1 + if (task.cpus) { + cores = (task.cpus as int) - 4 + if (meta.single_end) { + cores = (task.cpus as int) - 3 + } + if (cores < 1) { + cores = 1 + } + if (cores > 8) { + cores = 8 + } + } + + // Added soft-links to original fastqs for consistent naming in MultiQC + def prefix = task.ext.prefix ?: "${meta.id}" + if (meta.single_end) { + def args_list = args.split("\\s(?=--)").toList() + args_list.removeAll { it.toLowerCase().contains('_r2 ') } + """ + [ ! -f ${prefix}.fastq.gz ] && ln -s ${reads} ${prefix}.fastq.gz + trim_galore \\ + ${args_list.join(' ')} \\ + --cores ${cores} \\ + --gzip \\ + ${prefix}.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + trimgalore: \$(echo \$(trim_galore --version 2>&1) | sed 's/^.*version //; s/Last.*\$//') + cutadapt: \$(cutadapt --version) + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + END_VERSIONS + """ + } + else { + """ + [ ! -f ${prefix}_1.fastq.gz ] && ln -s ${reads[0]} ${prefix}_1.fastq.gz + [ ! -f ${prefix}_2.fastq.gz ] && ln -s ${reads[1]} ${prefix}_2.fastq.gz + trim_galore \\ + ${args} \\ + --cores ${cores} \\ + --paired \\ + --gzip \\ + ${prefix}_1.fastq.gz \\ + ${prefix}_2.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + trimgalore: \$(echo \$(trim_galore --version 2>&1) | sed 's/^.*version //; s/Last.*\$//') + cutadapt: \$(cutadapt --version) + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + END_VERSIONS + """ + } + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + if (meta.single_end) { + output_command = "echo '' | gzip > ${prefix}_trimmed.fq.gz ;" + output_command += "touch ${prefix}.fastq.gz_trimming_report.txt" + } + else { + output_command = "echo '' | gzip > ${prefix}_1_trimmed.fq.gz ;" + output_command += "touch ${prefix}_1.fastq.gz_trimming_report.txt ;" + output_command += "echo '' | gzip > ${prefix}_2_trimmed.fq.gz ;" + output_command += "touch ${prefix}_2.fastq.gz_trimming_report.txt" + } + """ + ${output_command} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + trimgalore: \$(echo \$(trim_galore --version 2>&1) | sed 's/^.*version //; s/Last.*\$//') + cutadapt: \$(cutadapt --version) + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/trimgalore/meta.yml b/modules/nf-core/trimgalore/meta.yml new file mode 100644 index 0000000..576cb4c --- /dev/null +++ b/modules/nf-core/trimgalore/meta.yml @@ -0,0 +1,95 @@ +name: trimgalore +description: Trim FastQ files using Trim Galore! +keywords: + - trimming + - adapters + - sequencing adapters + - fastq +tools: + - trimgalore: + description: | + A wrapper tool around Cutadapt and FastQC to consistently apply quality + and adapter trimming to FastQ files, with some extra functionality for + MspI-digested RRBS-type (Reduced Representation Bisufite-Seq) libraries. + homepage: https://www.bioinformatics.babraham.ac.uk/projects/trim_galore/ + documentation: https://github.com/FelixKrueger/TrimGalore/blob/master/Docs/Trim_Galore_User_Guide.md + licence: ["GPL-3.0-or-later"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. +output: + - reads: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*{3prime,5prime,trimmed,val}*.fq.gz": + type: file + description: | + List of input adapter trimmed FastQ files of size 1 and 2 for + single-end and paired-end data, respectively. + pattern: "*{3prime,5prime,trimmed,val}*.fq.gz" + - log: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*report.txt": + type: file + description: Trim Galore! trimming report + pattern: "*_{report.txt}" + - unpaired: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*unpaired*.fq.gz": + type: file + description: | + FastQ files containing unpaired reads from read 1 or read 2 + pattern: "*unpaired*.fq.gz" + - html: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.html": + type: file + description: FastQC report (optional) + pattern: "*_{fastqc.html}" + - zip: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.zip": + type: file + description: FastQC report archive (optional) + pattern: "*_{fastqc.zip}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@ewels" + - "@FelixKrueger" +maintainers: + - "@drpatelh" + - "@ewels" + - "@FelixKrueger" diff --git a/modules/nf-core/trimgalore/tests/main.nf.test b/modules/nf-core/trimgalore/tests/main.nf.test new file mode 100644 index 0000000..37b054f --- /dev/null +++ b/modules/nf-core/trimgalore/tests/main.nf.test @@ -0,0 +1,154 @@ +nextflow_process { + + name "Test Process TRIMGALORE" + script "../main.nf" + process "TRIMGALORE" + tag "modules" + tag "modules_nfcore" + tag "trimgalore" + + test("test_trimgalore_single_end") { + + when { + process { + """ + input[0] = [ [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_1.fastq.gz", checkIfExists: true) ] + ] + """ + } + } + + then { + def read_lines = ["@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1", + "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT", + "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE + { assert path(process.out.reads.get(0).get(1)).linesGzip.contains(read_line) } + } + }, + { report1_lines.each { report1_line -> + { assert path(process.out.log.get(0).get(1)).getText().contains(report1_line) } + } + }, + { assert snapshot(path(process.out.versions.get(0)).yaml).match() }, + ) + } + } + + test("test_trimgalore_single_end - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_1.fastq.gz", checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out, + path(process.out.versions.get(0)).yaml + ).match() }, + ) + } + } + + test("test_trimgalore_paired_end") { + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_1.fastq.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_2.fastq.gz", checkIfExists: true) + ] + ] + """ + } + } + + then { + def read1_lines = ["@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1", + "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT", + "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE + { assert path(process.out.reads.get(0).get(1).get(0)).linesGzip.contains(read1_line) } + } + }, + { read2_lines.each { read2_line -> + { assert path(process.out.reads.get(0).get(1).get(1)).linesGzip.contains(read2_line) } + } + }, + { report1_lines.each { report1_line -> + { assert path(process.out.log.get(0).get(1).get(0)).getText().contains(report1_line) } + } + }, + { report2_lines.each { report2_line -> + { assert path(process.out.log.get(0).get(1).get(1)).getText().contains(report2_line) } + } + }, + { assert snapshot(path(process.out.versions.get(0)).yaml).match() }, + ) + } + } + + test("test_trimgalore_paired_end - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_1.fastq.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_2.fastq.gz", checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot(path(process.out.versions.get(0)).yaml).match("versions") }, + ) + } + } +} \ No newline at end of file diff --git a/modules/nf-core/trimgalore/tests/main.nf.test.snap b/modules/nf-core/trimgalore/tests/main.nf.test.snap new file mode 100644 index 0000000..4c73180 --- /dev/null +++ b/modules/nf-core/trimgalore/tests/main.nf.test.snap @@ -0,0 +1,211 @@ +{ + "test_trimgalore_single_end": { + "content": [ + { + "TRIMGALORE": { + "trimgalore": "0.6.10", + "cutadapt": 4.9, + "pigz": 2.8 + } + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-05T22:56:18.454197" + }, + "test_trimgalore_single_end - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test_trimmed.fq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastq.gz_trimming_report.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + + ], + "3": [ + + ], + "4": [ + + ], + "5": [ + "versions.yml:md5,5928323d579768de37e83c56c821757f" + ], + "html": [ + + ], + "log": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastq.gz_trimming_report.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test_trimmed.fq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "unpaired": [ + + ], + "versions": [ + "versions.yml:md5,5928323d579768de37e83c56c821757f" + ], + "zip": [ + + ] + }, + { + "TRIMGALORE": { + "trimgalore": "0.6.10", + "cutadapt": 4.9, + "pigz": 2.8 + } + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-05T22:56:22.085472" + }, + "test_trimgalore_paired_end - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1_trimmed.fq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_2_trimmed.fq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fastq.gz_trimming_report.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_2.fastq.gz_trimming_report.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "2": [ + + ], + "3": [ + + ], + "4": [ + + ], + "5": [ + "versions.yml:md5,5928323d579768de37e83c56c821757f" + ], + "html": [ + + ], + "log": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fastq.gz_trimming_report.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_2.fastq.gz_trimming_report.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "reads": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1_trimmed.fq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_2_trimmed.fq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "unpaired": [ + + ], + "versions": [ + "versions.yml:md5,5928323d579768de37e83c56c821757f" + ], + "zip": [ + + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-05T22:44:33.751013" + }, + "versions": { + "content": [ + { + "TRIMGALORE": { + "trimgalore": "0.6.10", + "cutadapt": 4.9, + "pigz": 2.8 + } + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-05T22:57:28.77107" + }, + "test_trimgalore_paired_end": { + "content": [ + { + "TRIMGALORE": { + "trimgalore": "0.6.10", + "cutadapt": 4.9, + "pigz": 2.8 + } + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-05T22:56:27.019872" + } +} \ No newline at end of file diff --git a/modules/nf-core/trimgalore/tests/tags.yml b/modules/nf-core/trimgalore/tests/tags.yml new file mode 100644 index 0000000..e993769 --- /dev/null +++ b/modules/nf-core/trimgalore/tests/tags.yml @@ -0,0 +1,2 @@ +trimgalore: + - modules/nf-core/trimgalore/** diff --git a/modules/nf-core/umitools/dedup/environment.yml b/modules/nf-core/umitools/dedup/environment.yml index 5644530..9f9e03c 100644 --- a/modules/nf-core/umitools/dedup/environment.yml +++ b/modules/nf-core/umitools/dedup/environment.yml @@ -1,6 +1,5 @@ channels: - conda-forge - bioconda - - defaults dependencies: - - bioconda::umi_tools=1.1.4 + - bioconda::umi_tools=1.1.5 diff --git a/modules/nf-core/umitools/dedup/main.nf b/modules/nf-core/umitools/dedup/main.nf index 5e48704..1e2a2aa 100644 --- a/modules/nf-core/umitools/dedup/main.nf +++ b/modules/nf-core/umitools/dedup/main.nf @@ -4,8 +4,8 @@ process UMITOOLS_DEDUP { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/umi_tools:1.1.4--py38hbff2b2d_1' : - 'biocontainers/umi_tools:1.1.4--py38hbff2b2d_1' }" + 'https://depot.galaxyproject.org/singularity/umi_tools:1.1.5--py39hf95cd2a_0' : + 'biocontainers/umi_tools:1.1.5--py39hf95cd2a_0' }" input: tuple val(meta), path(bam), path(bai) @@ -42,11 +42,12 @@ process UMITOOLS_DEDUP { cat <<-END_VERSIONS > versions.yml "${task.process}": - umitools: \$(umi_tools --version 2>&1 | sed 's/^.*UMI-tools version://; s/ *\$//') + umitools: \$( umi_tools --version | sed '/version:/!d; s/.*: //' ) END_VERSIONS """ stub: + prefix = task.ext.prefix ?: "${meta.id}" """ touch ${prefix}.bam touch ${prefix}.log @@ -56,7 +57,7 @@ process UMITOOLS_DEDUP { cat <<-END_VERSIONS > versions.yml "${task.process}": - umitools: \$(umi_tools --version 2>&1 | sed 's/^.*UMI-tools version://; s/ *\$//') + umitools: \$( umi_tools --version | sed '/version:/!d; s/.*: //' ) END_VERSIONS """ } diff --git a/modules/nf-core/umitools/dedup/meta.yml b/modules/nf-core/umitools/dedup/meta.yml index 38d3fd4..6cbd841 100644 --- a/modules/nf-core/umitools/dedup/meta.yml +++ b/modules/nf-core/umitools/dedup/meta.yml @@ -1,5 +1,6 @@ name: umitools_dedup -description: Deduplicate reads based on the mapping co-ordinate and the UMI attached to the read. +description: Deduplicate reads based on the mapping co-ordinate and the UMI attached + to the read. keywords: - umitools - deduplication @@ -7,60 +8,89 @@ keywords: tools: - umi_tools: description: > - UMI-tools contains tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs) and single cell RNA-Seq cell barcodes + UMI-tools contains tools for dealing with Unique Molecular Identifiers (UMIs)/Random + Molecular Tags (RMTs) and single cell RNA-Seq cell barcodes documentation: https://umi-tools.readthedocs.io/en/latest/ license: ["MIT"] + identifier: "" input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - bam: - type: file - description: | - BAM file containing reads to be deduplicated via UMIs. - pattern: "*.{bam}" - - bai: - type: file - description: | - BAM index files corresponding to the input BAM file. - pattern: "*.{bai}" - - get_output_stats: - type: boolean - description: | - Whether or not to generate output stats. + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: | + BAM file containing reads to be deduplicated via UMIs. + pattern: "*.{bam}" + - bai: + type: file + description: | + BAM index files corresponding to the input BAM file. + pattern: "*.{bai}" + - - get_output_stats: + type: boolean + description: | + Whether or not to generate output stats. output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - bam: - type: file - description: BAM file with deduplicated UMIs. - pattern: "*.{bam}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.bam: + type: file + description: BAM file with deduplicated UMIs. + pattern: "*.{bam}" - log: - type: file - description: File with logging information - pattern: "*.{log}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.log": + type: file + description: File with logging information + pattern: "*.{log}" - tsv_edit_distance: - type: file - description: Reports the (binned) average edit distance between the UMIs at each position. - pattern: "*edit_distance.tsv" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*edit_distance.tsv": + type: file + description: Reports the (binned) average edit distance between the UMIs at + each position. + pattern: "*edit_distance.tsv" - tsv_per_umi: - type: file - description: UMI-level summary statistics. - pattern: "*per_umi.tsv" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*per_umi.tsv": + type: file + description: UMI-level summary statistics. + pattern: "*per_umi.tsv" - tsv_umi_per_position: - type: file - description: Tabulates the counts for unique combinations of UMI and position. - pattern: "*per_position.tsv" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*per_position.tsv": + type: file + description: Tabulates the counts for unique combinations of UMI and position. + pattern: "*per_position.tsv" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@drpatelh" - "@grst" diff --git a/modules/nf-core/umitools/dedup/tests/main.nf.test b/modules/nf-core/umitools/dedup/tests/main.nf.test new file mode 100644 index 0000000..f00a8cb --- /dev/null +++ b/modules/nf-core/umitools/dedup/tests/main.nf.test @@ -0,0 +1,188 @@ +nextflow_process { + + name "Test Process UMITOOLS_DEDUP" + script "../main.nf" + process "UMITOOLS_DEDUP" + + tag "modules" + tag "modules_nfcore" + tag "umitools" + tag "umitools/dedup" + + test("se - no stats") { + config "./nextflow.config" + + when { + process { + """ + get_output_stats = false + + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.single_end.umi.sorted.bam", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.single_end.umi.sorted.bam.bai", checkIfExists: true) + ] + input[1] = get_output_stats + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path("${process.out.log[0][1]}").exists() }, + { assert snapshot( + bam(process.out.bam[0][1]).getSamLinesMD5(), + process.out.versions).match() } + ) + } + } + + test("pe - no stats") { + config "./nextflow.config" + + when { + process { + """ + get_output_stats = false + + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.umi.sorted.bam", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.umi.sorted.bam.bai", checkIfExists: true) + ] + input[1] = get_output_stats + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path("${process.out.log[0][1]}").exists() }, + { assert snapshot( + bam(process.out.bam[0][1]).getSamLinesMD5(), + process.out.versions).match() } + ) + } + } + + test("pe - with stats") { + config "./nextflow.config" + + when { + process { + """ + get_output_stats = true + + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.umi.sorted.bam", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.umi.sorted.bam.bai", checkIfExists: true) + ] + input[1] = get_output_stats + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path("${process.out.log[0][1]}").exists() }, + { assert snapshot( + bam(process.out.bam[0][1]).getSamLinesMD5(), + process.out.tsv_edit_distance, + process.out.tsv_per_umi, + process.out.tsv_umi_per_position, + process.out.versions).match() } + ) + } + } + + test("se - no stats - stub") { + + options "-stub" + + config "./nextflow.config" + + when { + process { + """ + get_output_stats = false + + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.single_end.umi.sorted.bam", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.single_end.umi.sorted.bam.bai", checkIfExists: true) + ] + input[1] = get_output_stats + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.bam).match() } + ) + } + } + + test("pe - no stats - stub") { + + options "-stub" + + config "./nextflow.config" + + when { + process { + """ + get_output_stats = false + + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.umi.sorted.bam", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.umi.sorted.bam.bai", checkIfExists: true) + ] + input[1] = get_output_stats + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.bam).match() } + ) + } + } + + test("pe - with stats - stub") { + + options "-stub" + + config "./nextflow.config" + + when { + process { + """ + get_output_stats = true + + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.umi.sorted.bam", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.umi.sorted.bam.bai", checkIfExists: true) + ] + input[1] = get_output_stats + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.bam).match() } + ) + } + } +} diff --git a/modules/nf-core/umitools/dedup/tests/main.nf.test.snap b/modules/nf-core/umitools/dedup/tests/main.nf.test.snap new file mode 100644 index 0000000..04b8169 --- /dev/null +++ b/modules/nf-core/umitools/dedup/tests/main.nf.test.snap @@ -0,0 +1,122 @@ +{ + "pe - no stats - stub": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.dedup.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-03T11:40:46.802233" + }, + "pe - with stats - stub": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.dedup.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-03T11:40:59.501624" + }, + "pe - with stats": { + "content": [ + "b7be15ac7aae194b04bdbb56f3534495", + [ + [ + { + "id": "test", + "single_end": false + }, + "test.dedup_edit_distance.tsv:md5,c247a49b58768e6e2e86a6c08483e612" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.dedup_per_umi.tsv:md5,ced75f7bdbf38bf78f3137d5325a8773" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.dedup_per_umi_per_position.tsv:md5,2e1a12e6f720510880068deddeefe063" + ] + ], + [ + "versions.yml:md5,e2f5146464c09bf7ae98c85ea5410e50" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-25T17:25:28.939957" + }, + "se - no stats - stub": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.dedup.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-03T11:40:34.598176" + }, + "se - no stats": { + "content": [ + "9158ea6e7a0e54819e25cbac5fbc5cc0", + [ + "versions.yml:md5,e2f5146464c09bf7ae98c85ea5410e50" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-23T09:06:54.373171" + }, + "pe - no stats": { + "content": [ + "b7be15ac7aae194b04bdbb56f3534495", + [ + "versions.yml:md5,e2f5146464c09bf7ae98c85ea5410e50" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-25T17:24:51.423637" + } +} \ No newline at end of file diff --git a/modules/nf-core/umitools/dedup/tests/nextflow.config b/modules/nf-core/umitools/dedup/tests/nextflow.config new file mode 100644 index 0000000..da6652d --- /dev/null +++ b/modules/nf-core/umitools/dedup/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: UMITOOLS_DEDUP { + ext.prefix = { "${meta.id}.dedup" } + } +} diff --git a/modules/nf-core/umitools/dedup/tests/tags.yml b/modules/nf-core/umitools/dedup/tests/tags.yml new file mode 100644 index 0000000..5934c5c --- /dev/null +++ b/modules/nf-core/umitools/dedup/tests/tags.yml @@ -0,0 +1,2 @@ +umitools/dedup: + - "modules/nf-core/umitools/dedup/**" diff --git a/nextflow.config b/nextflow.config index 3eea644..b241fca 100644 --- a/nextflow.config +++ b/nextflow.config @@ -1,65 +1,205 @@ /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - UMCUGenetics/DxNextflowRNA Nextflow config file + umcugenetics/dxnextflowrna Nextflow config file ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Default config options for all compute environments +---------------------------------------------------------------------------------------- */ -manifest { - name = 'UMCUGenetics/DxNextflowRNA' - author = """UMCU Genetics""" - homePage = 'https://github.com/UMCUGenetics/DxNextflowRNA' - description = """UMCU Genetics RNA Workflow""" - mainScript = 'main.nf' - nextflowVersion = '!>=23.04.0' - version = '1.0dev' - doi = '' -} - -// Nextflow plugins -plugins { - id 'nf-validation' // Validation of pipeline parameters and creation of an input channel from a sample sheet -} - // Global default params, used in configs params { - genome = '/hpc/diaggen/data/databases/ref_genomes/Homo_sapiens.GRCh37.GATK.illumina/Homo_sapiens.GRCh37.GATK.illumina.fasta' - star_index = '/hpc/diaggen/data/databases/STAR_ref_genome_index/GCA_000001405.15_GRCh38_no_alt_plus_hs38d1_analysis_set_2.7.9a' - gtf = '/hpc/diaggen/data/databases/gencode/gencode.v44.primary_assembly.basic.annotation.gtf' + // Metadata sequencers + seq_platform = 'Illumina' + seq_center = 'UMCU Genetics' // Input options - input = null + input = null - // Output options - outdir = null - publish_dir_mode = 'link' - email = null + // References + // Homo_sapiens GRCh38 GCA_000001405.15 + gencode_version_name = "GRCh38_gencode_v22_CTAT_lib_Mar012021" + genome_base = "/hpc/diaggen/data/databases/ref_genomes/${params.gencode_version_name}/${params.gencode_version_name}.plug-n-play/ctat_genome_lib_build_dir" + fasta = "${params.genome_base}/ref_genome.fa" + fai = "${params.genome_base}/ref_genome.fa.fai" + gtf = "${params.genome_base}/ref_annot.gtf" + star_index = "${params.genome_base}/ref_genome.fa.star.idx/" - // cluster_options = "--mail-user $params.email --mail-type FAIL --account=diaggen" -} + // Dx_tracks references + dx_tracks_path = '/hpc/diaggen/software/development/Dx_tracks/' + + // Custom reference files GRCh38 + gene_bed = "${params.dx_tracks_path}/rna/${params.gencode_version_name}.ref_annot.gtf.bed" + ref_flat = "${params.dx_tracks_path}/rna/${params.gencode_version_name}.ref_annot.gtf.refflat" + rrna_intervals = "${params.dx_tracks_path}/rna/GRCh38_rRNA_genbank.interval_list" + + // Reference files sortmerna + rrna_database_manifest = "${projectDir}/assets/sortmerna-db-default.txt" + sortmerna_index = "/hpc/diaggen/data/databases/sortmerna/sortmerna_index/genome/sortmerna/idx" + sortmerna_index_versions = "/hpc/diaggen/data/databases/sortmerna/sortmerna_index/genome/sortmerna/versions.yml" + save_non_ribo_reads = false + + // MultiQC options + multiqc_config = null + multiqc_title = null + multiqc_logo = null + max_multiqc_email_size = '25.MB' + multiqc_methods_description = null -// Capture exit codes from upstream processes when piping -process.shell = ['/bin/bash', '-euo', 'pipefail'] + // Output / Boilerplate options + outdir = null + analysis_id = null + publish_dir_mode = 'link' + email = null + email_on_fail = null + plaintext_email = false + monochrome_logs = false + hook_url = null + help = false + help_full = false + show_hidden = false + version = false + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/' -// Load base.config + cluster_options = "--mail-user ${params.email} --mail-type FAIL --account=diaggen" + + // Tool arguments + rseqc_modules = "bam_stat,infer_experiment,inner_distance,junction_annotation,junction_saturation,read_distribution,read_duplication" + + // Config options + config_profile_name = null + config_profile_description = null + + custom_config_version = 'master' + custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" + config_profile_contact = null + config_profile_url = null + + // Schema validation default options + validate_params = true +} + +// Load base.config by default for all pipelines includeConfig 'conf/base.config' +executor { + queueSize = 1000 + pollInterval = '1min' + queueStatInterval = '5min' + submitRatelimit = '10sec' +} -// Load modules.config for module specific options -includeConfig 'conf/modules.config' +mail { + smtp.host = 'localhost' +} + +profiles { + singularity { + singularity.enabled = true + singularity.autoMounts = true + conda.enabled = false + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false + singularity.runOptions = '-B /hpc:/hpc -B $TMPDIR:$TMPDIR' + singularity.cacheDir = '/hpc/diaggen/software/singularity_cache/' + + } + apptainer { + apptainer.enabled = true + apptainer.autoMounts = true + cacheDir = '/hpc/diaggen/software/singularity_cache/' + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + test { includeConfig 'conf/test.config' } + test_full { includeConfig 'conf/test_full.config' } +} + +// Load nf-core custom profiles from different Institutions +includeConfig !System.getenv('NXF_OFFLINE') && params.custom_config_base ? "${params.custom_config_base}/nfcore_custom.config" : "/dev/null" + +// Load umcugenetics/dxnextflowrna custom profiles from different institutions. +// TODO nf-core: Optionally, you can add a pipeline-specific nf-core config at https://github.com/nf-core/configs +// includeConfig !System.getenv('NXF_OFFLINE') && params.custom_config_base ? "${params.custom_config_base}/pipeline/dxnextflowrna.config" : "/dev/null" +// Set default registry for Apptainer, Docker, Podman, Charliecloud and Singularity independent of -profile +// Will not be used unless Apptainer / Docker / Podman / Charliecloud / Singularity are enabled +// Set to your registry if you have a mirror of containers +apptainer.registry = 'quay.io' +singularity.registry = 'quay.io' + + +// Set bash options +process.shell = """\ +bash + +set -e # Exit if a tool returns a non-zero status/exit code +set -u # Treat unset variables and parameters as an error +set -o pipefail # Returns the status of the last command to exit with a non-zero status or zero if all successfully execute +set -C # No clobber - prevent output redirection from overwriting files. +""" + +// Disable process selector warnings by default. Use debug profile to enable warnings. +nextflow.enable.configProcessNamesValidation = false + +def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') +timeline { + enabled = true + file = "${params.outdir}/log/nextflow_timeline.html" + overwrite = true + +} report { enabled = true - file = "$params.outdir/log/nextflow_report.html" + file = "${params.outdir}/log/nextflow_report.html" overwrite = true } - trace { - enabled = true - file = "$params.outdir/log/nextflow_trace.txt" + enabled = true + file = "${params.outdir}/log/nextflow_trace.txt" + fields = 'task_id,hash,native_id,process,tag,name,status,exit,module,container,cpus,time,disk,memory,attempt,submit,start,complete,duration,realtime,queue,%cpu,%mem,rss,vmem,peak_rss,peak_vmem,rchar,wchar,syscr,syscw,read_bytes,write_bytes,vol_ctxt,inv_ctxt' overwrite = true } -timeline { - enabled = true - file = "$params.outdir/log/nextflow_timeline.html" - overwrite = true +manifest { + name = 'umcugenetics/dxnextflowrna' + author = """UMCU Genetics""" + homePage = 'https://github.com/umcugenetics/dxnextflowrna' + description = """UMCU Genetics RNA seq Workflow""" + mainScript = 'main.nf' + nextflowVersion = '!>=24.10.2' + version = '1.0.0' + doi = '' } + +// Nextflow plugins +plugins { + id 'nf-schema@2.1.1' // Validation of pipeline parameters and creation of an input channel from a sample sheet +} + +validation { + help { + enabled = true + command = "nextflow run $manifest.name -profile --input --outdir --analysis_id " + fullParameter = "help_full" + showHiddenParameter = "show_hidden" + afterText = """${manifest.doi ? "* The pipeline\n" : ""}${manifest.doi.tokenize(",").collect { " https://doi.org/${it.trim().replace('https://doi.org/','')}"}.join("\n")}${manifest.doi ? "\n" : ""} +* The nf-core framework + https://doi.org/10.1038/s41587-020-0439-x + +* Software dependencies + https://github.com/${manifest.name}/blob/master/CITATIONS.md +""" + } + summary { + afterText = validation.help.afterText + } +} + +// Load modules.config for DSL2 module specific options +includeConfig 'conf/modules.config' diff --git a/nextflow_schema.json b/nextflow_schema.json index cae3134..0ce4ced 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -1,79 +1,315 @@ { - "$schema": "http://json-schema.org/draft-07/schema", - "$id": "https://raw.githubusercontent.com/UMCUGenetics/DxNextflowRNA/master/nextflow_schema.json", - "title": "UMCUGenetics/DxNextflowRNA pipeline parameters", - "description": "UMCU Genetics RNA Workflow", - "type": "object", - "definitions": { - "input_output_options": { - "title": "Input/output options", - "type": "object", - "fa_icon": "fas fa-terminal", - "description": "Define where the pipeline should find input data and save output data.", - "required": ["input", "outdir"], - "properties": { - "input": { - "type": "string", - "format": "directory-path", - "exists": true, - "description": "Path to input data.", - "fa_icon": "fas fa-folder-open" - }, - "outdir": { - "type": "string", - "format": "directory-path", - "description": "The output directory where the results will be saved.", - "fa_icon": "fas fa-folder-open" - }, - "email": { - "type": "string", - "description": "Email address for completion summary.", - "fa_icon": "fas fa-envelope", - "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$" - } - } - }, - "reference_genome_options": { - "title": "Reference genome options", - "type": "object", - "fa_icon": "fas fa-dna", - "description": "Reference genome related files and options required for the workflow.", - "properties": { - "genome": { - "type": "string", - "description": "Path of genome reference.", - "fa_icon": "fas fa-book", - "help_text": "" + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://raw.githubusercontent.com/umcugenetics/dxnextflowrna/master/nextflow_schema.json", + "title": "umcugenetics/dxnextflowrna pipeline parameters", + "description": "UMCU Genetics RNA seq Workflow", + "type": "object", + "$defs": { + "input_output_options": { + "title": "Input/output options", + "type": "object", + "fa_icon": "fas fa-terminal", + "description": "Define where the pipeline should find input data and save output data.", + "required": [ + "input", + "outdir", + "analysis_id" + ], + "properties": { + "analysis_id": { + "type": "string" + }, + "input": { + "type": "string", + "format": "directory-path", + "exists": true, + "description": "Path to input data.", + "fa_icon": "fas fa-folder-open" + }, + "outdir": { + "type": "string", + "format": "directory-path", + "description": "The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure.", + "fa_icon": "fas fa-folder-open" + }, + "email": { + "type": "string", + "description": "Email address for completion summary.", + "fa_icon": "fas fa-envelope", + "help_text": "Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits. If set in your user config file (`~/.nextflow/config`) then you don't need to specify this on the command line for every run.", + "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$" + }, + "multiqc_title": { + "type": "string", + "description": "MultiQC report title. Printed as page header, used for filename if not otherwise specified.", + "fa_icon": "fas fa-file-signature" + } + } + }, + "reference_genome_options": { + "title": "Reference genome options", + "type": "object", + "fa_icon": "fas fa-dna", + "description": "Reference genome related files and options required for the workflow.", + "properties": { + "fasta": { + "type": "string", + "format": "file-path", + "exists": true, + "mimetype": "text/plain", + "pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$", + "description": "Path to FASTA genome file.", + "fa_icon": "far fa-file-code" + }, + "fai": { + "type": "string", + "format": "file-path", + "mimetype": "text/plain", + "pattern": "^\\S+\\.fn?ai(\\.gz)?$", + "description": "Path to FASTA genome index file.", + "fa_icon": "far fa-file-code" + }, + "gencode_version_name": { + "type": "string", + "default": "GRCh38_gencode_v22_CTAT_lib_Mar012021" + }, + "genome_base": { + "type": "string", + "fa_icon": "far fa-file-code", + "format": "directory-path" + }, + "gene_bed": { + "type": "string", + "fa_icon": "far fa-file-code", + "format": "file-path", + "mimetype": "text/plain" + }, + "gtf": { + "type": "string", + "format": "file-path", + "exists": true, + "mimetype": "text/plain", + "pattern": "^\\S+\\.gtf(\\.gz)?$", + "description": "Path to GTF annotation file.", + "fa_icon": "fas fa-code-branch" + }, + "ref_flat": { + "type": "string", + "fa_icon": "far fa-file-code", + "format": "file-path", + "mimetype": "text/plain" + }, + "rrna_database_manifest": { + "type": "string" + }, + "rrna_intervals": { + "type": "string", + "fa_icon": "far fa-file-code", + "format": "file-path", + "mimetype": "text/plain" + }, + "sortmerna_index": { + "type": "string" + }, + "sortmerna_index_versions": { + "type": "string" + }, + "star_index": { + "type": "string", + "format": "directory-path", + "exists": true, + "fa_icon": "fas fa-bezier-curve", + "description": "Path to directory or tar.gz archive for pre-built STAR index." + } + } + }, + "institutional_config_options": { + "title": "Institutional config options", + "type": "object", + "fa_icon": "fas fa-university", + "description": "Parameters used to describe centralised config profiles. These should not be edited.", + "help_text": "The centralised nf-core configuration profiles use a handful of pipeline parameters to describe themselves. This information is then printed to the Nextflow log when you run a pipeline. You should not need to change these values when you run a pipeline.", + "properties": { + "custom_config_version": { + "type": "string", + "description": "Git commit id for Institutional configs.", + "default": "master", + "hidden": true, + "fa_icon": "fas fa-users-cog" + }, + "custom_config_base": { + "type": "string", + "description": "Base directory for Institutional configs.", + "default": "https://raw.githubusercontent.com/nf-core/configs/master", + "hidden": true, + "help_text": "If you're running offline, Nextflow will not be able to fetch the institutional config files from the internet. If you don't need them, then this is not a problem. If you do need them, you should download the files from the repo and tell Nextflow where to find them with this parameter.", + "fa_icon": "fas fa-users-cog" + }, + "config_profile_name": { + "type": "string", + "description": "Institutional config name.", + "hidden": true, + "fa_icon": "fas fa-users-cog" + }, + "config_profile_description": { + "type": "string", + "description": "Institutional config description.", + "hidden": true, + "fa_icon": "fas fa-users-cog" + }, + "config_profile_contact": { + "type": "string", + "description": "Institutional config contact information.", + "hidden": true, + "fa_icon": "fas fa-users-cog" + }, + "config_profile_url": { + "type": "string", + "description": "Institutional config URL link.", + "hidden": true, + "fa_icon": "fas fa-users-cog" + } + } + }, + "generic_options": { + "title": "Generic options", + "type": "object", + "fa_icon": "fas fa-file-import", + "description": "Less common options for the pipeline, typically set in a config file.", + "help_text": "These options are common to all nf-core pipelines and allow you to customise some of the core preferences for how the pipeline runs.\n\nTypically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`.", + "properties": { + "cluster_options": { + "type": "string", + "default": "--mail-user null --mail-type FAIL --account=diaggen" + }, + "version": { + "type": "boolean", + "description": "Display version and exit.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, + "publish_dir_mode": { + "type": "string", + "default": "link", + "description": "Method used to save pipeline results to output directory.", + "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", + "fa_icon": "fas fa-copy", + "enum": [ + "symlink", + "rellink", + "link", + "copy", + "copyNoFollow", + "move" + ], + "hidden": true + }, + "email_on_fail": { + "type": "string", + "description": "Email address for completion summary, only when pipeline fails.", + "fa_icon": "fas fa-exclamation-triangle", + "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$", + "help_text": "An email address to send a summary email to when the pipeline is completed - ONLY sent if the pipeline does not exit successfully.", + "hidden": true + }, + "plaintext_email": { + "type": "boolean", + "description": "Send plain-text email instead of HTML.", + "fa_icon": "fas fa-remove-format", + "hidden": true + }, + "max_multiqc_email_size": { + "type": "string", + "description": "File size limit when attaching MultiQC reports to summary emails.", + "pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$", + "default": "25.MB", + "fa_icon": "fas fa-file-upload", + "hidden": true + }, + "monochrome_logs": { + "type": "boolean", + "description": "Do not use coloured log outputs.", + "fa_icon": "fas fa-palette", + "hidden": true + }, + "hook_url": { + "type": "string", + "description": "Incoming hook URL for messaging service", + "fa_icon": "fas fa-people-group", + "help_text": "Incoming hook URL for messaging service. Currently, MS Teams and Slack are supported.", + "hidden": true + }, + "multiqc_config": { + "type": "string", + "format": "file-path", + "description": "Custom config file to supply to MultiQC.", + "fa_icon": "fas fa-cog", + "hidden": true, + "default": "/home/cog/edejong2/repos/DxNextflowRNA_create_pipeline/assets/multiqc_config.yml" + }, + "multiqc_logo": { + "type": "string", + "description": "Custom logo file to supply to MultiQC. File name must also be set in the MultiQC config file", + "fa_icon": "fas fa-image", + "hidden": true + }, + "multiqc_methods_description": { + "type": "string", + "description": "Custom MultiQC yaml file containing HTML including a methods description.", + "fa_icon": "fas fa-cog" + }, + "validate_params": { + "type": "boolean", + "description": "Boolean whether to validate parameters against the schema at runtime", + "default": true, + "fa_icon": "fas fa-check-square", + "hidden": true + }, + "pipelines_testdata_base_path": { + "type": "string", + "fa_icon": "far fa-check-circle", + "description": "Base URL or local path to location of pipeline test dataset files", + "default": "https://raw.githubusercontent.com/nf-core/test-datasets/", + "hidden": true + }, + "rseqc_modules": { + "type": "string", + "default": "bam_stat,infer_experiment,inner_distance,junction_annotation,junction_saturation,read_distribution,read_duplication" + }, + "save_non_ribo_reads": { + "type": "boolean" + }, + "seq_platform": { + "type": "string", + "default": "Illumina", + "description": "Sequencing platform" + }, + "seq_center": { + "type": "string", + "default": "UMCU Genetics", + "description": "Sequencing center" + } + } } - } }, - "generic_options": { - "title": "Generic options", - "type": "object", - "fa_icon": "fas fa-file-import", - "description": "Less common options for the pipeline, typically set in a config file.", - "properties": { - "publish_dir_mode": { + "allOf": [ + { + "$ref": "#/$defs/input_output_options" + }, + { + "$ref": "#/$defs/reference_genome_options" + }, + { + "$ref": "#/$defs/institutional_config_options" + }, + { + "$ref": "#/$defs/generic_options" + } + ], + "properties": { + "dx_tracks_path": { "type": "string", - "default": "copy", - "description": "Method used to save pipeline results to output directory.", - "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", - "fa_icon": "fas fa-copy", - "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], - "hidden": true + "default": "/hpc/diaggen/software/development/Dx_tracks/" } - } - } - }, - "allOf": [ - { - "$ref": "#/definitions/input_output_options" - }, - { - "$ref": "#/definitions/reference_genome_options" - }, - { - "$ref": "#/definitions/generic_options" } - ] } diff --git a/nf-params.yml b/nf-params.yml new file mode 100644 index 0000000..aa8a403 --- /dev/null +++ b/nf-params.yml @@ -0,0 +1,9 @@ +## ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +## umcugenetics/dxnextflowrna 1.0.0 +## ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +## This is an example parameter file to pass to the `-params-file` option +## of nextflow run with the umcugenetics/dxnextflowrna pipeline. +## +## Uncomment lines with a single '#' if you want to pass the parameter to +## the pipeline. +## ----------------------------------------------------------------------------- diff --git a/run_nextflow_rna.sh b/run_nextflow_rna.sh new file mode 100755 index 0000000..61edaca --- /dev/null +++ b/run_nextflow_rna.sh @@ -0,0 +1,112 @@ +#!/bin/bash +set -euo pipefail + +workflow_path='/hpc/diaggen/software/production/DxNextflowRNA/' + +# Set input and output dirs +input=`realpath $1` +output=`realpath $2` +email=$3 +optional_params=( "${@:4}" ) + +analysis_id=$(basename ${output}) + +mkdir -p $output && cd $output +mkdir -p log + +if ! { [ -f 'workflow.running' ] || [ -f 'workflow.done' ] || [ -f 'workflow.failed' ]; }; then +touch workflow.running + +output_log="${output}/log" +file="${output_log}/nextflow_trace.txt" +# Check if nextflow_trace.txt exists +if [ -e "${file}" ]; then + current_suffix=0 + # Get a list of all trace files WITH a suffix + trace_file_list=$(ls "${output_log}"/nextflow_trace*.txt 2> /dev/null) + # Check if any trace files with a suffix exist + if [ "$?" -eq 0 ]; then + # Check for each trace file with a suffix if the suffix is the highest and save that one as the current suffix + for trace_file in ${trace_file_list}; do + basename_trace_file=$(basename "${trace_file}") + if echo "${basename_trace_file}" | grep -qE '[0-9]+'; then + suffix=$(echo "${basename_trace_file}" | grep -oE '[0-9]+') + else + suffix=0 + fi + + if [ "${suffix}" -gt "${current_suffix}" ]; then + current_suffix=${suffix} + fi + done + fi + # Increment the suffix + new_suffix=$((current_suffix + 1)) + # Create the new file name with the incremented suffix + new_file="${file%.*}_$new_suffix.${file##*.}" + # Rename the file + mv "${file}" "${new_file}" +fi + +sbatch < md5sum.txt + + echo "RNA workflow completed successfully." + rm workflow.running + touch workflow.done + + echo "Change permissions" + chmod 775 -R $output + + exit 0 +else + echo "Nextflow failed" + rm workflow.running + touch workflow.failed + + echo "Change permissions" + chmod 775 -R $output + + exit 1 +fi +EOT +else +echo "Workflow job not submitted, please check $output for 'workflow.status' files." +fi diff --git a/subworkflows/local/fastq_bam_qc.nf b/subworkflows/local/fastq_bam_qc.nf new file mode 100644 index 0000000..edc8787 --- /dev/null +++ b/subworkflows/local/fastq_bam_qc.nf @@ -0,0 +1,80 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT MODULES / SUBWORKFLOWS / FUNCTIONS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ +// MODULES, alphabetical order +include { FASTQC } from '../../modules/nf-core/fastqc/main' +include { PICARD_COLLECTRNASEQMETRICS } from '../../modules/nf-core/picard/collectrnaseqmetrics/main' +include { PRESEQ_LCEXTRAP } from '../../modules/nf-core/preseq/lcextrap/main' + +// SUBWORKFLOWS, alphabetical order +include { BAM_RSEQC } from '../../subworkflows/nf-core/bam_rseqc/main' + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + FASTQ_BAM_QC (sub)workflow +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ +workflow FASTQ_BAM_QC { + take: + ch_bam_bai // channel: [ val(meta), [ bam, bai] ] + ch_fasta // channel: path(fasta) + ch_fastq // channel: [ val(meta), [ fastq_forward, fastq_reverse ] ] + ch_gene_bed // channel: path(gene_bed.bed) + ch_ref_flat // channel: path(refFlat.txt) + ch_rrna_interval // channel: path(rrna_intervals.bed) + + main: + ch_bam = ch_bam_bai.map { meta, bam, bai -> [meta, bam] } + ch_versions = Channel.empty() + + // QC tools on fastq + FASTQC(ch_fastq) + ch_versions = ch_versions.mix(FASTQC.out.versions.first()) + + // QC tools on bam + def rseqc_modules = params.rseqc_modules ? params.rseqc_modules.split(',').collect { it.trim().toLowerCase() } : [] + BAM_RSEQC(ch_bam_bai, ch_gene_bed, rseqc_modules) + ch_versions = ch_versions.mix(BAM_RSEQC.out.versions) + + PICARD_COLLECTRNASEQMETRICS(ch_bam, ch_ref_flat, ch_fasta, ch_rrna_interval) + ch_versions = ch_versions.mix(PICARD_COLLECTRNASEQMETRICS.out.versions.first()) + + PRESEQ_LCEXTRAP(ch_bam) + ch_versions = ch_versions.mix(PRESEQ_LCEXTRAP.out.versions.first()) + + emit: + fastqc_html = FASTQC.out.html // channel: [ val(meta), html] + fastqc_zip = FASTQC.out.zip // channel: [ val(meta), zip ] + bamstat_txt = BAM_RSEQC.out.bamstat_txt // channel: [ val(meta), txt ] + innerdistance_all = BAM_RSEQC.out.innerdistance_all // channel: [ val(meta), {txt, pdf, r} ] + innerdistance_distance = BAM_RSEQC.out.innerdistance_distance // channel: [ val(meta), txt ] + innerdistance_freq = BAM_RSEQC.out.innerdistance_freq // channel: [ val(meta), txt ] + innerdistance_mean = BAM_RSEQC.out.innerdistance_mean // channel: [ val(meta), txt ] + innerdistance_pdf = BAM_RSEQC.out.innerdistance_pdf // channel: [ val(meta), pdf ] + innerdistance_rscript = BAM_RSEQC.out.innerdistance_rscript // channel: [ val(meta), r ] + inferexperiment_txt = BAM_RSEQC.out.inferexperiment_txt // channel: [ val(meta), txt ] + junctionannotation_all = BAM_RSEQC.out.junctionannotation_all // channel: [ val(meta), {bed, xls, pdf, r, log} ] + junctionannotation_bed = BAM_RSEQC.out.junctionannotation_bed // channel: [ val(meta), bed ] + junctionannotation_interact_bed = BAM_RSEQC.out.junctionannotation_interact_bed // channel: [ val(meta), bed ] + junctionannotation_xls = BAM_RSEQC.out.junctionannotation_xls // channel: [ val(meta), xls ] + junctionannotation_pdf = BAM_RSEQC.out.junctionannotation_pdf // channel: [ val(meta), pdf ] + junctionannotation_events_pdf = BAM_RSEQC.out.junctionannotation_events_pdf // channel: [ val(meta), pdf ] + junctionannotation_rscript = BAM_RSEQC.out.junctionannotation_rscript // channel: [ val(meta), r ] + junctionannotation_log = BAM_RSEQC.out.junctionannotation_log // channel: [ val(meta), log ] + junctionsaturation_all = BAM_RSEQC.out.junctionsaturation_all // channel: [ val(meta), {pdf, r} ] + junctionsaturation_pdf = BAM_RSEQC.out.junctionsaturation_pdf // channel: [ val(meta), pdf ] + junctionsaturation_rscript = BAM_RSEQC.out.junctionsaturation_rscript // channel: [ val(meta), r ] + readdistribution_txt = BAM_RSEQC.out.readdistribution_txt // channel: [ val(meta), txt ] + readduplication_all = BAM_RSEQC.out.readduplication_all // channel: [ val(meta), {xls, pdf, r} ] + readduplication_seq_xls = BAM_RSEQC.out.readduplication_seq_xls // channel: [ val(meta), xls ] + readduplication_pos_xls = BAM_RSEQC.out.readduplication_pos_xls // channel: [ val(meta), xls ] + readduplication_pdf = BAM_RSEQC.out.readduplication_pdf // channel: [ val(meta), pdf ] + readduplication_rscript = BAM_RSEQC.out.readduplication_rscript // channel: [ val(meta), r ] + tin_txt = BAM_RSEQC.out.tin_txt // channel: [ val(meta), txt ] + rna_metrics = PICARD_COLLECTRNASEQMETRICS.out.metrics // channel: [ val(meta), rna_metrics ] + lc_extrap = PRESEQ_LCEXTRAP.out.lc_extrap // channel: [ val(meta), lc_extrap.txt + preseq_lcextrap_log = PRESEQ_LCEXTRAP.out.log // channel: [ val(meta), log ] + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/fastq_trim_filter_align_dedup.nf b/subworkflows/local/fastq_trim_filter_align_dedup.nf new file mode 100644 index 0000000..d7c580a --- /dev/null +++ b/subworkflows/local/fastq_trim_filter_align_dedup.nf @@ -0,0 +1,108 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT MODULES / SUBWORKFLOWS / FUNCTIONS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ +// MODULES, alphabetical order +include { SAMTOOLS_CONVERT } from '../../modules/nf-core/samtools/convert/main' +include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main' +include { SAMTOOLS_MERGE } from '../../modules/nf-core/samtools/merge/main' +// Different ext.args are provided bedepending on using sortmerna to create an index only (SORTMERNA_INDEX) +// or run sortmerna to filter the reads (SORTMERNA_READS). +include { SORTMERNA as SORTMERNA_READS } from '../../modules/nf-core/sortmerna/main' +include { STAR_ALIGN } from '../../modules/nf-core/star/align/main.nf' +include { TRIMGALORE } from '../../modules/nf-core/trimgalore/main' + +// SUBWORKFLOWS, alphabetical order +include { BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS } from '../../subworkflows/nf-core/bam_dedup_stats_samtools_umitools/main' + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + FASTQ_TRIM_FILTER_ALIGN_DEDUP (sub)workflow +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ +workflow FASTQ_TRIM_FILTER_ALIGN_DEDUP { + take: + ch_fasta_fai // channel: [ val(meta), path(fa), path(fai) ] + ch_fastq // channel: [ val(meta), [ path(fastq1), path(fastq2) ] ] + ch_gtf // channel: [ val(meta), path(gtf) ] + ch_sortmerna_fastas // channel: [ val(meta), file(fastas)] + ch_sortmerna_index // channel: [ val(meta), path(/path/to/sortmerna/index/) + ch_star_index // channel: [ val(meta), path(star_index) ] + seq_platform // val(seq_platform) + seq_center // val(seq_center) + star_ignore_sjdbgtf // boolean + + main: + // Create empty versions channel, and fill with each tools version + ch_versions = Channel.empty() + + TRIMGALORE(ch_fastq) + ch_versions = ch_versions.mix(TRIMGALORE.out.versions.first()) + + SORTMERNA_READS(TRIMGALORE.out.reads, ch_sortmerna_fastas, ch_sortmerna_index) + ch_versions = ch_versions.mix(SORTMERNA_READS.out.versions.first()) + + STAR_ALIGN(SORTMERNA_READS.out.reads, ch_star_index, ch_gtf, star_ignore_sjdbgtf, seq_platform, seq_center) + ch_versions = ch_versions.mix(STAR_ALIGN.out.versions.first()) + + SAMTOOLS_MERGE( + STAR_ALIGN.out.bam_sorted_aligned.map { meta, bam -> + new_id = meta.id.split('_')[0] + [meta + [id: new_id], bam] + }.groupTuple(), + [[id: 'null'], []], + [[id: 'null'], []], + ) + ch_versions = ch_versions.mix(SAMTOOLS_MERGE.out.versions.first()) + + // samtools index will create a .bai. RSeQC, and maybe other tools as well, requires .bai instead of .csi. + SAMTOOLS_INDEX(SAMTOOLS_MERGE.out.bam) + ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first()) + + BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS( + SAMTOOLS_MERGE.out.bam.join(SAMTOOLS_INDEX.out.bai), + true, + ) + ch_versions = ch_versions.mix(BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS.out.versions) + + // Create variable to use in emit as well. + ch_bam_bai = BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS.out.bam.join(BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS.out.bai) + + SAMTOOLS_CONVERT( + ch_bam_bai, + ch_fasta_fai.map { meta, fasta, fai -> [meta, fasta] }, + ch_fasta_fai.map { meta, fasta, fai -> [meta, fai] }, + ) + ch_versions = ch_versions.mix(SAMTOOLS_CONVERT.out.versions.first()) + + emit: + trim_reads = TRIMGALORE.out.reads // channel: [ val(meta), path(fq.gz) ] + trim_unpaired = TRIMGALORE.out.unpaired // channel: [ val(meta), path(fq.gz) ] + trim_html = TRIMGALORE.out.html // channel: [ val(meta), path(html) ] + trim_zip = TRIMGALORE.out.zip // channel: [ val(meta), path(zip) ] + trim_log = TRIMGALORE.out.log // channel: [ val(meta), path(txt) ] + sortmerna_log = SORTMERNA_READS.out.log // channel: [ val(meta), path(log) ] + star_align_bam = STAR_ALIGN.out.bam // channel: [ val(meta), path(bam) ] + star_align_bam_sorted = STAR_ALIGN.out.bam_sorted // channel: [ val(meta), path(bam) ] + star_align_bam_unsorted = STAR_ALIGN.out.bam_unsorted // channel: [ val(meta), path(bam) ] + star_align_bam_transcript = STAR_ALIGN.out.bam_transcript // channel: [ val(meta), path(bam) ] + star_align_sam = STAR_ALIGN.out.sam // channel: [ val(meta), path(sam) ] + star_align_fastq = STAR_ALIGN.out.fastq // channel: [ val(meta), path(fastq) ] + star_align_tab = STAR_ALIGN.out.tab // channel: [ val(meta), path(tab) ] + star_align_spl_junc_tab = STAR_ALIGN.out.spl_junc_tab // channel: [ val(meta), path(spl_junc_tab) ] + star_align_read_per_gene_tab = STAR_ALIGN.out.read_per_gene_tab // channel: [ val(meta), path(read_per_gene_tab) ] + star_align_junction = STAR_ALIGN.out.junction // channel: [ val(meta), path(junction) ] + star_align_log_final = STAR_ALIGN.out.log_final // channel: [ val(meta), path(log_final) ] + star_align_log_out = STAR_ALIGN.out.log_out // channel: [ val(meta), path(log_out) ] + star_align_log_progress = STAR_ALIGN.out.log_progress // channel: [ val(meta), path(log_progress) ] + star_align_wig = STAR_ALIGN.out.wig // channel: [ val(meta), path(wig) ] + star_align_bedgraph = STAR_ALIGN.out.bedgraph // channel: [ val(meta), path(bg) ] + umitools_dedup_log = BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS.out.deduplog // channel: [ val(meta), path(log) ] + samtools_stats = BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS.out.stats // channel: [ val(meta), path(stats) ] + flagstat = BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS.out.flagstat // channel: [ val(meta), path(flagstat) ] + idxstats = BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS.out.idxstats // channel: [ val(meta), path(idxstats) ] + ch_bam_bai = ch_bam_bai // channel: [ val(meta), path(bam), path(bai) ] + ch_cram_crai = SAMTOOLS_CONVERT.out.cram.join(SAMTOOLS_CONVERT.out.crai) // channel: [ val(meta), path(cram), path(crai) ] + versions = ch_versions // channel: [ versions.yml, versions.yml, ... ] +} diff --git a/subworkflows/local/utils_umcugenetics_dxnextflowrna_pipeline/main.nf b/subworkflows/local/utils_umcugenetics_dxnextflowrna_pipeline/main.nf new file mode 100644 index 0000000..d0476b9 --- /dev/null +++ b/subworkflows/local/utils_umcugenetics_dxnextflowrna_pipeline/main.nf @@ -0,0 +1,247 @@ +// +// Subworkflow with functionality specific to the umcugenetics/dxnextflowrna pipeline +// + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +include { UTILS_NFSCHEMA_PLUGIN } from '../../nf-core/utils_nfschema_plugin' +include { paramsSummaryMap } from 'plugin/nf-schema' +include { samplesheetToList } from 'plugin/nf-schema' +include { validateParameters } from 'plugin/nf-schema' +include { completionEmail } from '../../nf-core/utils_nfcore_pipeline' +include { completionSummary } from '../../nf-core/utils_nfcore_pipeline' +include { imNotification } from '../../nf-core/utils_nfcore_pipeline' +include { UTILS_NFCORE_PIPELINE } from '../../nf-core/utils_nfcore_pipeline' +include { UTILS_NEXTFLOW_PIPELINE } from '../../nf-core/utils_nextflow_pipeline' + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SUBWORKFLOW TO INITIALISE PIPELINE +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +workflow PIPELINE_INITIALISATION { + take: + version // boolean: Display version and exit + validate_params // boolean: Boolean whether to validate parameters against the schema at runtime + monochrome_logs // boolean: Do not use coloured log outputs + nextflow_cli_args // array: List of positional nextflow CLI args + outdir // string: The output directory where the results will be saved + + main: + + ch_versions = Channel.empty() + + // + // Print version and exit if required and dump pipeline parameters to JSON file + // + UTILS_NEXTFLOW_PIPELINE( + version, + true, + outdir, + workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1, + ) + + // + // Validate parameters and generate parameter summary to stdout + // + UTILS_NFSCHEMA_PLUGIN( + workflow, + validate_params, + null, + ) + + // + // Check config provided to the pipeline + // + UTILS_NFCORE_PIPELINE( + nextflow_cli_args + ) + + // + // Custom validation for pipeline parameters + // + validateInputParameters() + + emit: + versions = ch_versions +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SUBWORKFLOW FOR PIPELINE COMPLETION +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +workflow PIPELINE_COMPLETION { + take: + email // string: email address + email_on_fail // string: email address sent on pipeline failure + plaintext_email // boolean: Send plain-text email instead of HTML + outdir // path: Path to output directory where results will be published + monochrome_logs // boolean: Disable ANSI colour codes in log output + hook_url // string: hook URL for notifications + multiqc_report // string: Path to MultiQC report + + main: + summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") + + // + // Completion email and summary + // + workflow.onComplete { + if (email || email_on_fail) { + completionEmail( + summary_params, + email, + email_on_fail, + plaintext_email, + outdir, + monochrome_logs, + multiqc_report.toList(), + ) + } + + completionSummary(monochrome_logs) + if (hook_url) { + imNotification(summary_params, hook_url) + } + } + + workflow.onError { + log.error("Pipeline failed. Please refer to troubleshooting docs: https://nf-co.re/docs/usage/troubleshooting") + } +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + FUNCTIONS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ +// +// Check and validate pipeline parameters +// +def validateInputParameters() { + validateParameters() +} + +// +// Validate channels from input samplesheet +// +def validateInputSamplesheet(input) { + def (metas, fastqs) = input[1..2] + + // Check that multiple runs of the same sample are of the same datatype i.e. single-end / paired-end + def endedness_ok = metas.collect { meta -> meta.single_end }.unique().size == 1 + if (!endedness_ok) { + error("Please check input samplesheet -> Multiple runs of a sample must be of the same datatype i.e. single-end or paired-end: ${metas[0].id}") + } + + return [metas[0], fastqs] +} +// +// Get attribute from genome config file e.g. fasta +// +def getGenomeAttribute(attribute) { + if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { + if (params.genomes[params.genome].containsKey(attribute)) { + return params.genomes[params.genome][attribute] + } + } + return null +} + +// +// Exit pipeline if incorrect --genome key provided +// +def genomeExistsError() { + if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) { + def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " Genome '${params.genome}' not found in any config files provided to the pipeline.\n" + + " Currently, the available genome keys are:\n" + + " ${params.genomes.keySet().join(", ")}\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + error(error_string) + } +} +// +// Generate methods description for MultiQC +// +def toolCitationText() { + def citation_text = [ + "Tools used in the workflow included:", + "FastQC (Andrews 2010),", + "MultiQC (Ewels et al. 2016)", + "picard-tools", + "preseq (Daley et al. 2013)", + "RSeQC (Wang et al. 2012)", + "SAMtools (Li et al. 2009)", + "SortMeRNA (Kopylova et al. 2012)", + "STAR (Dobin et al. 2013)", + "TrimGalore", + "UMI-tools (Smith et al. 2017)", + ".", + ].join(' ').trim() + + return citation_text +} + +def toolBibliographyText() { + def reference_text = [ + "
  • Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/).
  • ", + "
  • Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics , 32(19), 3047–3048. doi: /10.1093/bioinformatics/btw354
  • ", + "
  • picard-tools, URL: http://broadinstitute.github.io/picard
  • ", + "
  • Daley T, Smith AD. Predicting the molecular complexity of sequencing libraries. Nat Methods. 2013 Apr;10(4):325-7. doi: 10.1038/nmeth.2375. Epub 2013 Feb 24. PubMed PMID: 23435259; PubMed Central PMCID: PMC3612374.
  • ", + "
  • Wang L, Wang S, Li W. RSeQC: quality control of RNA-seq experiments Bioinformatics. 2012 Aug 15;28(16):2184-5. doi: 10.1093/bioinformatics/bts356. Epub 2012 Jun 27. PubMed PMID: 22743226.
  • ", + "
  • Li H, Handsaker B, Wysoker A, Fennell T, Ruan J, Homer N, Marth G, Abecasis G, Durbin R; 1000 Genome Project Data Processing Subgroup. The Sequence Alignment/Map format and SAMtools. Bioinformatics. 2009 Aug 15;25(16):2078-9. doi: 10.1093/bioinformatics/btp352. Epub 2009 Jun 8. PubMed PMID: 19505943; PubMed Central PMCID: PMC2723002.
  • ", + "
  • Kopylova E, Noé L, Touzet H. SortMeRNA: fast and accurate filtering of ribosomal RNAs in metatranscriptomic data Bioinformatics. 2012 Dec 15;28(24):3211-7. doi: 10.1093/bioinformatics/bts611. Epub 2012 Oct 15. PubMed PMID: 23071270.
  • ", + "
  • Dobin A, Davis CA, Schlesinger F, Drenkow J, Zaleski C, Jha S, Batut P, Chaisson M, Gingeras TR. STAR: ultrafast universal RNA-seq aligner Bioinformatics. 2013 Jan 1;29(1):15-21. doi: 10.1093/bioinformatics/bts635. Epub 2012 Oct 25. PubMed PMID: 23104886; PubMed Central PMCID: PMC3530905.
  • ", + "
  • Trim Galore, URL: https://www.bioinformatics.babraham.ac.uk/projects/trim_galore/
  • ", + "
  • Smith T, Heger A, Sudbery I. UMI-tools: modeling sequencing errors in Unique Molecular Identifiers to improve quantification accuracy Genome Res. 2017 Mar;27(3):491-499. doi: 10.1101/gr.209601.116. Epub 2017 Jan 18. PubMed PMID: 28100584; PubMed Central PMCID: PMC5340976.
  • ", + "
  • Merkel, D. (2014). Docker: lightweight linux containers for consistent development and deployment. Linux Journal, 2014(239), 2. doi: 10.5555/2600239.2600241.
  • ", + "
  • Kurtzer GM, Sochat V, Bauer MW. Singularity: Scientific containers for mobility of compute. PLoS One. 2017 May 11;12(5):e0177459. doi: 10.1371/journal.pone.0177459. eCollection 2017. PubMed PMID: 28494014; PubMed Central PMCID: PMC5426675.
  • ", + "
  • Di Tommaso P, Chatzou M, Floden EW, Barja PP, Palumbo E, Notredame C. Nextflow enables reproducible computational workflows. Nat Biotechnol. 2017 Apr 11;35(4):316-319. doi: 10.1038/nbt.3820. PubMed PMID: 28398311.
  • ", + "
  • Ewels PA, Peltzer A, Fillinger S, Patel H, Alneberg J, Wilm A, Garcia MU, Di Tommaso P, Nahnsen S. The nf-core framework for community-curated bioinformatics pipelines. Nat Biotechnol. 2020 Mar;38(3):276-278. doi: 10.1038/s41587-020-0439-x. PubMed PMID: 32055031.
  • ", + ].join(' ').trim() + + return reference_text +} + +def methodsDescriptionText(mqc_methods_yaml) { + // Convert to a named map so can be used as with familar NXF ${workflow} variable syntax in the MultiQC YML file + def meta = [:] + meta.workflow = workflow.toMap() + meta["manifest_map"] = workflow.manifest.toMap() + + // Pipeline DOI + if (meta.manifest_map.doi) { + // Using a loop to handle multiple DOIs + // Removing `https://doi.org/` to handle pipelines using DOIs vs DOI resolvers + // Removing ` ` since the manifest.doi is a string and not a proper list + def temp_doi_ref = "" + def manifest_doi = meta.manifest_map.doi.tokenize(",") + manifest_doi.each { doi_ref -> + temp_doi_ref += "(doi: ${doi_ref.replace("https://doi.org/", "").replace(" ", "")}), " + } + meta["doi_text"] = temp_doi_ref.substring(0, temp_doi_ref.length() - 2) + } + else { + meta["doi_text"] = "" + } + meta["nodoi_text"] = meta.manifest_map.doi ? "" : "
  • If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used.
  • " + + // Tool references + meta["tool_citations"] = toolCitationText().replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".") + meta["tool_bibliography"] = toolBibliographyText() + + def methods_text = mqc_methods_yaml.text + + def engine = new groovy.text.SimpleTemplateEngine() + def description_html = engine.createTemplate(methods_text).make(meta) + + return description_html.toString() +} diff --git a/subworkflows/nf-core/bam_dedup_stats_samtools_umitools/main.nf b/subworkflows/nf-core/bam_dedup_stats_samtools_umitools/main.nf index 7c07084..fe6ff31 100644 --- a/subworkflows/nf-core/bam_dedup_stats_samtools_umitools/main.nf +++ b/subworkflows/nf-core/bam_dedup_stats_samtools_umitools/main.nf @@ -44,6 +44,7 @@ workflow BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS { emit: bam = UMITOOLS_DEDUP.out.bam // channel: [ val(meta), path(bam) ] + deduplog = UMITOOLS_DEDUP.out.log // channel: [ val(meta), path(log) ] bai = SAMTOOLS_INDEX.out.bai // channel: [ val(meta), path(bai) ] csi = SAMTOOLS_INDEX.out.csi // channel: [ val(meta), path(csi) ] diff --git a/subworkflows/nf-core/bam_dedup_stats_samtools_umitools/meta.yml b/subworkflows/nf-core/bam_dedup_stats_samtools_umitools/meta.yml index e655484..93e1238 100644 --- a/subworkflows/nf-core/bam_dedup_stats_samtools_umitools/meta.yml +++ b/subworkflows/nf-core/bam_dedup_stats_samtools_umitools/meta.yml @@ -37,6 +37,10 @@ output: description: | CSI samtools index Structure: [ val(meta), path(csi) ] + - deduplog: + description: | + UMI-tools deduplication log + Structure: [ val(meta), path(log) ] - stats: description: | File containing samtools stats output diff --git a/subworkflows/nf-core/bam_dedup_stats_samtools_umitools/tests/main.nf.test b/subworkflows/nf-core/bam_dedup_stats_samtools_umitools/tests/main.nf.test new file mode 100644 index 0000000..93e6248 --- /dev/null +++ b/subworkflows/nf-core/bam_dedup_stats_samtools_umitools/tests/main.nf.test @@ -0,0 +1,80 @@ +nextflow_workflow { + + name "Test Workflow BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS" + script "../main.nf" + workflow "BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "subworkflows/bam_dedup_stats_samtools_umitools" + tag "subworkflows/bam_stats_samtools" + tag "bam_dedup_stats_samtools_umitools" + tag "bam_stats_samtools" + tag "samtools" + tag "samtools/index" + tag "samtools/stats" + tag "samtools/idxstats" + tag "samtools/flagstat" + tag "umitools" + tag "umitools/dedup" + + test("sarscov2_bam_bai") { + + when { + workflow { + """ + val_get_dedup_stats = false + + input[0] = Channel.of([ + [ id:'test'], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.umi.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.umi.sorted.bam.bai', checkIfExists: true) + ]) + input[1] = val_get_dedup_stats + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert workflow.out.deduplog.get(0).get(1) ==~ ".*.log"}, + { assert workflow.out.bam.get(0).get(1) ==~ ".*.bam"}, + { assert workflow.out.bai.get(0).get(1) ==~ ".*.bai"}, + { assert snapshot( + bam(workflow.out.bam[0][1]).getSamLinesMD5(), + workflow.out.stats, + workflow.out.flagstat, + workflow.out.idxstats, + workflow.out.versions + ).match() } + ) + } + } + + test("sarscov2_bam_bai - stub") { + + options "-stub" + + when { + workflow { + """ + val_get_dedup_stats = false + + input[0] = Channel.of([ + [ id:'test'], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.umi.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.umi.sorted.bam.bai', checkIfExists: true) + ]) + input[1] = val_get_dedup_stats + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot(workflow.out).match() } + ) + } + } +} diff --git a/subworkflows/nf-core/bam_dedup_stats_samtools_umitools/tests/main.nf.test.snap b/subworkflows/nf-core/bam_dedup_stats_samtools_umitools/tests/main.nf.test.snap new file mode 100644 index 0000000..3b36135 --- /dev/null +++ b/subworkflows/nf-core/bam_dedup_stats_samtools_umitools/tests/main.nf.test.snap @@ -0,0 +1,170 @@ +{ + "sarscov2_bam_bai": { + "content": [ + "b7be15ac7aae194b04bdbb56f3534495", + [ + [ + { + "id": "test" + }, + "test.stats:md5,41ba57a9b90b54587e7d154e5405ea5e" + ] + ], + [ + [ + { + "id": "test" + }, + "test.flagstat:md5,18d602435a02a4d721b78d1812622159" + ] + ], + [ + [ + { + "id": "test" + }, + "test.idxstats:md5,85d20a901eef23ca50c323638a2eb602" + ] + ], + [ + "versions.yml:md5,2bf4310f1d49429016f5633f965e9ed0", + "versions.yml:md5,306b85f2109aeac87f1bf0eed94e6940", + "versions.yml:md5,db76f947845712952426efae5356018f", + "versions.yml:md5,eb35d8fd0745ef0ebacb9e26d4124e1d", + "versions.yml:md5,f4935d4e563646266cc6bb4d75d0fb7d" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-25T17:23:13.841219" + }, + "sarscov2_bam_bai - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test" + }, + "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + + ], + "4": [ + [ + { + "id": "test" + }, + "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "5": [ + [ + { + "id": "test" + }, + "test.flagstat:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "6": [ + [ + { + "id": "test" + }, + "test.idxstats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "7": [ + "versions.yml:md5,2bf4310f1d49429016f5633f965e9ed0", + "versions.yml:md5,306b85f2109aeac87f1bf0eed94e6940", + "versions.yml:md5,db76f947845712952426efae5356018f", + "versions.yml:md5,eb35d8fd0745ef0ebacb9e26d4124e1d", + "versions.yml:md5,f4935d4e563646266cc6bb4d75d0fb7d" + ], + "bai": [ + [ + { + "id": "test" + }, + "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bam": [ + [ + { + "id": "test" + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "csi": [ + + ], + "deduplog": [ + [ + { + "id": "test" + }, + "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "flagstat": [ + [ + { + "id": "test" + }, + "test.flagstat:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "idxstats": [ + [ + { + "id": "test" + }, + "test.idxstats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "stats": [ + [ + { + "id": "test" + }, + "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,2bf4310f1d49429016f5633f965e9ed0", + "versions.yml:md5,306b85f2109aeac87f1bf0eed94e6940", + "versions.yml:md5,db76f947845712952426efae5356018f", + "versions.yml:md5,eb35d8fd0745ef0ebacb9e26d4124e1d", + "versions.yml:md5,f4935d4e563646266cc6bb4d75d0fb7d" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T08:04:43.790052381" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/bam_dedup_stats_samtools_umitools/tests/tags.yml b/subworkflows/nf-core/bam_dedup_stats_samtools_umitools/tests/tags.yml new file mode 100644 index 0000000..bfd5e02 --- /dev/null +++ b/subworkflows/nf-core/bam_dedup_stats_samtools_umitools/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/bam_dedup_stats_samtools_umitools: + - subworkflows/nf-core/bam_dedup_stats_samtools_umitools/** diff --git a/subworkflows/nf-core/bam_rseqc/main.nf b/subworkflows/nf-core/bam_rseqc/main.nf new file mode 100644 index 0000000..043321a --- /dev/null +++ b/subworkflows/nf-core/bam_rseqc/main.nf @@ -0,0 +1,185 @@ +// +// Run RSeQC modules +// + +include { RSEQC_BAMSTAT } from '../../../modules/nf-core/rseqc/bamstat/main' +include { RSEQC_INNERDISTANCE } from '../../../modules/nf-core/rseqc/innerdistance/main' +include { RSEQC_INFEREXPERIMENT } from '../../../modules/nf-core/rseqc/inferexperiment/main' +include { RSEQC_JUNCTIONANNOTATION } from '../../../modules/nf-core/rseqc/junctionannotation/main' +include { RSEQC_JUNCTIONSATURATION } from '../../../modules/nf-core/rseqc/junctionsaturation/main' +include { RSEQC_READDISTRIBUTION } from '../../../modules/nf-core/rseqc/readdistribution/main' +include { RSEQC_READDUPLICATION } from '../../../modules/nf-core/rseqc/readduplication/main' +include { RSEQC_TIN } from '../../../modules/nf-core/rseqc/tin/main' + +workflow BAM_RSEQC { + take: + bam_bai // channel: [ val(meta), [ bam, bai ] ] + bed // channel: [ genome.bed ] + rseqc_modules // list: rseqc modules to run + + main: + + bam = bam_bai.map{ [ it[0], it[1] ] } + + versions = Channel.empty() + + // + // Run RSeQC bam_stat.py + // + bamstat_txt = Channel.empty() + + if ('bam_stat' in rseqc_modules) { + RSEQC_BAMSTAT(bam) + bamstat_txt = RSEQC_BAMSTAT.out.txt + versions = versions.mix(RSEQC_BAMSTAT.out.versions.first()) + } + + // + // Run RSeQC inner_distance.py + // + innerdistance_all = Channel.empty() + innerdistance_distance = Channel.empty() + innerdistance_freq = Channel.empty() + innerdistance_mean = Channel.empty() + innerdistance_pdf = Channel.empty() + innerdistance_rscript = Channel.empty() + + if ('inner_distance' in rseqc_modules) { + RSEQC_INNERDISTANCE(bam, bed) + innerdistance_distance = RSEQC_INNERDISTANCE.out.distance + innerdistance_freq = RSEQC_INNERDISTANCE.out.freq + innerdistance_mean = RSEQC_INNERDISTANCE.out.mean + innerdistance_pdf = RSEQC_INNERDISTANCE.out.pdf + innerdistance_rscript = RSEQC_INNERDISTANCE.out.rscript + innerdistance_all = innerdistance_distance.mix(innerdistance_freq, innerdistance_mean, innerdistance_pdf, innerdistance_rscript) + versions = versions.mix(RSEQC_INNERDISTANCE.out.versions.first()) + } + + // + // Run RSeQC infer_experiment.py + // + inferexperiment_txt = Channel.empty() + if ('infer_experiment' in rseqc_modules) { + RSEQC_INFEREXPERIMENT(bam, bed) + inferexperiment_txt = RSEQC_INFEREXPERIMENT.out.txt + versions = versions.mix(RSEQC_INFEREXPERIMENT.out.versions.first()) + } + + // + // Run RSeQC junction_annotation.py + // + junctionannotation_all = Channel.empty() + junctionannotation_bed = Channel.empty() + junctionannotation_interact_bed = Channel.empty() + junctionannotation_xls = Channel.empty() + junctionannotation_pdf = Channel.empty() + junctionannotation_events_pdf = Channel.empty() + junctionannotation_rscript = Channel.empty() + junctionannotation_log = Channel.empty() + + if ('junction_annotation' in rseqc_modules) { + RSEQC_JUNCTIONANNOTATION(bam, bed) + junctionannotation_bed = RSEQC_JUNCTIONANNOTATION.out.bed + junctionannotation_interact_bed = RSEQC_JUNCTIONANNOTATION.out.interact_bed + junctionannotation_xls = RSEQC_JUNCTIONANNOTATION.out.xls + junctionannotation_pdf = RSEQC_JUNCTIONANNOTATION.out.pdf + junctionannotation_events_pdf = RSEQC_JUNCTIONANNOTATION.out.events_pdf + junctionannotation_rscript = RSEQC_JUNCTIONANNOTATION.out.rscript + junctionannotation_log = RSEQC_JUNCTIONANNOTATION.out.log + junctionannotation_all = junctionannotation_bed.mix(junctionannotation_interact_bed, junctionannotation_xls, junctionannotation_pdf, junctionannotation_events_pdf, junctionannotation_rscript, junctionannotation_log) + versions = versions.mix(RSEQC_JUNCTIONANNOTATION.out.versions.first()) + } + + // + // Run RSeQC junction_saturation.py + // + junctionsaturation_all = Channel.empty() + junctionsaturation_pdf = Channel.empty() + junctionsaturation_rscript = Channel.empty() + + if ('junction_saturation' in rseqc_modules) { + RSEQC_JUNCTIONSATURATION(bam, bed) + junctionsaturation_pdf = RSEQC_JUNCTIONSATURATION.out.pdf + junctionsaturation_rscript = RSEQC_JUNCTIONSATURATION.out.rscript + junctionsaturation_all = junctionsaturation_pdf.mix(junctionsaturation_rscript) + versions = versions.mix(RSEQC_JUNCTIONSATURATION.out.versions.first()) + } + + // + // Run RSeQC read_distribution.py + // + readdistribution_txt = Channel.empty() + + if ('read_distribution' in rseqc_modules) { + RSEQC_READDISTRIBUTION(bam, bed) + readdistribution_txt = RSEQC_READDISTRIBUTION.out.txt + versions = versions.mix(RSEQC_READDISTRIBUTION.out.versions.first()) + } + + // + // Run RSeQC read_duplication.py + // + readduplication_all = Channel.empty() + readduplication_seq_xls = Channel.empty() + readduplication_pos_xls = Channel.empty() + readduplication_pdf = Channel.empty() + readduplication_rscript = Channel.empty() + + if ('read_duplication' in rseqc_modules) { + RSEQC_READDUPLICATION(bam ) + readduplication_seq_xls = RSEQC_READDUPLICATION.out.seq_xls + readduplication_pos_xls = RSEQC_READDUPLICATION.out.pos_xls + readduplication_pdf = RSEQC_READDUPLICATION.out.pdf + readduplication_rscript = RSEQC_READDUPLICATION.out.rscript + readduplication_all = readduplication_seq_xls.mix(readduplication_pos_xls, readduplication_pdf, readduplication_rscript) + versions = versions.mix(RSEQC_READDUPLICATION.out.versions.first()) + } + + // + // Run RSeQC tin.py + // + tin_txt = Channel.empty() + + if ('tin' in rseqc_modules) { + RSEQC_TIN(bam_bai, bed) + tin_txt = RSEQC_TIN.out.txt + versions = versions.mix(RSEQC_TIN.out.versions.first()) + } + + emit: + bamstat_txt // channel: [ val(meta), txt ] + + innerdistance_all // channel: [ val(meta), {txt, pdf, r} ] + innerdistance_distance // channel: [ val(meta), txt ] + innerdistance_freq // channel: [ val(meta), txt ] + innerdistance_mean // channel: [ val(meta), txt ] + innerdistance_pdf // channel: [ val(meta), pdf ] + innerdistance_rscript // channel: [ val(meta), r ] + + inferexperiment_txt // channel: [ val(meta), txt ] + + junctionannotation_all // channel: [ val(meta), {bed, xls, pdf, r, log} ] + junctionannotation_bed // channel: [ val(meta), bed ] + junctionannotation_interact_bed // channel: [ val(meta), bed ] + junctionannotation_xls // channel: [ val(meta), xls ] + junctionannotation_pdf // channel: [ val(meta), pdf ] + junctionannotation_events_pdf // channel: [ val(meta), pdf ] + junctionannotation_rscript // channel: [ val(meta), r ] + junctionannotation_log // channel: [ val(meta), log ] + + junctionsaturation_all // channel: [ val(meta), {pdf, r} ] + junctionsaturation_pdf // channel: [ val(meta), pdf ] + junctionsaturation_rscript // channel: [ val(meta), r ] + + readdistribution_txt // channel: [ val(meta), txt ] + + readduplication_all // channel: [ val(meta), {xls, pdf, r} ] + readduplication_seq_xls // channel: [ val(meta), xls ] + readduplication_pos_xls // channel: [ val(meta), xls ] + readduplication_pdf // channel: [ val(meta), pdf ] + readduplication_rscript // channel: [ val(meta), r ] + + tin_txt // channel: [ val(meta), txt ] + + versions // channel: [ versions.yml ] +} diff --git a/subworkflows/nf-core/bam_rseqc/meta.yml b/subworkflows/nf-core/bam_rseqc/meta.yml new file mode 100644 index 0000000..6e76ff5 --- /dev/null +++ b/subworkflows/nf-core/bam_rseqc/meta.yml @@ -0,0 +1,162 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: bam_rseqc +description: Subworkflow to run multiple commands in the RSeqC package +keywords: + - rnaseq + - experiment + - inferexperiment + - bamstat + - innerdistance + - junctionannotation + - junctionsaturation + - readdistribution + - readduplication + - tin +components: + - rseqc + - rseqc/tin + - rseqc/readduplication + - rseqc/readdistribution + - rseqc/junctionsaturation + - rseqc/junctionannotation + - rseqc/innerdistance + - rseqc/inferexperiment + - rseqc/bamstat +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM file to calculate statistics + pattern: "*.{bam}" + - bai: + type: file + description: Index for input BAM file + pattern: "*.{bai}" + - bed: + type: file + description: BED file for the reference gene model + pattern: "*.{bed}" + - rseqc_modules: + type: list + description: | + List of rseqc modules to run + e.g. [ 'bam_stat', 'infer_experiment' ] +output: + - bamstat_txt: + type: file + description: bam statistics report + pattern: "*.bam_stat.txt" + - innerdistance_all: + type: file + description: All the output files from RSEQC_INNERDISTANCE + pattern: "*.{txt,pdf,R}" + - innerdistance_distance: + type: file + description: the inner distances + pattern: "*.inner_distance.txt" + - innerdistance_freq: + type: file + description: frequencies of different insert sizes + pattern: "*.inner_distance_freq.txt" + - innerdistance_mean: + type: file + description: mean/median values of inner distances + pattern: "*.inner_distance_mean.txt" + - innerdistance_pdf: + type: file + description: distribution plot of inner distances + pattern: "*.inner_distance_plot.pdf" + - innerdistance_rscript: + type: file + description: script to reproduce the plot + pattern: "*.inner_distance_plot.R" + - inferexperiment_txt: + type: file + description: infer_experiment results report + pattern: "*.infer_experiment.txt" + - junctionannotation_all: + type: file + description: All the output files from RSEQC_JUNCTIONANNOTATION + pattern: "*.{bed,xls,pdf,R,log}" + - junctionannotation_bed: + type: file + description: bed file of annotated junctions + pattern: "*.junction.bed" + - junctionannotation_interact_bed: + type: file + description: Interact bed file + pattern: "*.Interact.bed" + - junctionannotation_xls: + type: file + description: xls file with junction information + pattern: "*.xls" + - junctionannotation_pdf: + type: file + description: junction plot + pattern: "*.junction.pdf" + - junctionannotation_events_pdf: + type: file + description: events plot + pattern: "*.events.pdf" + - junctionannotation_rscript: + type: file + description: Rscript to reproduce the plots + pattern: "*.r" + - junctionannotation_log: + type: file + description: Log file generated by tool + pattern: "*.log" + - junctionsaturation_all: + type: file + description: All the output files from RSEQC_JUNCTIONSATURATION + pattern: "*.{pdf,R}" + - junctionsaturation_pdf: + type: file + description: Junction saturation report + pattern: "*.pdf" + - junctionsaturation_rscript: + type: file + description: Junction saturation R-script + pattern: "*.r" + - readdistribution_txt: + type: file + description: the read distribution report + pattern: "*.read_distribution.txt" + - readduplication_all: + type: file + description: All the output files from RSEQC_READDUPLICATION + pattern: "*.{xls,pdf,R}" + - readduplication_seq_xls: + type: file + description: Read duplication rate determined from mapping position of read + pattern: "*seq.DupRate.xls" + - readduplication_pos_xls: + type: file + description: Read duplication rate determined from sequence of read + pattern: "*pos.DupRate.xls" + - readduplication_pdf: + type: file + description: plot of duplication rate + pattern: "*.pdf" + - readduplication_rscript: + type: file + description: script to reproduce the plot + pattern: "*.R" + - tin_txt: + type: file + description: TXT file containing tin.py results summary + pattern: "*.txt" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@kevinmenden" +maintainers: + - "@drpatelh" + - "@kevinmenden" diff --git a/subworkflows/nf-core/bam_rseqc/tests/main.nf.test b/subworkflows/nf-core/bam_rseqc/tests/main.nf.test new file mode 100644 index 0000000..d228c6b --- /dev/null +++ b/subworkflows/nf-core/bam_rseqc/tests/main.nf.test @@ -0,0 +1,147 @@ +nextflow_workflow { + + name "Test Workflow BAM_RSEQC" + script "../main.nf" + workflow "BAM_RSEQC" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "subworkflows/bam_rseqc" + tag "bam_rseqc" + tag "rseqc" + tag "rseqc/bamstat" + tag "rseqc/inferexperiment" + tag "rseqc/innerdistance" + tag "rseqc/junctionannotation" + tag "rseqc/junctionsaturation" + tag "rseqc/readdistribution" + tag "rseqc/readduplication" + tag "rseqc/tin" + + test("sarscov2 paired-end [bam]") { + + when { + workflow { + """ + input[0] = Channel.of([ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + ]) + input[1] = Channel.of([ + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed12', checkIfExists: true) + ]) + input[2] = ['bam_stat', 'inner_distance', 'infer_experiment', 'junction_annotation', 'junction_saturation', 'read_distribution', 'read_duplication', 'tin'] + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert workflow.out.innerdistance_all.any { it[1].endsWith('.pdf') && file(it[1]).exists() } }, + { assert workflow.out.readduplication_all.any { it[1].endsWith('.pdf') && file(it[1]).exists() } }, + { assert workflow.out.junctionsaturation_all.any { it[1].endsWith('.pdf') && file(it[1]).exists() } }, + { assert snapshot( + workflow.out.bamstat_txt, + workflow.out.innerdistance_all.findAll { it[1].endsWith('.pdf') == false }, + workflow.out.inferexperiment_txt, + workflow.out.junctionannotation_all.findAll { it[1].endsWith('.xls') == false && it[1].endsWith('.r') == false }, + workflow.out.junctionsaturation_all.findAll { it[1].endsWith('.pdf') == false }, + workflow.out.readdistribution_txt, + workflow.out.readduplication_all.findAll { it[1].endsWith('.pdf') == false }, + workflow.out.tin_txt, + workflow.out.versions).match()} + ) + } + } + + test("sarscov2 paired-end [bam] no modules") { + + when { + workflow { + """ + input[0] = Channel.of([ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + ]) + input[1] = Channel.of([ + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed12', checkIfExists: true) + ]) + input[2] = [] + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.out.bamstat_txt.size() == 0 }, + { assert workflow.out.innerdistance_all.size() == 0 }, + { assert workflow.out.inferexperiment_txt.size() == 0 }, + { assert workflow.out.junctionannotation_all.size() == 0 }, + { assert workflow.out.junctionsaturation_all.size() == 0 }, + { assert workflow.out.readdistribution_txt.size() == 0 }, + { assert workflow.out.readduplication_all.size() == 0 }, + { assert workflow.out.tin_txt.size() == 0 }, + { assert workflow.out.versions.size() == 0 } + ) + } + } + + test("sarscov2 paired-end [bam] - stub") { + + options "-stub" + + when { + workflow { + """ + input[0] = Channel.of([ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + ]) + input[1] = Channel.of([ + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed12', checkIfExists: true) + ]) + input[2] = ['bam_stat', 'inner_distance', 'infer_experiment', 'junction_annotation', 'junction_saturation', 'read_distribution', 'read_duplication', 'tin'] + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot(workflow.out).match()} + ) + } + } + + test("sarscov2 paired-end [bam] no modules - stub") { + + options "-stub" + + when { + workflow { + """ + input[0] = Channel.of([ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + ]) + input[1] = Channel.of([ + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed12', checkIfExists: true) + ]) + input[2] = [] + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot(workflow.out).match()} + ) + } + } +} diff --git a/subworkflows/nf-core/bam_rseqc/tests/main.nf.test.snap b/subworkflows/nf-core/bam_rseqc/tests/main.nf.test.snap new file mode 100644 index 0000000..0040810 --- /dev/null +++ b/subworkflows/nf-core/bam_rseqc/tests/main.nf.test.snap @@ -0,0 +1,903 @@ +{ + "sarscov2 paired-end [bam]": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.bam_stat.txt:md5,2675857864c1d1139b2a19d25dc36b09" + ] + ], + [ + [ + { + "id": "test" + }, + "test.inner_distance.txt:md5,a1acc9def0f64a5500d4c4cb47cbe32b" + ], + [ + { + "id": "test" + }, + "test.inner_distance_freq.txt:md5,3fc037501f5899b5da009c8ce02fc25e" + ], + [ + { + "id": "test" + }, + "test.inner_distance_mean.txt:md5,58398b7d5a29a5e564f9e3c50b55996c" + ], + [ + { + "id": "test" + }, + "test.inner_distance_plot.r:md5,5859fbd5b42046d47e8b9aa85077f4ea" + ] + ], + [ + [ + { + "id": "test" + }, + "test.infer_experiment.txt:md5,f9d0bfc239df637cd8aeda40ade3c59a" + ] + ], + [ + [ + { + "id": "test" + }, + "test.junction_annotation.log:md5,d75e0f5d62fada8aa9449991b209554c" + ] + ], + [ + [ + { + "id": "test" + }, + "test.junctionSaturation_plot.r:md5,caa6e63dcb477aabb169882b2f30dadd" + ] + ], + [ + [ + { + "id": "test" + }, + "test.read_distribution.txt:md5,56893fdc0809d968629a363551a1655f" + ] + ], + [ + [ + { + "id": "test" + }, + "test.DupRate_plot.r:md5,3c0325095cee4835b921e57d61c23dca" + ], + [ + { + "id": "test" + }, + "test.pos.DupRate.xls:md5,a859bc2031d46bf1cc4336205847caa3" + ], + [ + { + "id": "test" + }, + "test.seq.DupRate.xls:md5,ee8783399eec5a18522a6f08bece338b" + ] + ], + [ + [ + { + "id": "test" + }, + "test.paired_end.sorted.summary.txt:md5,9d98447e178b89a89f6f5aba7a772fe6" + ] + ], + [ + "versions.yml:md5,60dc1afce7fecb7270e998a00ee393e2", + "versions.yml:md5,9bc3caee6aeb54f23f5296b499546515", + "versions.yml:md5,a25161998ca60d5ce4e9a86abbf1bcb8", + "versions.yml:md5,c175b92f50b5be38a8808cbf7ac7452e", + "versions.yml:md5,ca7e95cf7ff7cff5d052b890729f7ead", + "versions.yml:md5,d03beea3c68934c3f3623a005c1424d2", + "versions.yml:md5,f2f3ecd549045f7595245f904f5d9414", + "versions.yml:md5,fd16f1098b9c285f3ea7bd3daf4e8f10" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-03T12:07:54.452949" + }, + "sarscov2 paired-end [bam] - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.bam_stat.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.inner_distance.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test" + }, + "test.inner_distance_freq.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test" + }, + "test.inner_distance_mean.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test" + }, + "test.inner_distance_plot.pdf:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test" + }, + "test.inner_distance_plot.r:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "10": [ + [ + { + "id": "test" + }, + "test.Interact.bed:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "11": [ + [ + { + "id": "test" + }, + "test.junction.xls:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "12": [ + [ + { + "id": "test" + }, + "test.junction.pdf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "13": [ + [ + { + "id": "test" + }, + "test.events.pdf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "14": [ + [ + { + "id": "test" + }, + "test.junction_plot.r:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "15": [ + [ + { + "id": "test" + }, + "test.junction_annotation.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "16": [ + [ + { + "id": "test" + }, + "test.junctionSaturation_plot.pdf:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test" + }, + "test.junctionSaturation_plot.r:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "17": [ + [ + { + "id": "test" + }, + "test.junctionSaturation_plot.pdf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "18": [ + [ + { + "id": "test" + }, + "test.junctionSaturation_plot.r:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "19": [ + [ + { + "id": "test" + }, + "test.read_distribution.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test" + }, + "test.inner_distance.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "20": [ + [ + { + "id": "test" + }, + "test.DupRate_plot.pdf:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test" + }, + "test.DupRate_plot.r:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test" + }, + "test.pos.DupRate.xls:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test" + }, + "test.seq.DupRate.xls:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "21": [ + [ + { + "id": "test" + }, + "test.seq.DupRate.xls:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "22": [ + [ + { + "id": "test" + }, + "test.pos.DupRate.xls:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "23": [ + [ + { + "id": "test" + }, + "test.DupRate_plot.pdf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "24": [ + [ + { + "id": "test" + }, + "test.DupRate_plot.r:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "25": [ + [ + { + "id": "test" + }, + "test.paired_end.sorted.bam.summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "26": [ + "versions.yml:md5,60dc1afce7fecb7270e998a00ee393e2", + "versions.yml:md5,9bc3caee6aeb54f23f5296b499546515", + "versions.yml:md5,a25161998ca60d5ce4e9a86abbf1bcb8", + "versions.yml:md5,c175b92f50b5be38a8808cbf7ac7452e", + "versions.yml:md5,ca7e95cf7ff7cff5d052b890729f7ead", + "versions.yml:md5,d03beea3c68934c3f3623a005c1424d2", + "versions.yml:md5,f2f3ecd549045f7595245f904f5d9414", + "versions.yml:md5,fd16f1098b9c285f3ea7bd3daf4e8f10" + ], + "3": [ + [ + { + "id": "test" + }, + "test.inner_distance_freq.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + { + "id": "test" + }, + "test.inner_distance_mean.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "5": [ + [ + { + "id": "test" + }, + "test.inner_distance_plot.pdf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "6": [ + [ + { + "id": "test" + }, + "test.inner_distance_plot.r:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "7": [ + [ + { + "id": "test" + }, + "test.infer_experiment.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "8": [ + [ + { + "id": "test" + }, + "test.Interact.bed:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test" + }, + "test.events.pdf:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test" + }, + "test.junction.bed:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test" + }, + "test.junction.pdf:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test" + }, + "test.junction.xls:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test" + }, + "test.junction_annotation.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test" + }, + "test.junction_plot.r:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "9": [ + [ + { + "id": "test" + }, + "test.junction.bed:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bamstat_txt": [ + [ + { + "id": "test" + }, + "test.bam_stat.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "inferexperiment_txt": [ + [ + { + "id": "test" + }, + "test.infer_experiment.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "innerdistance_all": [ + [ + { + "id": "test" + }, + "test.inner_distance.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test" + }, + "test.inner_distance_freq.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test" + }, + "test.inner_distance_mean.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test" + }, + "test.inner_distance_plot.pdf:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test" + }, + "test.inner_distance_plot.r:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "innerdistance_distance": [ + [ + { + "id": "test" + }, + "test.inner_distance.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "innerdistance_freq": [ + [ + { + "id": "test" + }, + "test.inner_distance_freq.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "innerdistance_mean": [ + [ + { + "id": "test" + }, + "test.inner_distance_mean.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "innerdistance_pdf": [ + [ + { + "id": "test" + }, + "test.inner_distance_plot.pdf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "innerdistance_rscript": [ + [ + { + "id": "test" + }, + "test.inner_distance_plot.r:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "junctionannotation_all": [ + [ + { + "id": "test" + }, + "test.Interact.bed:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test" + }, + "test.events.pdf:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test" + }, + "test.junction.bed:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test" + }, + "test.junction.pdf:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test" + }, + "test.junction.xls:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test" + }, + "test.junction_annotation.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test" + }, + "test.junction_plot.r:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "junctionannotation_bed": [ + [ + { + "id": "test" + }, + "test.junction.bed:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "junctionannotation_events_pdf": [ + [ + { + "id": "test" + }, + "test.events.pdf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "junctionannotation_interact_bed": [ + [ + { + "id": "test" + }, + "test.Interact.bed:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "junctionannotation_log": [ + [ + { + "id": "test" + }, + "test.junction_annotation.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "junctionannotation_pdf": [ + [ + { + "id": "test" + }, + "test.junction.pdf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "junctionannotation_rscript": [ + [ + { + "id": "test" + }, + "test.junction_plot.r:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "junctionannotation_xls": [ + [ + { + "id": "test" + }, + "test.junction.xls:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "junctionsaturation_all": [ + [ + { + "id": "test" + }, + "test.junctionSaturation_plot.pdf:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test" + }, + "test.junctionSaturation_plot.r:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "junctionsaturation_pdf": [ + [ + { + "id": "test" + }, + "test.junctionSaturation_plot.pdf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "junctionsaturation_rscript": [ + [ + { + "id": "test" + }, + "test.junctionSaturation_plot.r:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "readdistribution_txt": [ + [ + { + "id": "test" + }, + "test.read_distribution.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "readduplication_all": [ + [ + { + "id": "test" + }, + "test.DupRate_plot.pdf:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test" + }, + "test.DupRate_plot.r:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test" + }, + "test.pos.DupRate.xls:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test" + }, + "test.seq.DupRate.xls:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "readduplication_pdf": [ + [ + { + "id": "test" + }, + "test.DupRate_plot.pdf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "readduplication_pos_xls": [ + [ + { + "id": "test" + }, + "test.pos.DupRate.xls:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "readduplication_rscript": [ + [ + { + "id": "test" + }, + "test.DupRate_plot.r:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "readduplication_seq_xls": [ + [ + { + "id": "test" + }, + "test.seq.DupRate.xls:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tin_txt": [ + [ + { + "id": "test" + }, + "test.paired_end.sorted.bam.summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,60dc1afce7fecb7270e998a00ee393e2", + "versions.yml:md5,9bc3caee6aeb54f23f5296b499546515", + "versions.yml:md5,a25161998ca60d5ce4e9a86abbf1bcb8", + "versions.yml:md5,c175b92f50b5be38a8808cbf7ac7452e", + "versions.yml:md5,ca7e95cf7ff7cff5d052b890729f7ead", + "versions.yml:md5,d03beea3c68934c3f3623a005c1424d2", + "versions.yml:md5,f2f3ecd549045f7595245f904f5d9414", + "versions.yml:md5,fd16f1098b9c285f3ea7bd3daf4e8f10" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-03T12:08:17.304769" + }, + "sarscov2 paired-end [bam] no modules - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + + ], + "10": [ + + ], + "11": [ + + ], + "12": [ + + ], + "13": [ + + ], + "14": [ + + ], + "15": [ + + ], + "16": [ + + ], + "17": [ + + ], + "18": [ + + ], + "19": [ + + ], + "2": [ + + ], + "20": [ + + ], + "21": [ + + ], + "22": [ + + ], + "23": [ + + ], + "24": [ + + ], + "25": [ + + ], + "26": [ + + ], + "3": [ + + ], + "4": [ + + ], + "5": [ + + ], + "6": [ + + ], + "7": [ + + ], + "8": [ + + ], + "9": [ + + ], + "bamstat_txt": [ + + ], + "inferexperiment_txt": [ + + ], + "innerdistance_all": [ + + ], + "innerdistance_distance": [ + + ], + "innerdistance_freq": [ + + ], + "innerdistance_mean": [ + + ], + "innerdistance_pdf": [ + + ], + "innerdistance_rscript": [ + + ], + "junctionannotation_all": [ + + ], + "junctionannotation_bed": [ + + ], + "junctionannotation_events_pdf": [ + + ], + "junctionannotation_interact_bed": [ + + ], + "junctionannotation_log": [ + + ], + "junctionannotation_pdf": [ + + ], + "junctionannotation_rscript": [ + + ], + "junctionannotation_xls": [ + + ], + "junctionsaturation_all": [ + + ], + "junctionsaturation_pdf": [ + + ], + "junctionsaturation_rscript": [ + + ], + "readdistribution_txt": [ + + ], + "readduplication_all": [ + + ], + "readduplication_pdf": [ + + ], + "readduplication_pos_xls": [ + + ], + "readduplication_rscript": [ + + ], + "readduplication_seq_xls": [ + + ], + "tin_txt": [ + + ], + "versions": [ + + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-03T12:08:24.443731" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/bam_rseqc/tests/tags.yml b/subworkflows/nf-core/bam_rseqc/tests/tags.yml new file mode 100644 index 0000000..c8dfce3 --- /dev/null +++ b/subworkflows/nf-core/bam_rseqc/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/bam_rseqc: + - subworkflows/nf-core/bam_rseqc/** diff --git a/subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test b/subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test new file mode 100644 index 0000000..76e7a40 --- /dev/null +++ b/subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test @@ -0,0 +1,188 @@ +nextflow_workflow { + + name "Test Workflow BAM_STATS_SAMTOOLS" + script "../main.nf" + workflow "BAM_STATS_SAMTOOLS" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "bam_stats_samtools" + tag "subworkflows/bam_stats_samtools" + tag "samtools" + tag "samtools/flagstat" + tag "samtools/idxstats" + tag "samtools/stats" + + test("test_bam_stats_samtools_single_end") { + + when { + workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.sorted.bam.bai', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot( + workflow.out.flagstat, + workflow.out.idxstats, + workflow.out.stats, + workflow.out.versions).match() } + ) + } + } + + test("test_bam_stats_samtools_paired_end") { + + when { + workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.flagstat, + workflow.out.idxstats, + workflow.out.stats, + workflow.out.versions).match() } + ) + } + } + + test("test_bam_stats_samtools_paired_end_cram") { + + when { + workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot( + workflow.out.flagstat, + workflow.out.idxstats, + workflow.out.stats, + workflow.out.versions).match() } + ) + } + } + + test ("test_bam_stats_samtools_single_end - stub") { + + options "-stub" + + when { + workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.sorted.bam.bai', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot(workflow.out).match() } + ) + } + } + + test("test_bam_stats_samtools_paired_end - stub") { + + options "-stub" + + when { + workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() } + ) + } + } + + test("test_bam_stats_samtools_paired_end_cram - stub") { + + options "-stub" + + when { + workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot(workflow.out).match() } + ) + } + } +} diff --git a/subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test.snap b/subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test.snap new file mode 100644 index 0000000..8ca2252 --- /dev/null +++ b/subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test.snap @@ -0,0 +1,350 @@ +{ + "test_bam_stats_samtools_paired_end - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.flagstat:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.idxstats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,73c55059ed478cd2f9cd93dd3185da3a", + "versions.yml:md5,80d8653e01575b3c381d87073f672fb5", + "versions.yml:md5,cb889532237a2f3d813978ac14a12d51" + ], + "flagstat": [ + [ + { + "id": "test", + "single_end": true + }, + "test.flagstat:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "idxstats": [ + [ + { + "id": "test", + "single_end": true + }, + "test.idxstats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "stats": [ + [ + { + "id": "test", + "single_end": true + }, + "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,73c55059ed478cd2f9cd93dd3185da3a", + "versions.yml:md5,80d8653e01575b3c381d87073f672fb5", + "versions.yml:md5,cb889532237a2f3d813978ac14a12d51" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T08:08:35.660286921" + }, + "test_bam_stats_samtools_single_end - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.flagstat:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.idxstats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,73c55059ed478cd2f9cd93dd3185da3a", + "versions.yml:md5,80d8653e01575b3c381d87073f672fb5", + "versions.yml:md5,cb889532237a2f3d813978ac14a12d51" + ], + "flagstat": [ + [ + { + "id": "test", + "single_end": true + }, + "test.flagstat:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "idxstats": [ + [ + { + "id": "test", + "single_end": true + }, + "test.idxstats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "stats": [ + [ + { + "id": "test", + "single_end": true + }, + "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,73c55059ed478cd2f9cd93dd3185da3a", + "versions.yml:md5,80d8653e01575b3c381d87073f672fb5", + "versions.yml:md5,cb889532237a2f3d813978ac14a12d51" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T08:08:24.220305512" + }, + "test_bam_stats_samtools_paired_end_cram - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.flagstat:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.idxstats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,73c55059ed478cd2f9cd93dd3185da3a", + "versions.yml:md5,80d8653e01575b3c381d87073f672fb5", + "versions.yml:md5,cb889532237a2f3d813978ac14a12d51" + ], + "flagstat": [ + [ + { + "id": "test", + "single_end": false + }, + "test.flagstat:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "idxstats": [ + [ + { + "id": "test", + "single_end": false + }, + "test.idxstats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "stats": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,73c55059ed478cd2f9cd93dd3185da3a", + "versions.yml:md5,80d8653e01575b3c381d87073f672fb5", + "versions.yml:md5,cb889532237a2f3d813978ac14a12d51" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T08:08:54.206770141" + }, + "test_bam_stats_samtools_single_end": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.flagstat:md5,2191911d72575a2358b08b1df64ccb53" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.idxstats:md5,613e048487662c694aa4a2f73ca96a20" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.stats:md5,291bb2393ec947140d12d42c2795b222" + ] + ], + [ + "versions.yml:md5,73c55059ed478cd2f9cd93dd3185da3a", + "versions.yml:md5,80d8653e01575b3c381d87073f672fb5", + "versions.yml:md5,cb889532237a2f3d813978ac14a12d51" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T08:07:49.731645858" + }, + "test_bam_stats_samtools_paired_end": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.flagstat:md5,4f7ffd1e6a5e85524d443209ac97d783" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.idxstats:md5,df60a8c8d6621100d05178c93fb053a2" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.stats:md5,8140d69cdedd77570ca1d7618a744e16" + ] + ], + [ + "versions.yml:md5,73c55059ed478cd2f9cd93dd3185da3a", + "versions.yml:md5,80d8653e01575b3c381d87073f672fb5", + "versions.yml:md5,cb889532237a2f3d813978ac14a12d51" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T08:08:01.421996172" + }, + "test_bam_stats_samtools_paired_end_cram": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.flagstat:md5,a53f3d26e2e9851f7d528442bbfe9781" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.idxstats:md5,e179601fa7b8ebce81ac3765206f6c15" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,1622856127bafd6cdbadee9cd64ec9b7" + ] + ], + [ + "versions.yml:md5,73c55059ed478cd2f9cd93dd3185da3a", + "versions.yml:md5,80d8653e01575b3c381d87073f672fb5", + "versions.yml:md5,cb889532237a2f3d813978ac14a12d51" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T08:08:12.640915756" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/bam_stats_samtools/tests/tags.yml b/subworkflows/nf-core/bam_stats_samtools/tests/tags.yml new file mode 100644 index 0000000..ec2f2d6 --- /dev/null +++ b/subworkflows/nf-core/bam_stats_samtools/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/bam_stats_samtools: + - subworkflows/nf-core/bam_stats_samtools/** diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/main.nf b/subworkflows/nf-core/utils_nextflow_pipeline/main.nf new file mode 100644 index 0000000..0fcbf7b --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/main.nf @@ -0,0 +1,124 @@ +// +// Subworkflow with functionality that may be useful for any Nextflow pipeline +// + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SUBWORKFLOW DEFINITION +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +workflow UTILS_NEXTFLOW_PIPELINE { + take: + print_version // boolean: print version + dump_parameters // boolean: dump parameters + outdir // path: base directory used to publish pipeline results + check_conda_channels // boolean: check conda channels + + main: + + // + // Print workflow version and exit on --version + // + if (print_version) { + log.info("${workflow.manifest.name} ${getWorkflowVersion()}") + System.exit(0) + } + + // + // Dump pipeline parameters to a JSON file + // + if (dump_parameters && outdir) { + dumpParametersToJSON(outdir) + } + + // + // When running with Conda, warn if channels have not been set-up appropriately + // + if (check_conda_channels) { + checkCondaChannels() + } + + emit: + dummy_emit = true +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + FUNCTIONS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// +// Generate version string +// +def getWorkflowVersion() { + def version_string = "" as String + if (workflow.manifest.version) { + def prefix_v = workflow.manifest.version[0] != 'v' ? 'v' : '' + version_string += "${prefix_v}${workflow.manifest.version}" + } + + if (workflow.commitId) { + def git_shortsha = workflow.commitId.substring(0, 7) + version_string += "-g${git_shortsha}" + } + + return version_string +} + +// +// Dump pipeline parameters to a JSON file +// +def dumpParametersToJSON(outdir) { + def timestamp = new java.util.Date().format('yyyy-MM-dd_HH-mm-ss') + def filename = "params_${timestamp}.json" + def temp_pf = new File(workflow.launchDir.toString(), ".${filename}") + def jsonStr = groovy.json.JsonOutput.toJson(params) + temp_pf.text = groovy.json.JsonOutput.prettyPrint(jsonStr) + + nextflow.extension.FilesEx.copyTo(temp_pf.toPath(), "${outdir}/pipeline_info/params_${timestamp}.json") + temp_pf.delete() +} + +// +// When running with -profile conda, warn if channels have not been set-up appropriately +// +def checkCondaChannels() { + def parser = new org.yaml.snakeyaml.Yaml() + def channels = [] + try { + def config = parser.load("conda config --show channels".execute().text) + channels = config.channels + } + catch (NullPointerException e) { + log.warn("Could not verify conda channel configuration.") + return null + } + catch (IOException e) { + log.warn("Could not verify conda channel configuration.") + return null + } + + // Check that all channels are present + // This channel list is ordered by required channel priority. + def required_channels_in_order = ['conda-forge', 'bioconda'] + def channels_missing = ((required_channels_in_order as Set) - (channels as Set)) as Boolean + + // Check that they are in the right order + def channel_priority_violation = required_channels_in_order != channels.findAll { ch -> ch in required_channels_in_order } + + if (channels_missing | channel_priority_violation) { + log.warn """\ + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + There is a problem with your Conda configuration! + You will need to set-up the conda-forge and bioconda channels correctly. + Please refer to https://bioconda.github.io/ + The observed channel order is + ${channels} + but the following channel order is required: + ${required_channels_in_order} + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + """.stripIndent(true) + } +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml b/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml new file mode 100644 index 0000000..e5c3a0a --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml @@ -0,0 +1,38 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "UTILS_NEXTFLOW_PIPELINE" +description: Subworkflow with functionality that may be useful for any Nextflow pipeline +keywords: + - utility + - pipeline + - initialise + - version +components: [] +input: + - print_version: + type: boolean + description: | + Print the version of the pipeline and exit + - dump_parameters: + type: boolean + description: | + Dump the parameters of the pipeline to a JSON file + - output_directory: + type: directory + description: Path to output dir to write JSON file to. + pattern: "results/" + - check_conda_channel: + type: boolean + description: | + Check if the conda channel priority is correct. +output: + - dummy_emit: + type: boolean + description: | + Dummy emit to make nf-core subworkflows lint happy +authors: + - "@adamrtalbot" + - "@drpatelh" +maintainers: + - "@adamrtalbot" + - "@drpatelh" + - "@maxulysse" diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test new file mode 100644 index 0000000..68718e4 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test @@ -0,0 +1,54 @@ + +nextflow_function { + + name "Test Functions" + script "subworkflows/nf-core/utils_nextflow_pipeline/main.nf" + config "subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config" + tag 'subworkflows' + tag 'utils_nextflow_pipeline' + tag 'subworkflows/utils_nextflow_pipeline' + + test("Test Function getWorkflowVersion") { + + function "getWorkflowVersion" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function dumpParametersToJSON") { + + function "dumpParametersToJSON" + + when { + function { + """ + // define inputs of the function here. Example: + input[0] = "$outputDir" + """.stripIndent() + } + } + + then { + assertAll( + { assert function.success } + ) + } + } + + test("Test Function checkCondaChannels") { + + function "checkCondaChannels" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap new file mode 100644 index 0000000..e3f0baf --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap @@ -0,0 +1,20 @@ +{ + "Test Function getWorkflowVersion": { + "content": [ + "v9.9.9" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:02:05.308243" + }, + "Test Function checkCondaChannels": { + "content": null, + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:02:12.425833" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test new file mode 100644 index 0000000..ca964ce --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test @@ -0,0 +1,111 @@ +nextflow_workflow { + + name "Test Workflow UTILS_NEXTFLOW_PIPELINE" + script "../main.nf" + config "subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config" + workflow "UTILS_NEXTFLOW_PIPELINE" + tag 'subworkflows' + tag 'utils_nextflow_pipeline' + tag 'subworkflows/utils_nextflow_pipeline' + + test("Should run no inputs") { + + when { + workflow { + """ + print_version = false + dump_parameters = false + outdir = null + check_conda_channels = false + + input[0] = print_version + input[1] = dump_parameters + input[2] = outdir + input[3] = check_conda_channels + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should print version") { + + when { + workflow { + """ + print_version = true + dump_parameters = false + outdir = null + check_conda_channels = false + + input[0] = print_version + input[1] = dump_parameters + input[2] = outdir + input[3] = check_conda_channels + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.stdout.contains("nextflow_workflow v9.9.9") } + ) + } + } + + test("Should dump params") { + + when { + workflow { + """ + print_version = false + dump_parameters = true + outdir = 'results' + check_conda_channels = false + + input[0] = false + input[1] = true + input[2] = outdir + input[3] = false + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should not create params JSON if no output directory") { + + when { + workflow { + """ + print_version = false + dump_parameters = true + outdir = null + check_conda_channels = false + + input[0] = false + input[1] = true + input[2] = outdir + input[3] = false + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config b/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config new file mode 100644 index 0000000..a09572e --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config @@ -0,0 +1,9 @@ +manifest { + name = 'nextflow_workflow' + author = """nf-core""" + homePage = 'https://127.0.0.1' + description = """Dummy pipeline""" + nextflowVersion = '!>=23.04.0' + version = '9.9.9' + doi = 'https://doi.org/10.5281/zenodo.5070524' +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml b/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml new file mode 100644 index 0000000..f847611 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/utils_nextflow_pipeline: + - subworkflows/nf-core/utils_nextflow_pipeline/** diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf new file mode 100644 index 0000000..cb0e941 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf @@ -0,0 +1,464 @@ +// +// Subworkflow with utility functions specific to the nf-core pipeline template +// + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SUBWORKFLOW DEFINITION +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +workflow UTILS_NFCORE_PIPELINE { + take: + nextflow_cli_args + + main: + valid_config = checkConfigProvided() + checkProfileProvided(nextflow_cli_args) + + emit: + valid_config +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + FUNCTIONS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// +// Warn if a -profile or Nextflow config has not been provided to run the pipeline +// +def checkConfigProvided() { + def valid_config = true as Boolean + if (workflow.profile == 'standard' && workflow.configFiles.size() <= 1) { + log.warn( + "[${workflow.manifest.name}] You are attempting to run the pipeline without any custom configuration!\n\n" + "This will be dependent on your local compute environment but can be achieved via one or more of the following:\n" + " (1) Using an existing pipeline profile e.g. `-profile docker` or `-profile singularity`\n" + " (2) Using an existing nf-core/configs for your Institution e.g. `-profile crick` or `-profile uppmax`\n" + " (3) Using your own local custom config e.g. `-c /path/to/your/custom.config`\n\n" + "Please refer to the quick start section and usage docs for the pipeline.\n " + ) + valid_config = false + } + return valid_config +} + +// +// Exit pipeline if --profile contains spaces +// +def checkProfileProvided(nextflow_cli_args) { + if (workflow.profile.endsWith(',')) { + error( + "The `-profile` option cannot end with a trailing comma, please remove it and re-run the pipeline!\n" + "HINT: A common mistake is to provide multiple values separated by spaces e.g. `-profile test, docker`.\n" + ) + } + if (nextflow_cli_args[0]) { + log.warn( + "nf-core pipelines do not accept positional arguments. The positional argument `${nextflow_cli_args[0]}` has been detected.\n" + "HINT: A common mistake is to provide multiple values separated by spaces e.g. `-profile test, docker`.\n" + ) + } +} + +// +// Citation string for pipeline +// +def workflowCitation() { + def temp_doi_ref = "" + def manifest_doi = workflow.manifest.doi.tokenize(",") + // Handling multiple DOIs + // Removing `https://doi.org/` to handle pipelines using DOIs vs DOI resolvers + // Removing ` ` since the manifest.doi is a string and not a proper list + manifest_doi.each { doi_ref -> + temp_doi_ref += " https://doi.org/${doi_ref.replace('https://doi.org/', '').replace(' ', '')}\n" + } + return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" + "* The pipeline\n" + temp_doi_ref + "\n" + "* The nf-core framework\n" + " https://doi.org/10.1038/s41587-020-0439-x\n\n" + "* Software dependencies\n" + " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md" +} + +// +// Generate workflow version string +// +def getWorkflowVersion() { + def version_string = "" as String + if (workflow.manifest.version) { + def prefix_v = workflow.manifest.version[0] != 'v' ? 'v' : '' + version_string += "${prefix_v}${workflow.manifest.version}" + } + + if (workflow.commitId) { + def git_shortsha = workflow.commitId.substring(0, 7) + version_string += "-g${git_shortsha}" + } + + return version_string +} + +// +// Get software versions for pipeline +// +def processVersionsFromYAML(yaml_file) { + def yaml = new org.yaml.snakeyaml.Yaml() + def versions = yaml.load(yaml_file).collectEntries { k, v -> [k.tokenize(':')[-1], v] } + return yaml.dumpAsMap(versions).trim() +} + +// +// Get workflow version for pipeline +// +def workflowVersionToYAML() { + return """ + Workflow: + ${workflow.manifest.name}: ${getWorkflowVersion()} + Nextflow: ${workflow.nextflow.version} + """.stripIndent().trim() +} + +// +// Get channel of software versions used in pipeline in YAML format +// +def softwareVersionsToYAML(ch_versions) { + return ch_versions.unique().map { version -> processVersionsFromYAML(version) }.unique().mix(Channel.of(workflowVersionToYAML())) +} + +// +// Get workflow summary for MultiQC +// +def paramsSummaryMultiqc(summary_params) { + def summary_section = '' + summary_params + .keySet() + .each { group -> + def group_params = summary_params.get(group) + // This gets the parameters of that particular group + if (group_params) { + summary_section += "

    ${group}

    \n" + summary_section += "
    \n" + group_params + .keySet() + .sort() + .each { param -> + summary_section += "
    ${param}
    ${group_params.get(param) ?: 'N/A'}
    \n" + } + summary_section += "
    \n" + } + } + + def yaml_file_text = "id: '${workflow.manifest.name.replace('/', '-')}-summary'\n" as String + yaml_file_text += "description: ' - this information is collected when the pipeline is started.'\n" + yaml_file_text += "section_name: '${workflow.manifest.name} Workflow Summary'\n" + yaml_file_text += "section_href: 'https://github.com/${workflow.manifest.name}'\n" + yaml_file_text += "plot_type: 'html'\n" + yaml_file_text += "data: |\n" + yaml_file_text += "${summary_section}" + + return yaml_file_text +} + +// +// nf-core logo +// +def nfCoreLogo(monochrome_logs=true) { + def colors = logColours(monochrome_logs) as Map + String.format( + """\n + ${dashedLine(monochrome_logs)} + ${colors.green},--.${colors.black}/${colors.green},-.${colors.reset} + ${colors.blue} ___ __ __ __ ___ ${colors.green}/,-._.--~\'${colors.reset} + ${colors.blue} |\\ | |__ __ / ` / \\ |__) |__ ${colors.yellow}} {${colors.reset} + ${colors.blue} | \\| | \\__, \\__/ | \\ |___ ${colors.green}\\`-._,-`-,${colors.reset} + ${colors.green}`._,._,\'${colors.reset} + ${colors.purple} ${workflow.manifest.name} ${getWorkflowVersion()}${colors.reset} + ${dashedLine(monochrome_logs)} + """.stripIndent() + ) +} + +// +// Return dashed line +// +def dashedLine(monochrome_logs=true) { + def colors = logColours(monochrome_logs) as Map + return "-${colors.dim}----------------------------------------------------${colors.reset}-" +} + +// +// ANSII colours used for terminal logging +// +def logColours(monochrome_logs=true) { + def colorcodes = [:] as Map + + // Reset / Meta + colorcodes['reset'] = monochrome_logs ? '' : "\033[0m" + colorcodes['bold'] = monochrome_logs ? '' : "\033[1m" + colorcodes['dim'] = monochrome_logs ? '' : "\033[2m" + colorcodes['underlined'] = monochrome_logs ? '' : "\033[4m" + colorcodes['blink'] = monochrome_logs ? '' : "\033[5m" + colorcodes['reverse'] = monochrome_logs ? '' : "\033[7m" + colorcodes['hidden'] = monochrome_logs ? '' : "\033[8m" + + // Regular Colors + colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m" + colorcodes['red'] = monochrome_logs ? '' : "\033[0;31m" + colorcodes['green'] = monochrome_logs ? '' : "\033[0;32m" + colorcodes['yellow'] = monochrome_logs ? '' : "\033[0;33m" + colorcodes['blue'] = monochrome_logs ? '' : "\033[0;34m" + colorcodes['purple'] = monochrome_logs ? '' : "\033[0;35m" + colorcodes['cyan'] = monochrome_logs ? '' : "\033[0;36m" + colorcodes['white'] = monochrome_logs ? '' : "\033[0;37m" + + // Bold + colorcodes['bblack'] = monochrome_logs ? '' : "\033[1;30m" + colorcodes['bred'] = monochrome_logs ? '' : "\033[1;31m" + colorcodes['bgreen'] = monochrome_logs ? '' : "\033[1;32m" + colorcodes['byellow'] = monochrome_logs ? '' : "\033[1;33m" + colorcodes['bblue'] = monochrome_logs ? '' : "\033[1;34m" + colorcodes['bpurple'] = monochrome_logs ? '' : "\033[1;35m" + colorcodes['bcyan'] = monochrome_logs ? '' : "\033[1;36m" + colorcodes['bwhite'] = monochrome_logs ? '' : "\033[1;37m" + + // Underline + colorcodes['ublack'] = monochrome_logs ? '' : "\033[4;30m" + colorcodes['ured'] = monochrome_logs ? '' : "\033[4;31m" + colorcodes['ugreen'] = monochrome_logs ? '' : "\033[4;32m" + colorcodes['uyellow'] = monochrome_logs ? '' : "\033[4;33m" + colorcodes['ublue'] = monochrome_logs ? '' : "\033[4;34m" + colorcodes['upurple'] = monochrome_logs ? '' : "\033[4;35m" + colorcodes['ucyan'] = monochrome_logs ? '' : "\033[4;36m" + colorcodes['uwhite'] = monochrome_logs ? '' : "\033[4;37m" + + // High Intensity + colorcodes['iblack'] = monochrome_logs ? '' : "\033[0;90m" + colorcodes['ired'] = monochrome_logs ? '' : "\033[0;91m" + colorcodes['igreen'] = monochrome_logs ? '' : "\033[0;92m" + colorcodes['iyellow'] = monochrome_logs ? '' : "\033[0;93m" + colorcodes['iblue'] = monochrome_logs ? '' : "\033[0;94m" + colorcodes['ipurple'] = monochrome_logs ? '' : "\033[0;95m" + colorcodes['icyan'] = monochrome_logs ? '' : "\033[0;96m" + colorcodes['iwhite'] = monochrome_logs ? '' : "\033[0;97m" + + // Bold High Intensity + colorcodes['biblack'] = monochrome_logs ? '' : "\033[1;90m" + colorcodes['bired'] = monochrome_logs ? '' : "\033[1;91m" + colorcodes['bigreen'] = monochrome_logs ? '' : "\033[1;92m" + colorcodes['biyellow'] = monochrome_logs ? '' : "\033[1;93m" + colorcodes['biblue'] = monochrome_logs ? '' : "\033[1;94m" + colorcodes['bipurple'] = monochrome_logs ? '' : "\033[1;95m" + colorcodes['bicyan'] = monochrome_logs ? '' : "\033[1;96m" + colorcodes['biwhite'] = monochrome_logs ? '' : "\033[1;97m" + + return colorcodes +} + +// +// Attach the multiqc report to email +// +def attachMultiqcReport(multiqc_report) { + def mqc_report = null + try { + if (workflow.success) { + mqc_report = multiqc_report.getVal() + if (mqc_report.getClass() == ArrayList && mqc_report.size() >= 1) { + if (mqc_report.size() > 1) { + log.warn("[${workflow.manifest.name}] Found multiple reports from process 'MULTIQC', will use only one") + } + mqc_report = mqc_report[0] + } + } + } + catch (Exception all) { + if (multiqc_report) { + log.warn("[${workflow.manifest.name}] Could not attach MultiQC report to summary email") + } + } + return mqc_report +} + +// +// Construct and send completion email +// +def completionEmail(summary_params, email, email_on_fail, plaintext_email, outdir, monochrome_logs=true, multiqc_report=null) { + + // Set up the e-mail variables + def subject = "[${workflow.manifest.name}] Successful: ${params.analysis_id}" + if (!workflow.success) { + subject = "[${workflow.manifest.name}] FAILED: ${params.analysis_id}" + } + + def summary = [:] + summary_params + .keySet() + .sort() + .each { group -> + summary << summary_params[group] + } + + def misc_fields = [:] + misc_fields['Analysis'] = params.analysis_id + misc_fields['Date Started'] = workflow.start + misc_fields['Date Completed'] = workflow.complete + misc_fields['Pipeline script file path'] = workflow.scriptFile + misc_fields['Pipeline script hash ID'] = workflow.scriptId + if (workflow.repository) { + misc_fields['Pipeline repository Git URL'] = workflow.repository + } + if (workflow.commitId) { + misc_fields['Pipeline repository Git Commit'] = workflow.commitId + } + if (workflow.revision) { + misc_fields['Pipeline Git branch/tag'] = workflow.revision + } + misc_fields['Nextflow Version'] = workflow.nextflow.version + misc_fields['Nextflow Build'] = workflow.nextflow.build + misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp + + def email_fields = [:] + email_fields['Analysis'] = params.analysis_id + email_fields['version'] = getWorkflowVersion() + email_fields['runName'] = workflow.runName + email_fields['success'] = workflow.success + email_fields['dateComplete'] = workflow.complete + email_fields['duration'] = workflow.duration + email_fields['exitStatus'] = workflow.exitStatus + email_fields['errorMessage'] = (workflow.errorMessage ?: 'None') + email_fields['errorReport'] = (workflow.errorReport ?: 'None') + email_fields['commandLine'] = workflow.commandLine + email_fields['projectDir'] = workflow.projectDir + email_fields['summary'] = summary << misc_fields + + // On success try attach the multiqc report + def mqc_report = attachMultiqcReport(multiqc_report) + + // Check if we are only sending emails on failure + def email_address = email + if (!email && email_on_fail && !workflow.success) { + email_address = email_on_fail + } + + // Render the TXT template + def engine = new groovy.text.GStringTemplateEngine() + def tf = new File("${workflow.projectDir}/assets/email_template.txt") + def txt_template = engine.createTemplate(tf).make(email_fields) + def email_txt = txt_template.toString() + + // Render the HTML template + def hf = new File("${workflow.projectDir}/assets/email_template.html") + def html_template = engine.createTemplate(hf).make(email_fields) + def email_html = html_template.toString() + + // Render the sendmail template + def max_multiqc_email_size = (params.containsKey('max_multiqc_email_size') ? params.max_multiqc_email_size : 0) as nextflow.util.MemoryUnit + def smail_fields = [email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "${workflow.projectDir}", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes()] + def sf = new File("${workflow.projectDir}/assets/sendmail_template.txt") + def sendmail_template = engine.createTemplate(sf).make(smail_fields) + def sendmail_html = sendmail_template.toString() + + // Send the HTML e-mail + def colors = logColours(monochrome_logs) as Map + if (email_address) { + try { + if (plaintext_email) { + new org.codehaus.groovy.GroovyException('Send plaintext e-mail, not HTML') + } // Try to send HTML e-mail using sendmail + def sendmail_tf = new File(workflow.launchDir.toString(), ".sendmail_tmp.html") + sendmail_tf.withWriter { w -> w << sendmail_html } + ['sendmail', '-t'].execute() << sendmail_html + log.info("-${colors.purple}[${workflow.manifest.name}]${colors.green} Sent summary e-mail to ${email_address} (sendmail)-") + } + catch (Exception all) { + // Catch failures and try with plaintext + def mail_cmd = ['mail', '-s', subject, '--content-type=text/html', email_address] + mail_cmd.execute() << email_html + log.info("-${colors.purple}[${workflow.manifest.name}]${colors.green} Sent summary e-mail to ${email_address} (mail)-") + } + } + + // Write summary e-mail HTML to a file + def output_hf = new File(workflow.launchDir.toString(), ".pipeline_report.html") + output_hf.withWriter { w -> w << email_html } + nextflow.extension.FilesEx.copyTo(output_hf.toPath(), "${outdir}/pipeline_info/pipeline_report.html") + output_hf.delete() + + // Write summary e-mail TXT to a file + def output_tf = new File(workflow.launchDir.toString(), ".pipeline_report.txt") + output_tf.withWriter { w -> w << email_txt } + nextflow.extension.FilesEx.copyTo(output_tf.toPath(), "${outdir}/pipeline_info/pipeline_report.txt") + output_tf.delete() +} + +// +// Print pipeline summary on completion +// +def completionSummary(monochrome_logs=true) { + def colors = logColours(monochrome_logs) as Map + if (workflow.success) { + if (workflow.stats.ignoredCount == 0) { + log.info("-${colors.purple}[${workflow.manifest.name}]${colors.green} Pipeline completed successfully${colors.reset}-") + } + else { + log.info("-${colors.purple}[${workflow.manifest.name}]${colors.yellow} Pipeline completed successfully, but with errored process(es) ${colors.reset}-") + } + } + else { + log.info("-${colors.purple}[${workflow.manifest.name}]${colors.red} Pipeline completed with errors${colors.reset}-") + } +} + +// +// Construct and send a notification to a web server as JSON e.g. Microsoft Teams and Slack +// +def imNotification(summary_params, hook_url) { + def summary = [:] + summary_params + .keySet() + .sort() + .each { group -> + summary << summary_params[group] + } + + def misc_fields = [:] + misc_fields['start'] = workflow.start + misc_fields['complete'] = workflow.complete + misc_fields['scriptfile'] = workflow.scriptFile + misc_fields['scriptid'] = workflow.scriptId + if (workflow.repository) { + misc_fields['repository'] = workflow.repository + } + if (workflow.commitId) { + misc_fields['commitid'] = workflow.commitId + } + if (workflow.revision) { + misc_fields['revision'] = workflow.revision + } + misc_fields['nxf_version'] = workflow.nextflow.version + misc_fields['nxf_build'] = workflow.nextflow.build + misc_fields['nxf_timestamp'] = workflow.nextflow.timestamp + + def msg_fields = [:] + msg_fields['version'] = getWorkflowVersion() + msg_fields['runName'] = workflow.runName + msg_fields['success'] = workflow.success + msg_fields['dateComplete'] = workflow.complete + msg_fields['duration'] = workflow.duration + msg_fields['exitStatus'] = workflow.exitStatus + msg_fields['errorMessage'] = (workflow.errorMessage ?: 'None') + msg_fields['errorReport'] = (workflow.errorReport ?: 'None') + msg_fields['commandLine'] = workflow.commandLine.replaceFirst(/ +--hook_url +[^ ]+/, "") + msg_fields['projectDir'] = workflow.projectDir + msg_fields['summary'] = summary << misc_fields + + // Render the JSON template + def engine = new groovy.text.GStringTemplateEngine() + // Different JSON depending on the service provider + // Defaults to "Adaptive Cards" (https://adaptivecards.io), except Slack which has its own format + def json_path = hook_url.contains("hooks.slack.com") ? "slackreport.json" : "adaptivecard.json" + def hf = new File("${workflow.projectDir}/assets/${json_path}") + def json_template = engine.createTemplate(hf).make(msg_fields) + def json_message = json_template.toString() + + // POST + def post = new URL(hook_url).openConnection() + post.setRequestMethod("POST") + post.setDoOutput(true) + post.setRequestProperty("Content-Type", "application/json") + post.getOutputStream().write(json_message.getBytes("UTF-8")) + def postRC = post.getResponseCode() + if (!postRC.equals(200)) { + log.warn(post.getErrorStream().getText()) + } +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml b/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml new file mode 100644 index 0000000..d08d243 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml @@ -0,0 +1,24 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "UTILS_NFCORE_PIPELINE" +description: Subworkflow with utility functions specific to the nf-core pipeline template +keywords: + - utility + - pipeline + - initialise + - version +components: [] +input: + - nextflow_cli_args: + type: list + description: | + Nextflow CLI positional arguments +output: + - success: + type: boolean + description: | + Dummy output to indicate success +authors: + - "@adamrtalbot" +maintainers: + - "@adamrtalbot" + - "@maxulysse" diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test new file mode 100644 index 0000000..1dc317f --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test @@ -0,0 +1,134 @@ + +nextflow_function { + + name "Test Functions" + script "../main.nf" + config "subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "utils_nfcore_pipeline" + tag "subworkflows/utils_nfcore_pipeline" + + test("Test Function checkConfigProvided") { + + function "checkConfigProvided" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function checkProfileProvided") { + + function "checkProfileProvided" + + when { + function { + """ + input[0] = [] + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function workflowCitation") { + + function "workflowCitation" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function nfCoreLogo") { + + function "nfCoreLogo" + + when { + function { + """ + input[0] = false + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function dashedLine") { + + function "dashedLine" + + when { + function { + """ + input[0] = false + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function without logColours") { + + function "logColours" + + when { + function { + """ + input[0] = true + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function with logColours") { + function "logColours" + + when { + function { + """ + input[0] = false + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap new file mode 100644 index 0000000..1037232 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap @@ -0,0 +1,166 @@ +{ + "Test Function checkProfileProvided": { + "content": null, + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:03.360873" + }, + "Test Function checkConfigProvided": { + "content": [ + true + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:02:59.729647" + }, + "Test Function nfCoreLogo": { + "content": [ + "\n\n-\u001b[2m----------------------------------------------------\u001b[0m-\n \u001b[0;32m,--.\u001b[0;30m/\u001b[0;32m,-.\u001b[0m\n\u001b[0;34m ___ __ __ __ ___ \u001b[0;32m/,-._.--~'\u001b[0m\n\u001b[0;34m |\\ | |__ __ / ` / \\ |__) |__ \u001b[0;33m} {\u001b[0m\n\u001b[0;34m | \\| | \\__, \\__/ | \\ |___ \u001b[0;32m\\`-._,-`-,\u001b[0m\n \u001b[0;32m`._,._,'\u001b[0m\n\u001b[0;35m nextflow_workflow v9.9.9\u001b[0m\n-\u001b[2m----------------------------------------------------\u001b[0m-\n" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:10.562934" + }, + "Test Function workflowCitation": { + "content": [ + "If you use nextflow_workflow for your analysis please cite:\n\n* The pipeline\n https://doi.org/10.5281/zenodo.5070524\n\n* The nf-core framework\n https://doi.org/10.1038/s41587-020-0439-x\n\n* Software dependencies\n https://github.com/nextflow_workflow/blob/master/CITATIONS.md" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:07.019761" + }, + "Test Function without logColours": { + "content": [ + { + "reset": "", + "bold": "", + "dim": "", + "underlined": "", + "blink": "", + "reverse": "", + "hidden": "", + "black": "", + "red": "", + "green": "", + "yellow": "", + "blue": "", + "purple": "", + "cyan": "", + "white": "", + "bblack": "", + "bred": "", + "bgreen": "", + "byellow": "", + "bblue": "", + "bpurple": "", + "bcyan": "", + "bwhite": "", + "ublack": "", + "ured": "", + "ugreen": "", + "uyellow": "", + "ublue": "", + "upurple": "", + "ucyan": "", + "uwhite": "", + "iblack": "", + "ired": "", + "igreen": "", + "iyellow": "", + "iblue": "", + "ipurple": "", + "icyan": "", + "iwhite": "", + "biblack": "", + "bired": "", + "bigreen": "", + "biyellow": "", + "biblue": "", + "bipurple": "", + "bicyan": "", + "biwhite": "" + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:17.969323" + }, + "Test Function dashedLine": { + "content": [ + "-\u001b[2m----------------------------------------------------\u001b[0m-" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:14.366181" + }, + "Test Function with logColours": { + "content": [ + { + "reset": "\u001b[0m", + "bold": "\u001b[1m", + "dim": "\u001b[2m", + "underlined": "\u001b[4m", + "blink": "\u001b[5m", + "reverse": "\u001b[7m", + "hidden": "\u001b[8m", + "black": "\u001b[0;30m", + "red": "\u001b[0;31m", + "green": "\u001b[0;32m", + "yellow": "\u001b[0;33m", + "blue": "\u001b[0;34m", + "purple": "\u001b[0;35m", + "cyan": "\u001b[0;36m", + "white": "\u001b[0;37m", + "bblack": "\u001b[1;30m", + "bred": "\u001b[1;31m", + "bgreen": "\u001b[1;32m", + "byellow": "\u001b[1;33m", + "bblue": "\u001b[1;34m", + "bpurple": "\u001b[1;35m", + "bcyan": "\u001b[1;36m", + "bwhite": "\u001b[1;37m", + "ublack": "\u001b[4;30m", + "ured": "\u001b[4;31m", + "ugreen": "\u001b[4;32m", + "uyellow": "\u001b[4;33m", + "ublue": "\u001b[4;34m", + "upurple": "\u001b[4;35m", + "ucyan": "\u001b[4;36m", + "uwhite": "\u001b[4;37m", + "iblack": "\u001b[0;90m", + "ired": "\u001b[0;91m", + "igreen": "\u001b[0;92m", + "iyellow": "\u001b[0;93m", + "iblue": "\u001b[0;94m", + "ipurple": "\u001b[0;95m", + "icyan": "\u001b[0;96m", + "iwhite": "\u001b[0;97m", + "biblack": "\u001b[1;90m", + "bired": "\u001b[1;91m", + "bigreen": "\u001b[1;92m", + "biyellow": "\u001b[1;93m", + "biblue": "\u001b[1;94m", + "bipurple": "\u001b[1;95m", + "bicyan": "\u001b[1;96m", + "biwhite": "\u001b[1;97m" + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:21.714424" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test new file mode 100644 index 0000000..8940d32 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test @@ -0,0 +1,29 @@ +nextflow_workflow { + + name "Test Workflow UTILS_NFCORE_PIPELINE" + script "../main.nf" + config "subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config" + workflow "UTILS_NFCORE_PIPELINE" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "utils_nfcore_pipeline" + tag "subworkflows/utils_nfcore_pipeline" + + test("Should run without failures") { + + when { + workflow { + """ + input[0] = [] + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap new file mode 100644 index 0000000..859d103 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap @@ -0,0 +1,19 @@ +{ + "Should run without failures": { + "content": [ + { + "0": [ + true + ], + "valid_config": [ + true + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:25.726491" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config b/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config new file mode 100644 index 0000000..d0a926b --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config @@ -0,0 +1,9 @@ +manifest { + name = 'nextflow_workflow' + author = """nf-core""" + homePage = 'https://127.0.0.1' + description = """Dummy pipeline""" + nextflowVersion = '!>=23.04.0' + version = '9.9.9' + doi = 'https://doi.org/10.5281/zenodo.5070524' +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml b/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml new file mode 100644 index 0000000..ac8523c --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/utils_nfcore_pipeline: + - subworkflows/nf-core/utils_nfcore_pipeline/** diff --git a/subworkflows/nf-core/utils_nfschema_plugin/main.nf b/subworkflows/nf-core/utils_nfschema_plugin/main.nf new file mode 100644 index 0000000..4994303 --- /dev/null +++ b/subworkflows/nf-core/utils_nfschema_plugin/main.nf @@ -0,0 +1,46 @@ +// +// Subworkflow that uses the nf-schema plugin to validate parameters and render the parameter summary +// + +include { paramsSummaryLog } from 'plugin/nf-schema' +include { validateParameters } from 'plugin/nf-schema' + +workflow UTILS_NFSCHEMA_PLUGIN { + + take: + input_workflow // workflow: the workflow object used by nf-schema to get metadata from the workflow + validate_params // boolean: validate the parameters + parameters_schema // string: path to the parameters JSON schema. + // this has to be the same as the schema given to `validation.parametersSchema` + // when this input is empty it will automatically use the configured schema or + // "${projectDir}/nextflow_schema.json" as default. This input should not be empty + // for meta pipelines + + main: + + // + // Print parameter summary to stdout. This will display the parameters + // that differ from the default given in the JSON schema + // + if(parameters_schema) { + log.info paramsSummaryLog(input_workflow, parameters_schema:parameters_schema) + } else { + log.info paramsSummaryLog(input_workflow) + } + + // + // Validate the parameters using nextflow_schema.json or the schema + // given via the validation.parametersSchema configuration option + // + if(validate_params) { + if(parameters_schema) { + validateParameters(parameters_schema:parameters_schema) + } else { + validateParameters() + } + } + + emit: + dummy_emit = true +} + diff --git a/subworkflows/nf-core/utils_nfschema_plugin/meta.yml b/subworkflows/nf-core/utils_nfschema_plugin/meta.yml new file mode 100644 index 0000000..f7d9f02 --- /dev/null +++ b/subworkflows/nf-core/utils_nfschema_plugin/meta.yml @@ -0,0 +1,35 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "utils_nfschema_plugin" +description: Run nf-schema to validate parameters and create a summary of changed parameters +keywords: + - validation + - JSON schema + - plugin + - parameters + - summary +components: [] +input: + - input_workflow: + type: object + description: | + The workflow object of the used pipeline. + This object contains meta data used to create the params summary log + - validate_params: + type: boolean + description: Validate the parameters and error if invalid. + - parameters_schema: + type: string + description: | + Path to the parameters JSON schema. + This has to be the same as the schema given to the `validation.parametersSchema` config + option. When this input is empty it will automatically use the configured schema or + "${projectDir}/nextflow_schema.json" as default. The schema should not be given in this way + for meta pipelines. +output: + - dummy_emit: + type: boolean + description: Dummy emit to make nf-core subworkflows lint happy +authors: + - "@nvnieuwk" +maintainers: + - "@nvnieuwk" diff --git a/subworkflows/nf-core/utils_nfschema_plugin/tests/main.nf.test b/subworkflows/nf-core/utils_nfschema_plugin/tests/main.nf.test new file mode 100644 index 0000000..842dc43 --- /dev/null +++ b/subworkflows/nf-core/utils_nfschema_plugin/tests/main.nf.test @@ -0,0 +1,117 @@ +nextflow_workflow { + + name "Test Subworkflow UTILS_NFSCHEMA_PLUGIN" + script "../main.nf" + workflow "UTILS_NFSCHEMA_PLUGIN" + + tag "subworkflows" + tag "subworkflows_nfcore" + tag "subworkflows/utils_nfschema_plugin" + tag "plugin/nf-schema" + + config "./nextflow.config" + + test("Should run nothing") { + + when { + + params { + test_data = '' + } + + workflow { + """ + validate_params = false + input[0] = workflow + input[1] = validate_params + input[2] = "" + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should validate params") { + + when { + + params { + test_data = '' + outdir = 1 + } + + workflow { + """ + validate_params = true + input[0] = workflow + input[1] = validate_params + input[2] = "" + """ + } + } + + then { + assertAll( + { assert workflow.failed }, + { assert workflow.stdout.any { it.contains('ERROR ~ Validation of pipeline parameters failed!') } } + ) + } + } + + test("Should run nothing - custom schema") { + + when { + + params { + test_data = '' + } + + workflow { + """ + validate_params = false + input[0] = workflow + input[1] = validate_params + input[2] = "${projectDir}/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow_schema.json" + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should validate params - custom schema") { + + when { + + params { + test_data = '' + outdir = 1 + } + + workflow { + """ + validate_params = true + input[0] = workflow + input[1] = validate_params + input[2] = "${projectDir}/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow_schema.json" + """ + } + } + + then { + assertAll( + { assert workflow.failed }, + { assert workflow.stdout.any { it.contains('ERROR ~ Validation of pipeline parameters failed!') } } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow.config b/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow.config new file mode 100644 index 0000000..0907ac5 --- /dev/null +++ b/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow.config @@ -0,0 +1,8 @@ +plugins { + id "nf-schema@2.1.0" +} + +validation { + parametersSchema = "${projectDir}/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow_schema.json" + monochromeLogs = true +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow_schema.json b/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow_schema.json new file mode 100644 index 0000000..331e0d2 --- /dev/null +++ b/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow_schema.json @@ -0,0 +1,96 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://raw.githubusercontent.com/./master/nextflow_schema.json", + "title": ". pipeline parameters", + "description": "", + "type": "object", + "$defs": { + "input_output_options": { + "title": "Input/output options", + "type": "object", + "fa_icon": "fas fa-terminal", + "description": "Define where the pipeline should find input data and save output data.", + "required": ["outdir"], + "properties": { + "validate_params": { + "type": "boolean", + "description": "Validate parameters?", + "default": true, + "hidden": true + }, + "outdir": { + "type": "string", + "format": "directory-path", + "description": "The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure.", + "fa_icon": "fas fa-folder-open" + }, + "test_data_base": { + "type": "string", + "default": "https://raw.githubusercontent.com/nf-core/test-datasets/modules", + "description": "Base for test data directory", + "hidden": true + }, + "test_data": { + "type": "string", + "description": "Fake test data param", + "hidden": true + } + } + }, + "generic_options": { + "title": "Generic options", + "type": "object", + "fa_icon": "fas fa-file-import", + "description": "Less common options for the pipeline, typically set in a config file.", + "help_text": "These options are common to all nf-core pipelines and allow you to customise some of the core preferences for how the pipeline runs.\n\nTypically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`.", + "properties": { + "help": { + "type": "boolean", + "description": "Display help text.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, + "version": { + "type": "boolean", + "description": "Display version and exit.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, + "logo": { + "type": "boolean", + "default": true, + "description": "Display nf-core logo in console output.", + "fa_icon": "fas fa-image", + "hidden": true + }, + "singularity_pull_docker_container": { + "type": "boolean", + "description": "Pull Singularity container from Docker?", + "hidden": true + }, + "publish_dir_mode": { + "type": "string", + "default": "copy", + "description": "Method used to save pipeline results to output directory.", + "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", + "fa_icon": "fas fa-copy", + "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], + "hidden": true + }, + "monochrome_logs": { + "type": "boolean", + "description": "Use monochrome_logs", + "hidden": true + } + } + } + }, + "allOf": [ + { + "$ref": "#/$defs/input_output_options" + }, + { + "$ref": "#/$defs/generic_options" + } + ] +} diff --git a/tower.yml b/tower.yml new file mode 100644 index 0000000..d0d5df5 --- /dev/null +++ b/tower.yml @@ -0,0 +1,3 @@ +reports: + multiqc_report.html: + display: "MultiQC HTML report" diff --git a/workflows/dxnextflowrna.nf b/workflows/dxnextflowrna.nf new file mode 100644 index 0000000..c1d0f3e --- /dev/null +++ b/workflows/dxnextflowrna.nf @@ -0,0 +1,196 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT MODULES / SUBWORKFLOWS / FUNCTIONS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ +// MODULES +include { MULTIQC } from '../modules/nf-core/multiqc/main' + +// SUBWORKFLOWS +include { FASTQ_BAM_QC } from '../subworkflows/local/fastq_bam_qc' +include { FASTQ_TRIM_FILTER_ALIGN_DEDUP } from '../subworkflows/local/fastq_trim_filter_align_dedup' + +// FUNCTIONS +include { paramsSummaryMap } from 'plugin/nf-schema' +include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { methodsDescriptionText } from '../subworkflows/local/utils_umcugenetics_dxnextflowrna_pipeline' + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + RUN MAIN WORKFLOW +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +workflow DXNEXTFLOWRNA { + main: + def createMetaWithIdSimpleName = { file -> [[id: file.getSimpleName()], file] } + + // Reference file channels + ch_fasta_fai = Channel + .fromPath(params.fasta) + .combine(Channel.fromPath(params.fai)) + .map { fasta, fai -> [[id: fasta.getSimpleName()], fasta, fai] } + ch_gene_bed = Channel.fromPath(params.gene_bed) + ch_gtf = Channel + .fromPath(params.gtf) + .map(createMetaWithIdSimpleName) + .first() + ch_star_index = Channel + .fromPath(params.star_index) + .map(createMetaWithIdSimpleName) + .first() + ch_ref_flat = Channel.fromPath(params.ref_flat).first() + ch_rrna_interval = Channel.fromPath(params.rrna_intervals).first() + ch_sortmerna_fastas = Channel + .fromPath(params.rrna_database_manifest) + .splitCsv(by: 1, strip: true) + .map { line -> file(line[0], checkIfExists: true) } + .collect() + .map { files -> [[id: file(params.rrna_database_manifest).getSimpleName()], files] } + ch_sortmerna_index = Channel + .fromPath(params.sortmerna_index) + .map(createMetaWithIdSimpleName) + .first() + + // Input channel + ch_fastq = Channel + .fromFilePairs("${params.input}/*_R{1,2}_001.fastq.gz") + .map { meta, fastq -> + def fmeta = [:] + // Set meta.id + fmeta.id = meta + // Set meta.single_end + if (fastq.size() == 1) { + fmeta.single_end = true + } + else { + fmeta.single_end = false + } + return [fmeta, fastq] + } + ch_versions = Channel.empty() + ch_multiqc_files = Channel.empty() + + // + // SUBWORKFLOW: Run fastq_trim_filter_align_dedup + // + FASTQ_TRIM_FILTER_ALIGN_DEDUP( + ch_fasta_fai, + ch_fastq, + ch_gtf, + ch_sortmerna_fastas, + ch_sortmerna_index, + ch_star_index, + params.seq_platform, + params.seq_center, + false, + ) + ch_versions = ch_versions.mix(FASTQ_TRIM_FILTER_ALIGN_DEDUP.out.versions) + + // Add fastq_trim_filter_align_dedup results to MultiQC files + ch_multiqc_files = ch_multiqc_files.mix( + FASTQ_TRIM_FILTER_ALIGN_DEDUP.out.trim_log.collect { it[1] }.ifEmpty([]), + FASTQ_TRIM_FILTER_ALIGN_DEDUP.out.trim_zip.collect { it[1] }.ifEmpty([]), + FASTQ_TRIM_FILTER_ALIGN_DEDUP.out.sortmerna_log.collect { it[1] }.ifEmpty([]), + FASTQ_TRIM_FILTER_ALIGN_DEDUP.out.star_align_log_final.collect { it[1] }.ifEmpty([]), + FASTQ_TRIM_FILTER_ALIGN_DEDUP.out.star_align_read_per_gene_tab.collect { it[1] }.ifEmpty([]), + FASTQ_TRIM_FILTER_ALIGN_DEDUP.out.umitools_dedup_log.collect { it[1] }.ifEmpty([]), + FASTQ_TRIM_FILTER_ALIGN_DEDUP.out.samtools_stats.collect { it[1] }.ifEmpty([]), + FASTQ_TRIM_FILTER_ALIGN_DEDUP.out.flagstat.collect { it[1] }.ifEmpty([]), + FASTQ_TRIM_FILTER_ALIGN_DEDUP.out.idxstats.collect { it[1] }.ifEmpty([]), + ) + + // + // SUBWORKFLOW: Run fastq_bam_qc + // + FASTQ_BAM_QC( + FASTQ_TRIM_FILTER_ALIGN_DEDUP.out.ch_bam_bai, + ch_fasta_fai.map { meta, fasta, fai -> [fasta] }, + ch_fastq, + ch_gene_bed, + ch_ref_flat, + ch_rrna_interval, + ) + ch_versions = ch_versions.mix(FASTQ_BAM_QC.out.versions) + + // Add fastq_bam_qc results to MultiQC files + ch_multiqc_files = ch_multiqc_files.mix( + FASTQ_BAM_QC.out.fastqc_zip.collect { it[1] }.ifEmpty([]), + FASTQ_BAM_QC.out.bamstat_txt.collect { it[1] }.ifEmpty([]), + FASTQ_BAM_QC.out.inferexperiment_txt.collect { it[1] }.ifEmpty([]), + FASTQ_BAM_QC.out.innerdistance_freq.collect { it[1] }.ifEmpty([]), + FASTQ_BAM_QC.out.junctionannotation_all.collect { it[1] }.ifEmpty([]), + FASTQ_BAM_QC.out.junctionsaturation_rscript.collect { it[1] }.ifEmpty([]), + FASTQ_BAM_QC.out.readdistribution_txt.collect { it[1] }.ifEmpty([]), + FASTQ_BAM_QC.out.readduplication_pos_xls.collect { it[1] }.ifEmpty([]), + FASTQ_BAM_QC.out.tin_txt.collect { it[1] }.ifEmpty([]), + FASTQ_BAM_QC.out.rna_metrics.collect { it[1] }.ifEmpty([]), + FASTQ_BAM_QC.out.lc_extrap.collect { it[1] }.ifEmpty([]), + ) + + // + // Collate and save software versions + // + softwareVersionsToYAML(ch_versions) + .collectFile( + storeDir: "${params.outdir}/pipeline_info", + name: 'nf_core_' + 'pipeline_software_' + 'mqc_' + 'versions.yml', + sort: true, + newLine: true, + ) + .set { ch_collated_versions } + + + // + // MODULE: MultiQC + // + ch_multiqc_config = Channel.fromPath( + "${projectDir}/assets/multiqc_config.yml", + checkIfExists: true + ) + ch_multiqc_custom_config = params.multiqc_config + ? Channel.fromPath(params.multiqc_config, checkIfExists: true) + : Channel.empty() + ch_multiqc_logo = params.multiqc_logo + ? Channel.fromPath(params.multiqc_logo, checkIfExists: true) + : Channel.empty() + + // Collect workflow params + summary_params = paramsSummaryMap( + workflow, + parameters_schema: "nextflow_schema.json" + ) + ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params)) + ch_multiqc_files = ch_multiqc_files.mix( + ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml') + ) + // Add methods description + ch_multiqc_custom_methods_description = params.multiqc_methods_description + ? file(params.multiqc_methods_description, checkIfExists: true) + : file("${projectDir}/assets/methods_description_template.yml", checkIfExists: true) + ch_methods_description = Channel.value( + methodsDescriptionText(ch_multiqc_custom_methods_description) + ) + ch_multiqc_files = ch_multiqc_files.mix( + ch_methods_description.collectFile( + name: 'methods_description_mqc.yaml', + sort: true, + ) + ) + // Collate software versions + ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) + + MULTIQC( + ch_multiqc_files.collect(), + ch_multiqc_config.toList(), + ch_multiqc_custom_config.toList(), + ch_multiqc_logo.toList(), + [], + [], + ) + + emit: + multiqc_report = MULTIQC.out.report // channel: [ /path/to/multiqc_report.html] + versions = ch_versions // channel: [ path(versions.yml) ] +}