Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improvements to the design and defaults #74

Merged
merged 19 commits into from
Sep 10, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 1 addition & 4 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,10 +1,7 @@
# https://makefiletutorial.com/

# run_stub is a command, not a file: declare it phony so a file named
# `run_stub` can never make the target look up to date.
.PHONY: run_stub
# Generate the mock input files, then launch the pipeline with -stub-run
# (once with `-entry TEST`, once with the default entry).
run_stub:
	bash ./data/mock_data/generate_mock_files.sh && nextflow run main.nf -entry TEST -stub-run
	bash ./data/mock_data/generate_mock_files.sh && nextflow run main.nf -stub-run

# run_dev is a command, not a file: declare it phony.
.PHONY: run_dev
# Run the pipeline with the `dev` profile, resuming previous work and
# passing -with-tower to Nextflow.
run_dev:
	nextflow run main.nf -profile dev -resume -with-tower

# run_test is a command, not a file: declare it phony.
.PHONY: run_test
# Run the TEST entry workflow with the parameters from params/test.yml,
# using the `standard` and `docker` profiles and resuming previous work.
run_test:
	nextflow run main.nf -params-file params/test.yml -entry TEST -resume -profile standard,docker
28 changes: 26 additions & 2 deletions conda_envs/setup_conda_envs.sh
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,9 +1,33 @@
#!/usr/bin/env bash

set -xue
set -e

# NOTE: Please replace `conda` with `mamba` if it is installed for faster installs.

# NOTE: The conda environments are expected by the `conda_local` profile to be created within `conda_envs` directory

conda env create -p mtbseq-nf-env --file mtbseq-nf-env.yml
# NOTE: Adding a step to automatically register the gatk jar
GATK_RELEASE="GenomeAnalysisTK-3.8-0-ge9d806836"

echo "Downloading GATK jar"

wget "https://storage.googleapis.com/gatk-software/package-archive/gatk/${GATK_RELEASE}.tar.bz2"

# Extract only the jar file(s) from the archive.
tar -xf "${GATK_RELEASE}.tar.bz2" --wildcards '*.jar'

cp "${GATK_RELEASE}/GenomeAnalysisTK.jar" .

echo "Creating mtbseq-nf-env"
# Use mamba when it is available, otherwise fall back to conda so the
# script still works on machines where only conda is installed.
if command -v mamba >/dev/null 2>&1; then
    ENV_CREATOR="mamba"
else
    ENV_CREATOR="conda"
fi
"${ENV_CREATOR}" env create -p mtbseq-nf-env --file mtbseq-nf-env.yml

echo "Registering GATK Jar"

# TODO: Good candidate for a clean approach in a refactor.
# Load conda's shell integration so `conda activate` works in this
# non-interactive shell, then activate the freshly created environment.
eval "$(conda shell.bash hook)"
conda activate "./mtbseq-nf-env"
gatk-register GenomeAnalysisTK.jar

echo "Testing mtbseq-nf-env"
MTBseq --version
MTBseq --check

echo "Cleaning files"
# Removes the downloaded archive, the extracted directory and the copied jar.
rm -rf GenomeAnalysisTK*
4 changes: 4 additions & 0 deletions conf/conda.config
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@ params {
conda_envs_location = "${projectDir}/conda_envs"
}

// Turn on conda support for this profile.
conda.enabled = true

process {
withName:
'.*' {
Expand Down
18 changes: 1 addition & 17 deletions conf/docker.config
Original file line number Diff line number Diff line change
@@ -1,19 +1,3 @@
// Run processes inside Docker containers.
docker.enabled = true

// Container images assigned per process-name pattern.
process {
withName:
"TB.*" {
container = 'quay.io/biocontainers/mtbseq:1.0.3--pl526_1'
}

withName:
'FASTQC.*' {
container = 'quay.io/biocontainers/fastqc:0.11.9--0'
}

withName:
'MULTIQC.*' {
container = 'quay.io/biocontainers/multiqc:1.9--pyh9f0ad1d_0'
}

}
// Single container image set on the process scope.
process.container = 'ghcr.io/mtb-bioinformatics/mtbseq-nf:0.9.5'
11 changes: 0 additions & 11 deletions conf/gcp.config

This file was deleted.

19 changes: 2 additions & 17 deletions conf/singularity.config
Original file line number Diff line number Diff line change
@@ -1,19 +1,4 @@
docker.enabled = true

process {
withName:
"TB.*" {
container = 'quay.io/biocontainers/mtbseq:1.0.3--pl526_1'
}
// Run processes inside Singularity containers.
singularity.enabled = true

withName:
'FASTQC.*' {
container = 'quay.io/biocontainers/fastqc:0.11.9--0'
}

withName:
'MULTIQC.*' {
container = 'quay.io/biocontainers/multiqc:1.9--pyh9f0ad1d_0'
}

}
process.container = 'ghcr.io/mtb-bioinformatics/mtbseq-nf:0.9.5'
9 changes: 0 additions & 9 deletions conf/stub.config

This file was deleted.

13 changes: 13 additions & 0 deletions container/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
FROM mambaorg/micromamba

# The conda env file has been copied via the build script
COPY --chown=$MAMBA_USER:$MAMBA_USER mtbseq-nf-env.yml /tmp/mtbseq-nf-env.yml

# Install the environment and procps-ng, then clean the package caches, all in
# ONE layer: a clean in a later layer cannot shrink an earlier layer, so the
# caches of the first install would otherwise stay in the image.
# NOTE(review): procps-ng is presumably needed for `ps` inside the container - confirm.
RUN micromamba install -y -f /tmp/mtbseq-nf-env.yml -n base \
    && micromamba install -y -n base conda-forge::procps-ng \
    && micromamba clean -a -y

# The GATK jar has been downloaded next to this Dockerfile by the build script
COPY --chown=$MAMBA_USER:$MAMBA_USER GenomeAnalysisTK.jar /tmp/GenomeAnalysisTK.jar

# Register the jar with the gatk package installed in the base environment
RUN /opt/conda/bin/gatk-register /tmp/GenomeAnalysisTK.jar
32 changes: 32 additions & 0 deletions container/build_and_publish.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#!/usr/bin/env bash
# Build the mtbseq-nf container image and push it to the container registry.
# Run this script from the `container` directory: it copies the conda env file
# from ../conda_envs and downloads the GATK jar next to the Dockerfile.
set -uex

# NOTE: Make sure you've set the environment correctly and are logged in to the registry.
# NOTE: Login to github registry with `echo $CR_PAT | docker login ghcr.io -u USERNAME --password-stdin`

CONTAINER_TAG=0.9.5

DOCKER_NAMESPACE="ghcr.io/mtb-bioinformatics"

GATK_RELEASE="GenomeAnalysisTK-3.8-0-ge9d806836"

# Remove the files copied/downloaded into the build context.
# Registered on EXIT so they are cleaned up even when a step fails under `set -e`.
cleanup() {
    echo "Deleting the copied files"
    rm -f mtbseq-nf-env.yml
    rm -rf GenomeAnalysisTK*
}
trap cleanup EXIT

echo "Downloading and uncompressing GATK jar"
wget "https://storage.googleapis.com/gatk-software/package-archive/gatk/${GATK_RELEASE}.tar.bz2"
# Extract only the jar file(s) from the archive.
tar -xf "${GATK_RELEASE}.tar.bz2" --wildcards '*.jar'
cp GenomeAnalysis*/*.jar .

echo "Copying the mtbseq-nf env file"
cp ../conda_envs/mtbseq-nf-env.yml ./

echo "Building mtbseq-nf container ..."
CONTAINER_NAME="${DOCKER_NAMESPACE}/mtbseq-nf:${CONTAINER_TAG}"

echo "Container Name : $CONTAINER_NAME "
docker build -t "$CONTAINER_NAME" .
# Start a container from the fresh image, commit it back under the same name,
# then push that image and stop the container.
CONTAINER_ID=$(docker run -d "$CONTAINER_NAME")
docker commit "$CONTAINER_ID" "$CONTAINER_NAME"
docker push "$CONTAINER_NAME"
docker stop "$CONTAINER_ID"
74 changes: 23 additions & 51 deletions conf/global_params.config → default_params.config
Original file line number Diff line number Diff line change
@@ -1,25 +1,18 @@
// NCBI API key
ncbi_api_key = ""
run_type = "folder" // OR "samplesheet"

genomeIds = ["ERR841438", "ERR841440"]

run_type = "local" // OR "sra" OR "samplesheet"
input_folder = "fastqs/*_{1,2}*fastq.gz"

input_samplesheet = "${projectDir}/samplesheet.csv"

reads = "data/full_data/*_{R1,R2}*fastq.gz"

//NOTE: Change the default to "parallel" after testing is done
analysis_mode = "batch" // OR "parallel"
parallel = false

library_name = "lib"
library_name = "illumina"

outdir = "results"


user = "root"

project = "prj"
project = "mtbseqnf"

// This can be customized for scenarios where the software binaries are dumped in a specific path and it isn't possible to use conda.
mtbseq_path = "MTBseq"
Expand All @@ -28,6 +21,19 @@ cohort_tsv = "${params.project}_cohort.tsv"

gatk38_jar = "${projectDir}/resources/GenomeAnalysisTK-3.8-0-ge9d806836/GenomeAnalysisTK.jar"

//-------------------------------------
// Default publication settings for all processes
// These can be overridden at process level
//-------------------------------------

save_mode = 'copy'
should_publish = true


//-------------------------------------
// OPTIONS FROM MTBSEQ MANUAL
//-------------------------------------

//NOTE: Setting this OPTION will skip all filtering steps and report the calculated information for all positions in the input file.
// The all_vars option only needs to be activated in MTBseq itself; in mtbseq-nf we set it to false.
all_vars = false
Expand Down Expand Up @@ -100,15 +106,10 @@ categories = "${projectDir}/data/references/cat/MTB_Gene_Categories.txt"
// This OPTION specifies a file for base quality recalibration. The list must be in VCF format and should contain known SNPs.
basecalib = "${projectDir}/data/references/res/MTB_Base_Calibration_List.vcf"

skip_qc = false



//TODO: Refactor this section to rely upon the withName selectors
// Module level parameters
TBBWA {
results_dir = "${params.outdir}/tbbwa"
save_mode = 'copy'
should_publish = true

// cpus = params.threads

Expand All @@ -122,8 +123,6 @@ TBBWA {

TBREFINE {
results_dir = "${params.outdir}/tbrefine"
save_mode = 'copy'
should_publish = true

// cpus = params.threads

Expand All @@ -138,8 +137,6 @@ TBREFINE {

TBPILE {
results_dir = "${params.outdir}/tbpile"
save_mode = 'copy'
should_publish = true

// cpus = params.threads

Expand All @@ -155,8 +152,6 @@ TBPILE {

TBLIST {
results_dir = "${params.outdir}/tblist"
save_mode = 'copy'
should_publish = true

// cpus = params.threads

Expand All @@ -173,8 +168,6 @@ TBLIST {

TBVARIANTS {
results_dir = "${params.outdir}/tbvariants"
save_mode = 'copy'
should_publish = true

// all_vars = params.all_vars
// snp_vars = params.snp_vars
Expand All @@ -195,11 +188,8 @@ TBVARIANTS {

TBSTATS {
results_dir = "${params.outdir}/tbstats"
project = params.project
save_mode = 'copy'
should_publish = true


// project = params.project
// all_vars = params.all_vars
// snp_vars = params.snp_vars
// lowfreq_vars = params.lowfreq_vars
Expand All @@ -219,10 +209,8 @@ TBSTATS {

TBJOIN {
results_dir = "${params.outdir}/tbjoin"
save_mode = 'copy'
should_publish = true
project = params.project

// project = params.project
// all_vars = params.all_vars
// snp_vars = params.snp_vars
// lowfreq_vars = params.lowfreq_vars
Expand All @@ -243,11 +231,8 @@ TBJOIN {

TBSTRAINS {
results_dir = "${params.outdir}/tbstrains"
save_mode = 'copy'
should_publish = true
project = params.project


// project = params.project
// all_vars = params.all_vars
// snp_vars = params.snp_vars
// lowfreq_vars = params.lowfreq_vars
Expand All @@ -269,9 +254,6 @@ TBSTRAINS {

TBAMEND {
results_dir = "${params.outdir}/tbamend"
save_mode = 'copy'
should_publish = true


// window = params.window
// unambig = params.unambig
Expand All @@ -290,10 +272,8 @@ TBAMEND {

TBGROUPS {
results_dir = "${params.outdir}/tbgroups"
save_mode = 'copy'
should_publish = true
project = params.project

// project = params.project
// distance = params.distance

// // ref = params.ref
Expand All @@ -307,8 +287,6 @@ TBGROUPS {

TBFULL {
results_dir = "${params.outdir}/tbfull"
save_mode = 'copy'
should_publish = true

// minbqual = params.minbqual
// mincovf = params.mincovf
Expand All @@ -331,18 +309,12 @@ TBFULL {

// Module-level settings for RENAME_FILES: its results directory under
// params.outdir plus its publication settings (save_mode, should_publish).
RENAME_FILES {
results_dir = "${params.outdir}/rename_files"
save_mode = 'copy'
should_publish = true
}

// Module-level settings for FASTQC: its results directory under
// params.outdir plus its publication settings (save_mode, should_publish).
FASTQC {
results_dir = "${params.outdir}/fastqc"
save_mode = 'copy'
should_publish = true
}

// Module-level settings for MULTIQC: its results directory under
// params.outdir plus its publication settings (save_mode, should_publish).
MULTIQC {
results_dir = "${params.outdir}/multiqc"
save_mode = 'copy'
should_publish = true
}
Loading