From 0e323b1ee64b4db65c36466460efba8faac85731 Mon Sep 17 00:00:00 2001 From: tdayris Date: Wed, 26 Jan 2022 15:27:53 +0100 Subject: [PATCH] feat: explicitly specify bwa index in bwa wrappers (#232) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * [fix] (template): Missing code in wrappers' doc. Error #187 * [dev] (bwa): Automatic prefix detection (#228) * [dev] (bwa): Automatix prefix detection (#228) * [doc] (bwa): Snakefiles updated * [dev] (bwa): Aln was forgotten at first * [dev] (black): Reformatting * Update Snakefile_samtools * minimal formatting commit to trigger tests test logs are not available any more * revert test trigger change * feat!: remove tmp dir parameter in bwa mem wrapper as this is now handled automatically by snakemake * fix: fixed index adressing * fmt * fixes * fixes * fixed failing lint, fixed syntax * add missing logfile Co-authored-by: tdayris Co-authored-by: Johannes Köster Co-authored-by: David Laehnemann Co-authored-by: Johannes Köster --- bio/bwa-mem2/index/test/Snakefile | 6 +-- bio/bwa-mem2/index/wrapper.py | 6 ++- bio/bwa-mem2/mem-samblaster/test/Snakefile | 11 ++-- bio/bwa-mem2/mem-samblaster/wrapper.py | 8 ++- bio/bwa-mem2/mem/test/Snakefile | 15 +++--- bio/bwa-mem2/mem/test/Snakefile_picard | 15 +++--- bio/bwa-mem2/mem/test/Snakefile_samtools | 15 +++--- bio/bwa-mem2/mem/wrapper.py | 8 ++- bio/bwa/aln/test/Snakefile | 11 ++-- bio/bwa/aln/wrapper.py | 12 +++-- bio/bwa/index/test/Snakefile | 15 ++---- bio/bwa/index/wrapper.py | 4 +- bio/bwa/mem-samblaster/test/Snakefile | 11 ++-- bio/bwa/mem-samblaster/wrapper.py | 8 ++- bio/bwa/mem/test/Snakefile | 4 +- bio/bwa/mem/test/Snakefile_picard | 3 +- bio/bwa/mem/test/Snakefile_samtools | 5 +- bio/bwa/mem/wrapper.py | 13 +++-- bio/bwa/sampe/test/Snakefile | 17 ++++--- bio/bwa/sampe/test/Snakefile_picard | 17 ++++--- bio/bwa/sampe/test/Snakefile_samtools | 17 ++++--- bio/bwa/sampe/wrapper.py | 7 ++- bio/bwa/samse/test/Snakefile | 17 ++++--- bio/bwa/samse/test/Snakefile_picard | 17 ++++--- bio/bwa/samse/test/Snakefile_samtools | 17 ++++--- bio/bwa/samse/wrapper.py | 7 ++- bio/bwa/samxe/test/Snakefile | 35 +++++++------ bio/bwa/samxe/test/Snakefile_picard | 59 ++++++++++++---------- bio/bwa/samxe/test/Snakefile_samtools | 59 ++++++++++++---------- docs/generate_docs.py | 1 - meta/bio/bwa_mapping/test/Snakefile | 6 ++- 31 files changed, 253 insertions(+), 193 deletions(-) diff --git a/bio/bwa-mem2/index/test/Snakefile b/bio/bwa-mem2/index/test/Snakefile index 1abd05c42b..098b904288 100644 --- a/bio/bwa-mem2/index/test/Snakefile +++ b/bio/bwa-mem2/index/test/Snakefile @@ -1,6 +1,6 @@ rule bwa_mem2_index: input: - "{genome}" + "{genome}", output: "{genome}.0123", "{genome}.amb", @@ -8,8 +8,8 @@ rule bwa_mem2_index: "{genome}.bwt.2bit.64", "{genome}.pac", log: - "logs/bwa-mem2_index/{genome}.log" + "logs/bwa-mem2_index/{genome}.log", params: - prefix=lambda w: w.genome + prefix=lambda w: w.genome, wrapper: "master/bio/bwa-mem2/index" diff --git a/bio/bwa-mem2/index/wrapper.py b/bio/bwa-mem2/index/wrapper.py index 637379e1a5..0eedfcbcaa 100644 --- a/bio/bwa-mem2/index/wrapper.py +++ b/bio/bwa-mem2/index/wrapper.py @@ -16,7 +16,11 @@ raise ValueError("Please provide exactly one reference genome as input.") # Prefix that should be used for the database -prefix = snakemake.params.get("prefix", "") +prefix = None +if "prefix" in snakemake.params.keys(): + prefix = snakemake.params["prefix"] +else: + prefix = splitext(snakemake.output[0])[0] if len(prefix) > 0: prefix = "-p " + prefix diff --git a/bio/bwa-mem2/mem-samblaster/test/Snakefile b/bio/bwa-mem2/mem-samblaster/test/Snakefile index 3889ed6b23..a8218e6bb7 100644 --- a/bio/bwa-mem2/mem-samblaster/test/Snakefile +++ b/bio/bwa-mem2/mem-samblaster/test/Snakefile @@ -1,15 +1,16 @@ rule bwa_mem: input: - reads=["reads/{sample}.1.fastq", "reads/{sample}.2.fastq"] + reads=["reads/{sample}.1.fastq", "reads/{sample}.2.fastq"], + # Index can be a list of (all) files created by bwa, or one of them + idx=multiext("genome.fasta", ".amb", ".ann", ".bwt.2bit.64", ".pac"), output: bam="mapped/{sample}.bam", - index="mapped/{sample}.bam.bai" + index="mapped/{sample}.bam.bai", log: - "logs/bwa_mem2_sambamba/{sample}.log" + "logs/bwa_mem2_sambamba/{sample}.log", params: - index="genome.fasta", extra=r"-R '@RG\tID:{sample}\tSM:{sample}'", - sort_extra="-q" # Extra args for sambamba. + sort_extra="-q", # Extra args for sambamba. threads: 8 wrapper: "master/bio/bwa-mem2/mem-samblaster" diff --git a/bio/bwa-mem2/mem-samblaster/wrapper.py b/bio/bwa-mem2/mem-samblaster/wrapper.py index 4aa1b28501..dc7d7d2724 100644 --- a/bio/bwa-mem2/mem-samblaster/wrapper.py +++ b/bio/bwa-mem2/mem-samblaster/wrapper.py @@ -14,6 +14,12 @@ sort_extra = snakemake.params.get("sort_extra", "") samblaster_extra = snakemake.params.get("samblaster_extra", "") +index = snakemake.input.get("index", "") +if isinstance(index, str): + index = path.splitext(snakemake.input.idx)[0] +else: + index = path.splitext(snakemake.input.idx[0])[0] + log = snakemake.log_fmt_shell(stdout=False, stderr=True) # Check inputs/arguments. @@ -27,7 +33,7 @@ "(bwa-mem2 mem" " -t {snakemake.threads}" " {extra}" - " {snakemake.params.index}" + " {index}" " {snakemake.input.reads}" " | samblaster" " {samblaster_extra}" diff --git a/bio/bwa-mem2/mem/test/Snakefile b/bio/bwa-mem2/mem/test/Snakefile index 8049cdc1f3..3ff7ff9fa5 100644 --- a/bio/bwa-mem2/mem/test/Snakefile +++ b/bio/bwa-mem2/mem/test/Snakefile @@ -1,16 +1,17 @@ rule bwa_mem2_mem: input: - reads=["reads/{sample}.1.fastq", "reads/{sample}.2.fastq"] + reads=["reads/{sample}.1.fastq", "reads/{sample}.2.fastq"], + # Index can be a list of (all) files created by bwa, or one of them + idx=multiext("genome.fasta", ".amb", ".ann", ".bwt.2bit.64", ".pac"), output: - "mapped/{sample}.bam" + "mapped/{sample}.bam", log: - "logs/bwa_mem2/{sample}.log" + "logs/bwa_mem2/{sample}.log", params: - index="genome.fasta", extra=r"-R '@RG\tID:{sample}\tSM:{sample}'", - sort="none", # Can be 'none', 'samtools' or 'picard'. - sort_order="coordinate", # Can be 'coordinate' (default) or 'queryname'. - sort_extra="" # Extra args for samtools/picard. + sort="none", # Can be 'none', 'samtools' or 'picard'. + sort_order="coordinate", # Can be 'coordinate' (default) or 'queryname'. + sort_extra="", # Extra args for samtools/picard. threads: 8 wrapper: "master/bio/bwa-mem2/mem" diff --git a/bio/bwa-mem2/mem/test/Snakefile_picard b/bio/bwa-mem2/mem/test/Snakefile_picard index 9fa1ea4c2d..dfdba92a9d 100644 --- a/bio/bwa-mem2/mem/test/Snakefile_picard +++ b/bio/bwa-mem2/mem/test/Snakefile_picard @@ -1,16 +1,17 @@ rule bwa_mem2_mem: input: - reads=["reads/{sample}.1.fastq", "reads/{sample}.2.fastq"] + reads=["reads/{sample}.1.fastq", "reads/{sample}.2.fastq"], + # Index can be a list of (all) files created by bwa, or one of them + idx=multiext("genome.fasta", ".amb", ".ann", ".bwt.2bit.64", ".pac"), output: - "mapped/{sample}.bam" + "mapped/{sample}.bam", log: - "logs/bwa_mem/{sample}.log" + "logs/bwa_mem/{sample}.log", params: - index="genome.fasta", extra=r"-R '@RG\tID:{sample}\tSM:{sample}'", - sort="picard", # Can be 'none', 'samtools' or 'picard'. - sort_order="coordinate", # Can be 'coordinate' (default) or 'queryname'. - sort_extra="" # Extra args for samtools/picard. + sort="picard", # Can be 'none', 'samtools' or 'picard'. + sort_order="coordinate", # Can be 'coordinate' (default) or 'queryname'. + sort_extra="", # Extra args for samtools/picard. threads: 8 wrapper: "master/bio/bwa-mem2/mem" diff --git a/bio/bwa-mem2/mem/test/Snakefile_samtools b/bio/bwa-mem2/mem/test/Snakefile_samtools index 008682edf9..426c92ab9c 100644 --- a/bio/bwa-mem2/mem/test/Snakefile_samtools +++ b/bio/bwa-mem2/mem/test/Snakefile_samtools @@ -1,16 +1,17 @@ rule bwa_mem2_mem: input: - reads=["reads/{sample}.1.fastq", "reads/{sample}.2.fastq"] + reads=["reads/{sample}.1.fastq", "reads/{sample}.2.fastq"], + # Index can be a list of (all) files created by bwa, or one of them + idx=multiext("genome.fasta", ".amb", ".ann", ".bwt.2bit.64", ".pac"), output: - "mapped/{sample}.bam" + "mapped/{sample}.bam", log: - "logs/bwa_mem/{sample}.log" + "logs/bwa_mem/{sample}.log", params: - index="genome.fasta", extra=r"-R '@RG\tID:{sample}\tSM:{sample}'", - sort="samtools", # Can be 'none', 'samtools' or 'picard'. - sort_order="coordinate", # Can be 'coordinate' (default) or 'queryname'. - sort_extra="" # Extra args for samtools/picard. + sort="samtools", # Can be 'none', 'samtools' or 'picard'. + sort_order="coordinate", # Can be 'coordinate' (default) or 'queryname'. + sort_extra="", # Extra args for samtools/picard. threads: 8 wrapper: "master/bio/bwa-mem2/mem" diff --git a/bio/bwa-mem2/mem/wrapper.py b/bio/bwa-mem2/mem/wrapper.py index 2bc5b4bdd8..1e0acaeb03 100644 --- a/bio/bwa-mem2/mem/wrapper.py +++ b/bio/bwa-mem2/mem/wrapper.py @@ -18,6 +18,12 @@ sort_order = snakemake.params.get("sort_order", "coordinate") sort_extra = snakemake.params.get("sort_extra", "") +index = snakemake.input.get("index", "") +if isinstance(index, str): + index = path.splitext(snakemake.input.idx)[0] +else: + index = path.splitext(snakemake.input.idx[0])[0] + log = snakemake.log_fmt_shell(stdout=False, stderr=True) # Check inputs/arguments. @@ -63,7 +69,7 @@ "(bwa-mem2 mem" " -t {snakemake.threads}" " {extra}" - " {snakemake.params.index}" + " {index}" " {snakemake.input.reads}" " | " + pipe_cmd + ") {log}" ) diff --git a/bio/bwa/aln/test/Snakefile b/bio/bwa/aln/test/Snakefile index 2efa219a6e..e840407c02 100644 --- a/bio/bwa/aln/test/Snakefile +++ b/bio/bwa/aln/test/Snakefile @@ -1,13 +1,14 @@ rule bwa_aln: input: - "reads/{sample}.{pair}.fastq" + fastq="reads/{sample}.{pair}.fastq", + # Index can be a list of (all) files created by bwa, or one of them + idx=multiext("genome", ".amb", ".ann", ".bwt", ".pac", ".sa"), output: - "sai/{sample}.{pair}.sai" + "sai/{sample}.{pair}.sai", params: - index="genome", - extra="" + extra="", log: - "logs/bwa_aln/{sample}.{pair}.log" + "logs/bwa_aln/{sample}.{pair}.log", threads: 8 wrapper: "master/bio/bwa/aln" diff --git a/bio/bwa/aln/wrapper.py b/bio/bwa/aln/wrapper.py index 87e67c6f96..a2c6a3dd94 100644 --- a/bio/bwa/aln/wrapper.py +++ b/bio/bwa/aln/wrapper.py @@ -5,18 +5,24 @@ __email__ = "julianderuiter@gmail.com" __license__ = "MIT" - +from os import path from snakemake.shell import shell extra = snakemake.params.get("extra", "") log = snakemake.log_fmt_shell(stdout=False, stderr=True) +index = snakemake.input.idx +if isinstance(index, str): + index = path.splitext(snakemake.input.idx)[0] +else: + index = path.splitext(snakemake.input.idx[0])[0] + shell( "bwa aln" " {extra}" " -t {snakemake.threads}" - " {snakemake.params.index}" - " {snakemake.input[0]}" + " {index}" + " {snakemake.input.fastq}" " > {snakemake.output[0]} {log}" ) diff --git a/bio/bwa/index/test/Snakefile b/bio/bwa/index/test/Snakefile index b1bdded812..9167cc43f1 100644 --- a/bio/bwa/index/test/Snakefile +++ b/bio/bwa/index/test/Snakefile @@ -1,16 +1,11 @@ rule bwa_index: input: - "{genome}.fasta" + "{genome}.fasta", output: - "{genome}.amb", - "{genome}.ann", - "{genome}.bwt", - "{genome}.pac", - "{genome}.sa" + idx=multiext("{genome}", ".amb", ".ann", ".bwt", ".pac", ".sa"), log: - "logs/bwa_index/{genome}.log" + "logs/bwa_index/{genome}.log", params: - prefix="{genome}", - algorithm="bwtsw" + algorithm="bwtsw", wrapper: - "master/bio/bwa/index" \ No newline at end of file + "master/bio/bwa/index" diff --git a/bio/bwa/index/wrapper.py b/bio/bwa/index/wrapper.py index 36172c0678..f75e5db8f0 100644 --- a/bio/bwa/index/wrapper.py +++ b/bio/bwa/index/wrapper.py @@ -3,7 +3,7 @@ __email__ = "patrik.smeds@gmail.com" __license__ = "MIT" -from os import path +from os.path import splitext from snakemake.shell import shell @@ -16,7 +16,7 @@ raise ValueError("Only one reference genome can be inputed!") # Prefix that should be used for the database -prefix = snakemake.params.get("prefix", "") +prefix = snakemake.params.get("prefix", splitext(snakemake.output.idx[0])[0]) if len(prefix) > 0: prefix = "-p " + prefix diff --git a/bio/bwa/mem-samblaster/test/Snakefile b/bio/bwa/mem-samblaster/test/Snakefile index 18875ddad9..bd63e40855 100644 --- a/bio/bwa/mem-samblaster/test/Snakefile +++ b/bio/bwa/mem-samblaster/test/Snakefile @@ -1,15 +1,16 @@ rule bwa_mem: input: - reads=["reads/{sample}.1.fastq", "reads/{sample}.2.fastq"] + reads=["reads/{sample}.1.fastq", "reads/{sample}.2.fastq"], + # Index can be a list of (all) files created by bwa, or one of them + idx=multiext("genome", ".amb", ".ann", ".bwt", ".pac", ".sa"), output: bam="mapped/{sample}.bam", - index="mapped/{sample}.bam.bai" + index="mapped/{sample}.bam.bai", log: - "logs/bwa_mem_sambamba/{sample}.log" + "logs/bwa_mem_sambamba/{sample}.log", params: - index="genome", extra=r"-R '@RG\tID:{sample}\tSM:{sample}'", - sort_extra="" # Extra args for sambamba. + sort_extra="", # Extra args for sambamba. threads: 8 wrapper: "master/bio/bwa/mem-samblaster" diff --git a/bio/bwa/mem-samblaster/wrapper.py b/bio/bwa/mem-samblaster/wrapper.py index 277853b2e7..85e9d69ac2 100644 --- a/bio/bwa/mem-samblaster/wrapper.py +++ b/bio/bwa/mem-samblaster/wrapper.py @@ -14,6 +14,12 @@ sort_extra = snakemake.params.get("sort_extra", "") samblaster_extra = snakemake.params.get("samblaster_extra", "") +index = snakemake.input.get("index", "") +if isinstance(index, str): + index = path.splitext(snakemake.input.idx)[0] +else: + index = path.splitext(snakemake.input.idx[0])[0] + log = snakemake.log_fmt_shell(stdout=False, stderr=True) # Check inputs/arguments. @@ -27,7 +33,7 @@ "(bwa mem" " -t {snakemake.threads}" " {extra}" - " {snakemake.params.index}" + " {index}" " {snakemake.input.reads}" " | samblaster" " {samblaster_extra}" diff --git a/bio/bwa/mem/test/Snakefile b/bio/bwa/mem/test/Snakefile index 353959771c..bbc4c8a617 100644 --- a/bio/bwa/mem/test/Snakefile +++ b/bio/bwa/mem/test/Snakefile @@ -1,17 +1,17 @@ rule bwa_mem: input: reads=["reads/{sample}.1.fastq", "reads/{sample}.2.fastq"], + # Index can be a list of (all) files created by bwa, or one of them + idx=multiext("genome", ".amb", ".ann", ".bwt", ".pac", ".sa"), output: "mapped/{sample}.bam", log: "logs/bwa_mem/{sample}.log", params: - index="genome", extra=r"-R '@RG\tID:{sample}\tSM:{sample}'", sorting="none", # Can be 'none', 'samtools' or 'picard'. sort_order="queryname", # Can be 'queryname' or 'coordinate'. sort_extra="", # Extra args for samtools/picard. - tmp_dir="/tmp/", # Path to temp dir. (optional) threads: 8 wrapper: "master/bio/bwa/mem" diff --git a/bio/bwa/mem/test/Snakefile_picard b/bio/bwa/mem/test/Snakefile_picard index 527346fad5..88c53b80ad 100644 --- a/bio/bwa/mem/test/Snakefile_picard +++ b/bio/bwa/mem/test/Snakefile_picard @@ -1,12 +1,13 @@ rule bwa_mem: input: reads=["reads/{sample}.1.fastq", "reads/{sample}.2.fastq"], + # Index can be a list of (all) files created by bwa, or one of them + idx=multiext("genome", ".amb", ".ann", ".bwt", ".pac", ".sa"), output: "mapped/{sample}.bam", log: "logs/bwa_mem/{sample}.log", params: - index="genome", extra=r"-R '@RG\tID:{sample}\tSM:{sample}'", sort="picard", # Can be 'none', 'samtools' or 'picard'. sort_order="queryname", # Can be 'queryname' or 'coordinate'. diff --git a/bio/bwa/mem/test/Snakefile_samtools b/bio/bwa/mem/test/Snakefile_samtools index 5a6202c08a..21bca1c89f 100644 --- a/bio/bwa/mem/test/Snakefile_samtools +++ b/bio/bwa/mem/test/Snakefile_samtools @@ -1,12 +1,13 @@ rule bwa_mem: input: reads=["reads/{sample}.1.fastq", "reads/{sample}.2.fastq"], + # Index can be a list of (all) files created by bwa, or one of them + idx=multiext("genome", ".amb", ".ann", ".bwt", ".pac", ".sa"), output: "mapped/{sample}.bam", log: "logs/bwa_mem/{sample}.log", params: - index="genome", extra=r"-R '@RG\tID:{sample}\tSM:{sample}'", sorting="samtools", # Can be 'none', 'samtools' or 'picard'. sort_order="queryname", # Can be 'queryname' or 'coordinate'. @@ -20,13 +21,13 @@ rule bwa_mem: rule bwa_mem_write_index: input: reads=["reads/{sample}.1.fastq", "reads/{sample}.2.fastq"], + idx=multiext("genome", ".amb", ".ann", ".bwt", ".pac", ".sa"), output: "mapped_with_index/{sample}.bam", "mapped_with_index/{sample}.bam.csi", log: "logs/bwa_mem/{sample}.log", params: - index="genome", extra=r"-R '@RG\tID:{sample}\tSM:{sample}'", sorting="samtools", # Can be 'none', 'samtools' or 'picard'. sort_order="coordinate", # Can be 'queryname' or 'coordinate'. diff --git a/bio/bwa/mem/wrapper.py b/bio/bwa/mem/wrapper.py index 039ef5e80c..71c57c4d40 100644 --- a/bio/bwa/mem/wrapper.py +++ b/bio/bwa/mem/wrapper.py @@ -17,6 +17,13 @@ sort_order = snakemake.params.get("sort_order", "coordinate") sort_extra = snakemake.params.get("sort_extra", "") +index = snakemake.input.idx +if isinstance(index, str): + index = path.splitext(snakemake.input.idx)[0] +else: + index = path.splitext(snakemake.input.idx[0])[0] + + if re.search(r"-T\b", sort_extra) or re.search(r"--TMP_DIR\b", sort_extra): sys.exit( "You have specified temp dir (`-T` or `--TMP_DIR`) in params.sort_extra; this is automatically set from params.tmp_dir." @@ -24,10 +31,6 @@ log = snakemake.log_fmt_shell(stdout=False, stderr=True) -tmp_dir = snakemake.params.get("tmp_dir") -if tmp_dir: - tempfile.tempdir = tmp_dir - # Check inputs/arguments. if not isinstance(snakemake.input.reads, str) and len(snakemake.input.reads) not in { @@ -70,7 +73,7 @@ "(bwa mem" " -t {snakemake.threads}" " {extra}" - " {snakemake.params.index}" + " {index}" " {snakemake.input.reads}" " | " + pipe_cmd + ") {log}" ) diff --git a/bio/bwa/sampe/test/Snakefile b/bio/bwa/sampe/test/Snakefile index 0d0b17a158..072e9a0045 100644 --- a/bio/bwa/sampe/test/Snakefile +++ b/bio/bwa/sampe/test/Snakefile @@ -1,16 +1,17 @@ rule bwa_sampe: input: fastq=["reads/{sample}.1.fastq", "reads/{sample}.2.fastq"], - sai=["sai/{sample}.1.sai", "sai/{sample}.2.sai"] + sai=["sai/{sample}.1.sai", "sai/{sample}.2.sai"], + # Index can be a list of (all) files created by bwa, or one of them + idx=multiext("genome", ".amb", ".ann", ".bwt", ".pac", ".sa"), output: - "mapped/{sample}.bam" + "mapped/{sample}.bam", params: - index="genome", - extra=r"-r '@RG\tID:{sample}\tSM:{sample}'", # optional: Extra parameters for bwa. - sort="none", # optional: Enable sorting. Possible values: 'none', 'samtools' or 'picard'` - sort_order="queryname", # optional: Sort by 'queryname' or 'coordinate' - sort_extra="" # optional: extra arguments for samtools/picard + extra=r"-r '@RG\tID:{sample}\tSM:{sample}'", # optional: Extra parameters for bwa. + sort="none", # optional: Enable sorting. Possible values: 'none', 'samtools' or 'picard'` + sort_order="queryname", # optional: Sort by 'queryname' or 'coordinate' + sort_extra="", # optional: extra arguments for samtools/picard log: - "logs/bwa_sampe/{sample}.log" + "logs/bwa_sampe/{sample}.log", wrapper: "master/bio/bwa/sampe" diff --git a/bio/bwa/sampe/test/Snakefile_picard b/bio/bwa/sampe/test/Snakefile_picard index 9de71d1fa8..44f16a9d57 100644 --- a/bio/bwa/sampe/test/Snakefile_picard +++ b/bio/bwa/sampe/test/Snakefile_picard @@ -1,16 +1,17 @@ rule bwa_sampe: input: fastq=["reads/{sample}.1.fastq", "reads/{sample}.2.fastq"], - sai=["sai/{sample}.1.sai", "sai/{sample}.2.sai"] + sai=["sai/{sample}.1.sai", "sai/{sample}.2.sai"], + # Index can be a list of (all) files created by bwa, or one of them + idx=multiext("genome", ".amb", ".ann", ".bwt", ".pac", ".sa"), output: - "mapped/{sample}.bam" + "mapped/{sample}.bam", params: - index="genome", - extra=r"-r '@RG\tID:{sample}\tSM:{sample}'", # optional: Extra parameters for bwa. - sort="picard", # optional: Enable sorting. Possible values: 'none', 'samtools' or 'picard'` - sort_order="queryname", # optional: Sort by 'queryname' or 'coordinate' - sort_extra="" # optional: extra arguments for samtools/picard + extra=r"-r '@RG\tID:{sample}\tSM:{sample}'", # optional: Extra parameters for bwa. + sort="picard", # optional: Enable sorting. Possible values: 'none', 'samtools' or 'picard'` + sort_order="queryname", # optional: Sort by 'queryname' or 'coordinate' + sort_extra="", # optional: extra arguments for samtools/picard log: - "logs/bwa_sampe/{sample}.log" + "logs/bwa_sampe/{sample}.log", wrapper: "master/bio/bwa/sampe" diff --git a/bio/bwa/sampe/test/Snakefile_samtools b/bio/bwa/sampe/test/Snakefile_samtools index d280a81aad..32de193602 100644 --- a/bio/bwa/sampe/test/Snakefile_samtools +++ b/bio/bwa/sampe/test/Snakefile_samtools @@ -1,16 +1,17 @@ rule bwa_sampe: input: fastq=["reads/{sample}.1.fastq", "reads/{sample}.2.fastq"], - sai=["sai/{sample}.1.sai", "sai/{sample}.2.sai"] + sai=["sai/{sample}.1.sai", "sai/{sample}.2.sai"], + # Index can be a list of (all) files created by bwa, or one of them + idx=multiext("genome", ".amb", ".ann", ".bwt", ".pac", ".sa"), output: - "mapped/{sample}.bam" + "mapped/{sample}.bam", params: - index="genome", - extra=r"-r '@RG\tID:{sample}\tSM:{sample}'", # optional: Extra parameters for bwa. - sort="samtools", # optional: Enable sorting. Possible values: 'none', 'samtools' or 'picard'` - sort_order="queryname", # optional: Sort by 'queryname' or 'coordinate' - sort_extra="" # optional: extra arguments for samtools/picard + extra=r"-r '@RG\tID:{sample}\tSM:{sample}'", # optional: Extra parameters for bwa. + sort="samtools", # optional: Enable sorting. Possible values: 'none', 'samtools' or 'picard'` + sort_order="queryname", # optional: Sort by 'queryname' or 'coordinate' + sort_extra="", # optional: extra arguments for samtools/picard log: - "logs/bwa_sampe/{sample}.log" + "logs/bwa_sampe/{sample}.log", wrapper: "master/bio/bwa/sampe" diff --git a/bio/bwa/sampe/wrapper.py b/bio/bwa/sampe/wrapper.py index f8a5c2dcc3..37da1dd455 100644 --- a/bio/bwa/sampe/wrapper.py +++ b/bio/bwa/sampe/wrapper.py @@ -10,6 +10,11 @@ from snakemake.shell import shell +index = snakemake.input.get("idx", "") +if isinstance(index, str): + index = path.splitext(snakemake.input.idx)[0] +else: + index = path.splitext(snakemake.input.idx[0])[0] # Check inputs. if not len(snakemake.input.sai) == 2: @@ -61,7 +66,7 @@ shell( "(bwa sampe" " {extra}" - " {snakemake.params.index}" + " {index}" " {snakemake.input.sai}" " {snakemake.input.fastq}" " | " + pipe_cmd + ") {log}" diff --git a/bio/bwa/samse/test/Snakefile b/bio/bwa/samse/test/Snakefile index 384f969825..4bc826a1a5 100644 --- a/bio/bwa/samse/test/Snakefile +++ b/bio/bwa/samse/test/Snakefile @@ -1,16 +1,17 @@ rule bwa_samse: input: fastq="reads/{sample}.1.fastq", - sai="sai/{sample}.1.sai" + sai="sai/{sample}.1.sai", + # Index can be a list of (all) files created by bwa, or one of them + idx=multiext("genome", ".amb", ".ann", ".bwt", ".pac", ".sa"), output: - "mapped/{sample}.bam" + "mapped/{sample}.bam", params: - index="genome", - extra=r"-r '@RG\tID:{sample}\tSM:{sample}'", # optional: Extra parameters for bwa. - sort="none", # optional: Enable sorting. Possible values: 'none', 'samtools' or 'picard'` - sort_order="queryname", # optional: Sort by 'queryname' or 'coordinate' - sort_extra="" # optional: extra arguments for samtools/picard + extra=r"-r '@RG\tID:{sample}\tSM:{sample}'", # optional: Extra parameters for bwa. + sort="none", # optional: Enable sorting. Possible values: 'none', 'samtools' or 'picard'` + sort_order="queryname", # optional: Sort by 'queryname' or 'coordinate' + sort_extra="", # optional: extra arguments for samtools/picard log: - "logs/bwa_samse/{sample}.log" + "logs/bwa_samse/{sample}.log", wrapper: "master/bio/bwa/samse" diff --git a/bio/bwa/samse/test/Snakefile_picard b/bio/bwa/samse/test/Snakefile_picard index bb7a3d5a8c..a912b49a98 100644 --- a/bio/bwa/samse/test/Snakefile_picard +++ b/bio/bwa/samse/test/Snakefile_picard @@ -1,16 +1,17 @@ rule bwa_samse: input: fastq="reads/{sample}.1.fastq", - sai="sai/{sample}.1.sai" + sai="sai/{sample}.1.sai", + # Index can be a list of (all) files created by bwa, or one of them + idx=multiext("genome", ".amb", ".ann", ".bwt", ".pac", ".sa"), output: - "mapped/{sample}.bam" + "mapped/{sample}.bam", params: - index="genome", - extra=r"-r '@RG\tID:{sample}\tSM:{sample}'", # optional: Extra parameters for bwa. - sort="picard", # optional: Enable sorting. Possible values: 'none', 'samtools' or 'picard'` - sort_order="queryname", # optional: Sort by 'queryname' or 'coordinate' - sort_extra="" # optional: extra arguments for samtools/picard + extra=r"-r '@RG\tID:{sample}\tSM:{sample}'", # optional: Extra parameters for bwa. + sort="picard", # optional: Enable sorting. Possible values: 'none', 'samtools' or 'picard'` + sort_order="queryname", # optional: Sort by 'queryname' or 'coordinate' + sort_extra="", # optional: extra arguments for samtools/picard log: - "logs/bwa_samse/{sample}.log" + "logs/bwa_samse/{sample}.log", wrapper: "master/bio/bwa/samse" diff --git a/bio/bwa/samse/test/Snakefile_samtools b/bio/bwa/samse/test/Snakefile_samtools index 51ed9ff2a5..248c63e6cb 100644 --- a/bio/bwa/samse/test/Snakefile_samtools +++ b/bio/bwa/samse/test/Snakefile_samtools @@ -1,16 +1,17 @@ rule bwa_samse: input: fastq="reads/{sample}.1.fastq", - sai="sai/{sample}.1.sai" + sai="sai/{sample}.1.sai", + # Index can be a list of (all) files created by bwa, or one of them + idx=multiext("genome", ".amb", ".ann", ".bwt", ".pac", ".sa"), output: - "mapped/{sample}.bam" + "mapped/{sample}.bam", params: - index="genome", - extra=r"-r '@RG\tID:{sample}\tSM:{sample}'", # optional: Extra parameters for bwa. - sort="samtools", # optional: Enable sorting. Possible values: 'none', 'samtools' or 'picard'` - sort_order="queryname", # optional: Sort by 'queryname' or 'coordinate' - sort_extra="" # optional: extra arguments for samtools/picard + extra=r"-r '@RG\tID:{sample}\tSM:{sample}'", # optional: Extra parameters for bwa. + sort="samtools", # optional: Enable sorting. Possible values: 'none', 'samtools' or 'picard'` + sort_order="queryname", # optional: Sort by 'queryname' or 'coordinate' + sort_extra="", # optional: extra arguments for samtools/picard log: - "logs/bwa_samse/{sample}.log" + "logs/bwa_samse/{sample}.log", wrapper: "master/bio/bwa/samse" diff --git a/bio/bwa/samse/wrapper.py b/bio/bwa/samse/wrapper.py index 5086da7ed8..d0598faab6 100644 --- a/bio/bwa/samse/wrapper.py +++ b/bio/bwa/samse/wrapper.py @@ -10,6 +10,11 @@ from snakemake.shell import shell +index = snakemake.input.get("idx", "") +if isinstance(index, str): + index = path.splitext(snakemake.input.idx)[0] +else: + index = path.splitext(snakemake.input.idx[0])[0] # Extract arguments. extra = snakemake.params.get("extra", "") @@ -54,7 +59,7 @@ shell( "(bwa samse" " {extra}" - " {snakemake.params.index}" + " {index}" " {snakemake.input.sai}" " {snakemake.input.fastq}" " | " + pipe_cmd + ") {log}" diff --git a/bio/bwa/samxe/test/Snakefile b/bio/bwa/samxe/test/Snakefile index 37f5a8e419..99c62d54a7 100644 --- a/bio/bwa/samxe/test/Snakefile +++ b/bio/bwa/samxe/test/Snakefile @@ -1,59 +1,62 @@ rule bwa_sam_pe: input: fastq=["reads/{sample}.1.fastq", "reads/{sample}.2.fastq"], - sai=["sai/{sample}.1.sai", "sai/{sample}.2.sai"] + sai=["sai/{sample}.1.sai", "sai/{sample}.2.sai"], output: - "mapped/{sample}.pe.sam" + "mapped/{sample}.pe.sam", params: index="genome", - extra=r"-r '@RG\tID:{sample}\tSM:{sample}'", # optional: Extra parameters for bwa. + extra=r"-r '@RG\tID:{sample}\tSM:{sample}'", # optional: Extra parameters for bwa. sort="none", log: - "logs/bwa_sam_pe/{sample}.log" + "logs/bwa_sam_pe/{sample}.log", wrapper: "master/bio/bwa/samxe" + rule bwa_sam_se: input: fastq="reads/{sample}.1.fastq", - sai="sai/{sample}.1.sai" + sai="sai/{sample}.1.sai", output: - "mapped/{sample}.se.sam" + "mapped/{sample}.se.sam", params: index="genome", - extra=r"-r '@RG\tID:{sample}\tSM:{sample}'", # optional: Extra parameters for bwa. + extra=r"-r '@RG\tID:{sample}\tSM:{sample}'", # optional: Extra parameters for bwa. sort="none", log: - "logs/bwa_sam_se/{sample}.log" + "logs/bwa_sam_se/{sample}.log", wrapper: "master/bio/bwa/samxe" + rule bwa_bam_pe: input: fastq=["reads/{sample}.1.fastq", "reads/{sample}.2.fastq"], - sai=["sai/{sample}.1.sai", "sai/{sample}.2.sai"] + sai=["sai/{sample}.1.sai", "sai/{sample}.2.sai"], output: - "mapped/{sample}.pe.bam" + "mapped/{sample}.pe.bam", params: index="genome", - extra=r"-r '@RG\tID:{sample}\tSM:{sample}'", # optional: Extra parameters for bwa. + extra=r"-r '@RG\tID:{sample}\tSM:{sample}'", # optional: Extra parameters for bwa. sort="none", log: - "logs/bwa_bam_pe/{sample}.log" + "logs/bwa_bam_pe/{sample}.log", wrapper: "master/bio/bwa/samxe" + rule bwa_bam_se: input: fastq="reads/{sample}.1.fastq", - sai="sai/{sample}.1.sai" + sai="sai/{sample}.1.sai", output: - "mapped/{sample}.se.bam" + "mapped/{sample}.se.bam", params: index="genome", - extra=r"-r '@RG\tID:{sample}\tSM:{sample}'", # optional: Extra parameters for bwa. + extra=r"-r '@RG\tID:{sample}\tSM:{sample}'", # optional: Extra parameters for bwa. sort="none", log: - "logs/bwa_bam_se/{sample}.log" + "logs/bwa_bam_se/{sample}.log", wrapper: "master/bio/bwa/samxe" diff --git a/bio/bwa/samxe/test/Snakefile_picard b/bio/bwa/samxe/test/Snakefile_picard index 350c48404b..e161d0933c 100644 --- a/bio/bwa/samxe/test/Snakefile_picard +++ b/bio/bwa/samxe/test/Snakefile_picard @@ -1,67 +1,70 @@ rule bwa_sam_pe: input: fastq=["reads/{sample}.1.fastq", "reads/{sample}.2.fastq"], - sai=["sai/{sample}.1.sai", "sai/{sample}.2.sai"] + sai=["sai/{sample}.1.sai", "sai/{sample}.2.sai"], output: - "mapped/{sample}.pe.picard_sort.sam" + "mapped/{sample}.pe.picard_sort.sam", params: index="genome", - extra=r"-r '@RG\tID:{sample}\tSM:{sample}'", # optional: Extra parameters for bwa. - sort="picard", # optional: Enable sorting. Possible values: 'none', 'samtools' or 'picard'` - sort_order="queryname", # optional: Sort by 'queryname' or 'coordinate' - sort_extra="" # optional: extra arguments for samtools/picard + extra=r"-r '@RG\tID:{sample}\tSM:{sample}'", # optional: Extra parameters for bwa. + sort="picard", # optional: Enable sorting. Possible values: 'none', 'samtools' or 'picard'` + sort_order="queryname", # optional: Sort by 'queryname' or 'coordinate' + sort_extra="", # optional: extra arguments for samtools/picard log: - "logs/bwa_sam_pe/{sample}.picard_sort.log" + "logs/bwa_sam_pe/{sample}.picard_sort.log", wrapper: "master/bio/bwa/samxe" + rule bwa_sam_se: input: fastq="reads/{sample}.1.fastq", - sai="sai/{sample}.1.sai" + sai="sai/{sample}.1.sai", output: - "mapped/{sample}.se.picard_sort.sam" + "mapped/{sample}.se.picard_sort.sam", params: index="genome", - extra=r"-r '@RG\tID:{sample}\tSM:{sample}'", # optional: Extra parameters for bwa. - sort="picard", # optional: Enable sorting. Possible values: 'none', 'samtools' or 'picard'` - sort_order="queryname", # optional: Sort by 'queryname' or 'coordinate' - sort_extra="" # optional: extra arguments for samtools/picard + extra=r"-r '@RG\tID:{sample}\tSM:{sample}'", # optional: Extra parameters for bwa. + sort="picard", # optional: Enable sorting. Possible values: 'none', 'samtools' or 'picard'` + sort_order="queryname", # optional: Sort by 'queryname' or 'coordinate' + sort_extra="", # optional: extra arguments for samtools/picard log: - "logs/bwa_sam_se/{sample}.picard_sort.log" + "logs/bwa_sam_se/{sample}.picard_sort.log", wrapper: "master/bio/bwa/samxe" + rule bwa_bam_pe: input: fastq=["reads/{sample}.1.fastq", "reads/{sample}.2.fastq"], - sai=["sai/{sample}.1.sai", "sai/{sample}.2.sai"] + sai=["sai/{sample}.1.sai", "sai/{sample}.2.sai"], output: - "mapped/{sample}.pe.picard_sort.bam" + "mapped/{sample}.pe.picard_sort.bam", params: index="genome", - extra=r"-r '@RG\tID:{sample}\tSM:{sample}'", # optional: Extra parameters for bwa. - sort="picard", # optional: Enable sorting. Possible values: 'none', 'samtools' or 'picard'` - sort_order="queryname", # optional: Sort by 'queryname' or 'coordinate' - sort_extra="" # optional: extra arguments for samtools/picard + extra=r"-r '@RG\tID:{sample}\tSM:{sample}'", # optional: Extra parameters for bwa. + sort="picard", # optional: Enable sorting. Possible values: 'none', 'samtools' or 'picard'` + sort_order="queryname", # optional: Sort by 'queryname' or 'coordinate' + sort_extra="", # optional: extra arguments for samtools/picard log: - "logs/bwa_bam_pe/{sample}.picard_sort.log" + "logs/bwa_bam_pe/{sample}.picard_sort.log", wrapper: "master/bio/bwa/samxe" + rule bwa_bam_se: input: fastq="reads/{sample}.1.fastq", - sai="sai/{sample}.1.sai" + sai="sai/{sample}.1.sai", output: - "mapped/{sample}.se.picard_sort.bam" + "mapped/{sample}.se.picard_sort.bam", params: index="genome", - extra=r"-r '@RG\tID:{sample}\tSM:{sample}'", # optional: Extra parameters for bwa. - sort="picard", # optional: Enable sorting. Possible values: 'none', 'samtools' or 'picard'` - sort_order="queryname", # optional: Sort by 'queryname' or 'coordinate' - sort_extra="" # optional: extra arguments for samtools/picard + extra=r"-r '@RG\tID:{sample}\tSM:{sample}'", # optional: Extra parameters for bwa. + sort="picard", # optional: Enable sorting. Possible values: 'none', 'samtools' or 'picard'` + sort_order="queryname", # optional: Sort by 'queryname' or 'coordinate' + sort_extra="", # optional: extra arguments for samtools/picard log: - "logs/bwa_bam_se/{sample}.picard_sort.log" + "logs/bwa_bam_se/{sample}.picard_sort.log", wrapper: "master/bio/bwa/samxe" diff --git a/bio/bwa/samxe/test/Snakefile_samtools b/bio/bwa/samxe/test/Snakefile_samtools index 9353972ad7..e56c71cfb5 100644 --- a/bio/bwa/samxe/test/Snakefile_samtools +++ b/bio/bwa/samxe/test/Snakefile_samtools @@ -1,67 +1,70 @@ rule bwa_sam_pe: input: fastq=["reads/{sample}.1.fastq", "reads/{sample}.2.fastq"], - sai=["sai/{sample}.1.sai", "sai/{sample}.2.sai"] + sai=["sai/{sample}.1.sai", "sai/{sample}.2.sai"], output: - "mapped/{sample}.pe.samtools_sort.sam" + "mapped/{sample}.pe.samtools_sort.sam", params: index="genome", - extra=r"-r '@RG\tID:{sample}\tSM:{sample}'", # optional: Extra parameters for bwa. - sort="samtools", # optional: Enable sorting. Possible values: 'none', 'samtools' or 'picard'` - sort_order="queryname", # optional: Sort by 'queryname' or 'coordinate' - sort_extra="" # optional: extra arguments for samtools/picard + extra=r"-r '@RG\tID:{sample}\tSM:{sample}'", # optional: Extra parameters for bwa. + sort="samtools", # optional: Enable sorting. Possible values: 'none', 'samtools' or 'picard'` + sort_order="queryname", # optional: Sort by 'queryname' or 'coordinate' + sort_extra="", # optional: extra arguments for samtools/picard log: - "logs/bwa_sam_pe/{sample}.samtools_sort.log" + "logs/bwa_sam_pe/{sample}.samtools_sort.log", wrapper: "master/bio/bwa/samxe" + rule bwa_sam_se: input: fastq="reads/{sample}.1.fastq", - sai="sai/{sample}.1.sai" + sai="sai/{sample}.1.sai", output: - "mapped/{sample}.se.samtools_sort.sam" + "mapped/{sample}.se.samtools_sort.sam", params: index="genome", - extra=r"-r '@RG\tID:{sample}\tSM:{sample}'", # optional: Extra parameters for bwa. - sort="samtools", # optional: Enable sorting. Possible values: 'none', 'samtools' or 'picard'` - sort_order="queryname", # optional: Sort by 'queryname' or 'coordinate' - sort_extra="" # optional: extra arguments for samtools/picard + extra=r"-r '@RG\tID:{sample}\tSM:{sample}'", # optional: Extra parameters for bwa. + sort="samtools", # optional: Enable sorting. Possible values: 'none', 'samtools' or 'picard'` + sort_order="queryname", # optional: Sort by 'queryname' or 'coordinate' + sort_extra="", # optional: extra arguments for samtools/picard log: - "logs/bwa_sam_se/{sample}.samtools_sort.log" + "logs/bwa_sam_se/{sample}.samtools_sort.log", wrapper: "master/bio/bwa/samxe" + rule bwa_bam_pe: input: fastq=["reads/{sample}.1.fastq", "reads/{sample}.2.fastq"], - sai=["sai/{sample}.1.sai", "sai/{sample}.2.sai"] + sai=["sai/{sample}.1.sai", "sai/{sample}.2.sai"], output: - "mapped/{sample}.pe.samtools_sort.bam" + "mapped/{sample}.pe.samtools_sort.bam", params: index="genome", - extra=r"-r '@RG\tID:{sample}\tSM:{sample}'", # optional: Extra parameters for bwa. - sort="samtools", # optional: Enable sorting. Possible values: 'none', 'samtools' or 'picard'` - sort_order="queryname", # optional: Sort by 'queryname' or 'coordinate' - sort_extra="" # optional: extra arguments for samtools/picard + extra=r"-r '@RG\tID:{sample}\tSM:{sample}'", # optional: Extra parameters for bwa. + sort="samtools", # optional: Enable sorting. Possible values: 'none', 'samtools' or 'picard'` + sort_order="queryname", # optional: Sort by 'queryname' or 'coordinate' + sort_extra="", # optional: extra arguments for samtools/picard log: - "logs/bwa_bam_pe/{sample}.samtools_sort.log" + "logs/bwa_bam_pe/{sample}.samtools_sort.log", wrapper: "master/bio/bwa/samxe" + rule bwa_bam_se: input: fastq="reads/{sample}.1.fastq", - sai="sai/{sample}.1.sai" + sai="sai/{sample}.1.sai", output: - "mapped/{sample}.se.samtools_sort.bam" + "mapped/{sample}.se.samtools_sort.bam", params: index="genome", - extra=r"-r '@RG\tID:{sample}\tSM:{sample}'", # optional: Extra parameters for bwa. - sort="samtools", # optional: Enable sorting. Possible values: 'none', 'samtools' or 'picard'` - sort_order="queryname", # optional: Sort by 'queryname' or 'coordinate' - sort_extra="" # optional: extra arguments for samtools/picard + extra=r"-r '@RG\tID:{sample}\tSM:{sample}'", # optional: Extra parameters for bwa. + sort="samtools", # optional: Enable sorting. Possible values: 'none', 'samtools' or 'picard'` + sort_order="queryname", # optional: Sort by 'queryname' or 'coordinate' + sort_extra="", # optional: extra arguments for samtools/picard log: - "logs/bwa_bam_se/{sample}.samtools_sort.log" + "logs/bwa_bam_se/{sample}.samtools_sort.log", wrapper: "master/bio/bwa/samxe" diff --git a/docs/generate_docs.py b/docs/generate_docs.py index b5af78269d..f55bf1d352 100644 --- a/docs/generate_docs.py +++ b/docs/generate_docs.py @@ -119,7 +119,6 @@ def render_meta(path, target): used_wrappers = env["wrappers"] else: used_wrappers = [] - snakefile = render_snakefile(path) name = meta["name"].replace(" ", "_") + ".rst" diff --git a/meta/bio/bwa_mapping/test/Snakefile b/meta/bio/bwa_mapping/test/Snakefile index 1e2827404e..4713a96480 100644 --- a/meta/bio/bwa_mapping/test/Snakefile +++ b/meta/bio/bwa_mapping/test/Snakefile @@ -1,12 +1,12 @@ rule bwa_mem: input: - reads=["reads/{sample}.1.fastq", "reads/{sample}.2.fastq"] + reads=["reads/{sample}.1.fastq", "reads/{sample}.2.fastq"], + idx=multiext("genome", ".amb", ".ann", ".bwt", ".pac", ".sa"), output: "mapped/{sample}.bam" log: "logs/bwa_mem/{sample}.log" params: - index="genome", extra=r"-R '@RG\tID:{sample}\tSM:{sample}'", sort="samtools", # Can be 'none', 'samtools' or 'picard'. sort_order="coordinate", # Can be 'queryname' or 'coordinate'. @@ -20,6 +20,8 @@ rule samtools_index: "mapped/{sample}.bam" output: "mapped/{sample}.bam.bai" + log: + "logs/samtools_index/{sample}.log" params: "" # optional params string wrapper: