diff --git a/bio/bazam/environment.yaml b/bio/bazam/environment.yaml
new file mode 100644
index 0000000000..8ae1bbb412
--- /dev/null
+++ b/bio/bazam/environment.yaml
@@ -0,0 +1,7 @@
+channels:
+  - conda-forge
+  - bioconda
+  - nodefaults
+dependencies:
+  - bazam =1.0
+  - snakemake-wrapper-utils ==0.5
diff --git a/bio/bazam/meta.yaml b/bio/bazam/meta.yaml
new file mode 100644
index 0000000000..2abe5a5d51
--- /dev/null
+++ b/bio/bazam/meta.yaml
@@ -0,0 +1,10 @@
+name: "bazam"
+description: Bazam is a smarter way to realign reads from one genome to another. If you've tried to use Picard SamToFastq or samtools bam2fq before and ended up unsatisfied with complicated, long-running, inefficient pipelines, bazam might be what you wanted. Bazam will output FASTQ in a form that can stream directly into common aligners such as BWA or Bowtie2, so that you can quickly and easily realign reads without extraction to any intermediate format. Bazam can target a specific region of the genome, specified as a region or a gene name if you prefer.
+url: https://github.com/ssadedin/bazam
+authors:
+  - Christopher Schröder
+input:
+  - BAM/CRAM file
+  - reference genome (required for CRAM input)
+output:
+  - single FASTQ file (reads), or a pair of FASTQ files (r1 and r2)
diff --git a/bio/bazam/test/Snakefile b/bio/bazam/test/Snakefile
new file mode 100644
index 0000000000..3846405620
--- /dev/null
+++ b/bio/bazam/test/Snakefile
@@ -0,0 +1,28 @@
+rule bazam_interleaved:
+    input:
+        bam="mapped/{sample}.bam",
+        bai="mapped/{sample}.bam.bai",
+    output:
+        reads="results/reads/{sample}.fastq.gz",
+    resources:
+        mem_mb=12000,
+    log:
+        "logs/bazam/{sample}.interleaved.log",
+    wrapper:
+        "master/bio/bazam"
+
+
+rule bazam_separated:
+    input:
+        bam="mapped/{sample}.cram",
+        bai="mapped/{sample}.cram.crai",
+        reference="genome.fasta",
+    output:
+        r1="results/reads/{sample}.r1.fastq.gz",
+        r2="results/reads/{sample}.r2.fastq.gz",
+    resources:
+        mem_mb=12000,
+    log:
+        "logs/bazam/{sample}.separated.log",
+    wrapper:
+        "master/bio/bazam"
diff --git a/bio/bazam/test/genome.fasta b/bio/bazam/test/genome.fasta
new file mode 100644
index 0000000000..11d25dda64
--- /dev/null
+++ b/bio/bazam/test/genome.fasta
@@ -0,0 +1,2 @@
+>Sheila
+GCTAGCTCAGAAAAAAAAAA
diff --git a/bio/bazam/test/mapped/a.bam b/bio/bazam/test/mapped/a.bam
new file mode 100644
index 0000000000..243bc19d34
Binary files /dev/null and b/bio/bazam/test/mapped/a.bam differ
diff --git a/bio/bazam/test/mapped/a.bam.bai b/bio/bazam/test/mapped/a.bam.bai
new file mode 100644
index 0000000000..a9b9979fed
Binary files /dev/null and b/bio/bazam/test/mapped/a.bam.bai differ
diff --git a/bio/bazam/test/mapped/a.cram b/bio/bazam/test/mapped/a.cram
new file mode 100644
index 0000000000..0499f5980c
Binary files /dev/null and b/bio/bazam/test/mapped/a.cram differ
diff --git a/bio/bazam/test/mapped/a.cram.crai b/bio/bazam/test/mapped/a.cram.crai
new file mode 100644
index 0000000000..b34400a884
Binary files /dev/null and b/bio/bazam/test/mapped/a.cram.crai differ
diff --git a/bio/bazam/wrapper.py b/bio/bazam/wrapper.py
new file mode 100644
index 0000000000..a944742972
--- /dev/null
+++ b/bio/bazam/wrapper.py
@@ -0,0 +1,43 @@
+__author__ = "Christopher Schröder"
+__copyright__ = "Copyright 2022, Christopher Schröder"
+__email__ = "christopher.schroeder@tu-dortmund.de"
+__license__ = "MIT"
+
+from shlex import quote
+
+from snakemake.shell import shell
+from snakemake_wrapper_utils.java import get_java_opts
+
+# Java options derived from the rule (see snakemake-wrapper-utils).
+java_opts = get_java_opts(snakemake)
+
+# Only stderr is redirected to the log file.
+log = snakemake.log_fmt_shell(stdout=False, stderr=True)
+
+# Shell-quote every path so that file names containing spaces or shell
+# metacharacters cannot break the command line.
+bam = snakemake.input.bam
+bam_cmd = quote(bam)
+
+# Extra parameters default value is an empty string
+extra = snakemake.params.get("extra", "")
+
+# CRAM input requires a reference, passed on as a Java system property.
+if bam.endswith(".cram"):
+    if not (reference := snakemake.input.get("reference", "")):
+        raise ValueError(
+            "input 'reference' is required when working with CRAM input files"
+        )
+    reference_cmd = quote(f"-Dsamjdk.reference_fasta={reference}")
+else:
+    reference_cmd = ""
+
+# Either a single output file ('reads') or a pair of files ('r1' and 'r2').
+if reads := snakemake.output.get("reads", ""):
+    out_cmd = f"-o {quote(reads)}"
+elif (r1 := snakemake.output.get("r1", "")) and (r2 := snakemake.output.get("r2", "")):
+    out_cmd = f"-r1 {quote(r1)} -r2 {quote(r2)}"
+else:
+    raise ValueError("either 'reads' or 'r1' and 'r2' must be specified in output")
+
+shell("(bazam {java_opts} {reference_cmd} {extra} -bam {bam_cmd} {out_cmd}) {log}")
diff --git a/test.py b/test.py
index 3b4304f899..575571f57e 100644
--- a/test.py
+++ b/test.py
@@ -4853,4 +4853,20 @@ def test_calc_consensus_reads():
     run(
         "meta/bio/calc_consensus_reads/",
         ["snakemake", "--cores", "1", "--use-conda", "-F", "results/consensus/sampleA.bam"],
+    )
+
+
+@skip_if_not_modified
+def test_bazam_interleaved():
+    run(
+        "bio/bazam",
+        ["snakemake", "--cores", "1", "--use-conda", "-F", "results/reads/a.fastq.gz"],
+    )
+
+
+@skip_if_not_modified
+def test_bazam_separated():
+    run(
+        "bio/bazam",
+        ["snakemake", "--cores", "1", "--use-conda", "-F", "results/reads/a.r1.fastq.gz"],
     )
\ No newline at end of file