Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add support for regions file and arbitrary FAI/GZI paths #2936

Merged
merged 5 commits into from
May 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 9 additions & 3 deletions bio/samtools/faidx/meta.yaml
Original file line number Diff line number Diff line change
@@ -1,12 +1,18 @@
name: samtools faidx
description: index reference sequence in FASTA format from reference sequence.
url: http://www.htslib.org/doc/samtools-faidx.html
authors:
- Michael Chambers
- Filipe G. Vieira
input:
- reference sequence file (.fa)
- regions: file with regions
- fai: index for reference file (optional)
- gzi: index for BGZip'ed reference file (optional)
output:
- indexed reference sequence file (.fai)
notes: |
* The `extra` param allows for additional program arguments (not `-o`).
* For more information see, http://www.htslib.org/doc/samtools-faidx.html
- fai: index for reference file (optional)
- gzi: index for BGZip'ed reference file (optional)
params:
- region: region to extract from input file (optional)
- extra: additional program arguments (not `-o`).
64 changes: 61 additions & 3 deletions bio/samtools/faidx/test/Snakefile
Original file line number Diff line number Diff line change
@@ -1,11 +1,69 @@
rule samtools_index:
rule samtools_faidx:
input:
"{sample}.fa",
output:
"{sample}.fa.fai",
"out/{sample}.fa.fai",
log:
"{sample}.log",
params:
extra="", # optional params string
extra="",
wrapper:
"master/bio/samtools/faidx"


rule samtools_faidx_named:
input:
"{sample}.fa",
output:
fai="out/{sample}.named.fa.fai",
log:
"{sample}.named.log",
params:
extra="",
wrapper:
"master/bio/samtools/faidx"


rule samtools_faidx_bgzip:
input:
"{sample}.fa.bgz",
output:
fai="out/{sample}.fas.bgz.fai",
gzi="out/{sample}.fas.bgz.gzi",
log:
"{sample}.bzgip.log",
params:
extra="",
wrapper:
"master/bio/samtools/faidx"


rule samtools_faidx_region:
input:
"{sample}.fa",
fai="idx/{sample}.fa.fai",
output:
"out/{sample}.fas",
log:
"{sample}.region.log",
params:
region="ref",
extra="--length 5",
wrapper:
"master/bio/samtools/faidx"


rule samtools_faidx_bgzip_region:
input:
"{sample}.fa.bgz",
fai="idx/{sample}.fa.bgz.fai",
gzi="idx/{sample}.fa.bgz.gzi",
output:
"out/{sample}.bgz.fas",
log:
"{sample}.bgzip_region.log",
params:
region="ref",
extra="--length 5",
wrapper:
"master/bio/samtools/faidx"
Binary file added bio/samtools/faidx/test/genome.fa.bgz
Binary file not shown.
2 changes: 2 additions & 0 deletions bio/samtools/faidx/test/idx/genome.fa.bgz.fai
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
ref 45 5 45 46
ref2 40 57 40 41
Binary file added bio/samtools/faidx/test/idx/genome.fa.bgz.gzi
Binary file not shown.
2 changes: 2 additions & 0 deletions bio/samtools/faidx/test/idx/genome.fa.fai
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
ref 45 5 45 46
ref2 40 57 40 41
26 changes: 25 additions & 1 deletion bio/samtools/faidx/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,28 @@
extra = snakemake.params.get("extra", "")
log = snakemake.log_fmt_shell(stdout=True, stderr=True)

shell("samtools faidx {samtools_opts} {extra} {snakemake.input[0]} {log}")

# Get regions (if present)
regions = snakemake.input.get("regions", "")
if regions:
regions = f"--regions-file {regions}"

region = snakemake.params.get("region", "")

# Get FAI and GZI files
if region or regions:
fai = snakemake.input.get("fai", "")
gzi = snakemake.input.get("gzi", "")
else:
fai = snakemake.output.get("fai", "")
gzi = snakemake.output.get("gzi", "")

if fai:
fai = f"--fai-idx {fai}"
if gzi:
gzi = f"--gzi-idx {gzi}"


shell(
"samtools faidx {fai} {gzi} {regions} {samtools_opts} {extra} {snakemake.input[0]} {region:q} {log}"
)
15 changes: 14 additions & 1 deletion test.py
Original file line number Diff line number Diff line change
Expand Up @@ -1453,6 +1453,7 @@ def test_goleft_indexcov():
["snakemake", "--cores", "1", "--use-conda", "-Fp"],
)


@skip_if_not_modified
def test_gridss_call():
run(
Expand Down Expand Up @@ -4163,7 +4164,19 @@ def test_samtools_fastq_separate():
def test_samtools_faidx():
run(
"bio/samtools/faidx",
["snakemake", "--cores", "1", "genome.fa.fai", "--use-conda", "-F"],
[
"snakemake",
"--cores",
"1",
"out/genome.fa.fai",
"out/genome.named.fa.fai",
"out/genome.fas.bgz.fai",
"out/genome.fas.bgz.gzi",
"out/genome.fas",
"out/genome.bgz.fas",
"--use-conda",
"-F",
],
)


Expand Down
Loading