-
Notifications
You must be signed in to change notification settings - Fork 2
/
ref.smk
106 lines (96 loc) · 3.43 KB
/
ref.smk
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
# Fetch the reference genome DNA sequence via the Ensembl sequence wrapper.
# Species, build and release are read from the `reference` section of the
# workflow config and are also encoded in the output file name.
rule get_genome:
    params:
        species=config["reference"]["species"],
        datatype="dna",
        build=config["reference"]["build"],
        release=config["reference"]["release"],
    output:
        genome=expand(
            "resources/{species}.{build}.{release}.fasta",
            species=config["reference"]["species"],
            build=config["reference"]["build"],
            release=config["reference"]["release"],
        ),
    log:
        "logs/get-genome.log",
    # Between-workflow caching saves space and time (see the Snakemake docs).
    cache: "omit-software"
    wrapper:
        "v1.25.0/bio/reference/ensembl-sequence"
# Build the samtools faidx index for the genome produced by `get_genome`.
# The index path is the genome path with `.fai` appended.
rule genome_faidx:
    input:
        rules.get_genome.output.genome,
    output:
        index=expand(
            "{genome}.fai",
            genome=rules.get_genome.output.genome,
        ),
    # Eligible for between-workflow caching.
    cache: True
    log:
        "logs/genome-faidx.log",
    wrapper:
        "v1.25.0/bio/samtools/faidx"
# Build the minimap2 index (.mmi) for the genome produced by `get_genome`.
# The index file name carries the same species/build/release triple as the
# genome FASTA.
rule minimap2_index:
    input:
        target=rules.get_genome.output.genome,
    output:
        index=expand(
            "resources/{species}.{build}.{release}.mmi",
            species=config["reference"]["species"],
            build=config["reference"]["build"],
            release=config["reference"]["release"],
        ),
    params:
        extra="",  # optional additional args
    # Minimap2 uses at most three threads when indexing target sequences:
    # https://lh3.github.io/minimap2/minimap2.html
    threads: 3
    # Eligible for between-workflow caching.
    cache: True
    log:
        "logs/minimap2_index/genome.log",
    benchmark:
        "benchmarks/minimap2_index/genome.txt"
    wrapper:
        "v1.25.0/bio/minimap2/index"
# TODO: create new ENSEMBL-REGULATORY-ANNOTATION snakemake wrapper
#
# Download the Ensembl Regulatory Build regulatory-features annotation
# (gzipped GFF) for the configured release (falls back to "107" when the
# `release` key is absent from config["reference"]).
#
# NOTE(review): the URL is hard-coded to homo_sapiens / GRCh38 and to the
# file dated 20220201, regardless of config["reference"]["species"] and
# config["reference"]["build"]. Presumably only releases that publish this
# exact file name will resolve -- confirm before changing the release.
rule download_regulatory_annotation:
    output:
        "resources/regulatory_annotation.gff3.gz",
    log:
        "logs/download_regulatory_annotation.log",
    params:
        release=config["reference"].get("release", "107"),
    benchmark:
        "benchmarks/download_regulatory_annotation.txt"
    cache: "omit-software"  # save space and time with between workflow caching (see docs)
    conda:
        "../envs/wget.yaml"
    # TLS certificate verification is deliberately left enabled (no
    # --no-check-certificate): the download is later used as a cached
    # reference resource, so it must not be silently replaceable in transit.
    # Output and log paths are shell-quoted via the `:q` format flag.
    shell:
        """wget https://ftp.ensembl.org/pub/release-{params.release}/regulation/homo_sapiens/homo_sapiens.GRCh38.Regulatory_Build.regulatory_features.20220201.gff.gz -O {output:q} 2> {log:q}"""
# Download the RepeatMasker annotation (gzipped `.fa.out`) from the URL given
# by config["reference"]["repeat_masker_download_link"]. When the key is
# absent the link defaults to an empty string and wget fails; its error
# message ends up in the log file.
rule download_repeatmasker_annotation:
    output:
        "resources/repeat_masker.fa.out.gz",
    log:
        "logs/download_repeatmasker_annotation.log",
    params:
        download_link=config["reference"].get("repeat_masker_download_link", ""),
    benchmark:
        "benchmarks/download_repeatmasker_annotation.txt"
    cache: "omit-software"  # save space and time with between workflow caching (see docs)
    conda:
        "../envs/wget.yaml"
    # The link comes from user configuration, so it is shell-quoted (`:q`):
    # characters such as `&`, `?`, `;` or spaces in the URL must reach wget
    # verbatim instead of being interpreted by the shell.
    # NOTE(review): --no-check-certificate disables TLS verification; it is
    # kept because the host is user-chosen, but should be dropped once the
    # configured download hosts are confirmed to present valid certificates.
    shell:
        """wget {params.download_link:q} --no-check-certificate -O {output:q} 2> {log:q}"""
# Fetch the gene annotation via the Ensembl annotation wrapper, using the
# species, build and release from the `reference` section of the workflow
# config.
rule download_gene_annotation:
    params:
        species=config["reference"]["species"],
        build=config["reference"]["build"],
        release=config["reference"]["release"],
        flavor="",  # optional, e.g. chr_patch_hapl_scaff, see Ensembl FTP.
        branch="",  # optional: specify branch
    output:
        "resources/gene_annotation.gff3.gz",
    log:
        "logs/download_gene_annotation.log",
    # Between-workflow caching saves space and time (see the Snakemake docs).
    cache: "omit-software"
    wrapper:
        "v1.25.0/bio/reference/ensembl-annotation"