Skip to content

Commit 3617ac1

Browse files
committed
Updating to use gofasta rather than datafunk to speed up alignment. Also piping SAM output from minimap2 directly to gofasta now, rather than writing it to a file
1 parent 266c287 commit 3617ac1

File tree

2 files changed

+17
-31
lines changed

2 files changed

+17
-31
lines changed

environment.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ dependencies:
99
- pip=19.3.1
1010
- python=3.7
1111
- snakemake-minimal=5.13
12+
- gofasta
1213
- pip:
1314
- pandas==1.0.1
14-
- git+https://github.com/cov-ert/datafunk.git
1515
- git+https://github.com/cov-lineages/pangoLEARN.git

pangolin/scripts/pangolearn.smk

+16-30
Original file line numberDiff line numberDiff line change
@@ -65,44 +65,30 @@ rule parse_paf:
6565
else:
6666
unmapped.write(f"{record.id},failed to map\n")
6767

68-
69-
rule minimap2_to_reference:
68+
rule align_to_reference:
7069
input:
7170
fasta = rules.parse_paf.output.fasta,
7271
reference = config["reference_fasta"]
73-
output:
74-
sam = os.path.join(config["tempdir"],"reference_mapped.sam")
75-
log:
76-
os.path.join(config["tempdir"], "logs/minimap2_sam.log")
77-
shell:
78-
"""
79-
minimap2 -a -x asm5 -t {workflow.cores} {input.reference:q} {input.fasta:q} -o {output.sam:q} &> {log}
80-
"""
81-
82-
rule datafunk_trim_and_pad:
83-
input:
84-
sam = rules.minimap2_to_reference.output.sam,
85-
reference = config["reference_fasta"]
8672
params:
87-
trim_start = config["trim_start"],
88-
trim_end = config["trim_end"],
89-
insertions = os.path.join(config["tempdir"],"insertions.txt")
73+
trim_start = 265,
74+
trim_end = 29674
9075
output:
9176
fasta = os.path.join(config["aligndir"],"sequences.aln.fasta")
77+
log:
78+
os.path.join(config["outdir"], "logs/minimap2_sam.log")
9279
shell:
9380
"""
94-
datafunk sam_2_fasta \
95-
-s {input.sam:q} \
96-
-r {input.reference:q} \
97-
-o {output.fasta:q} \
98-
-t [{params.trim_start}:{params.trim_end}] \
99-
--pad \
100-
--log-inserts
81+
minimap2 -a -x asm5 -t {workflow.cores} {input.reference:q} {input.fasta:q} | \
82+
gofasta sam toMultiAlign \
83+
--reference {input.reference:q} \
84+
--trimstart {params.trim_start} \
85+
--trimend {params.trim_end} \
86+
--pad > {output.fasta:q}
10187
"""
10288

10389
rule pangolearn:
10490
input:
105-
fasta = rules.datafunk_trim_and_pad.output.fasta,
91+
fasta = rules.align_to_reference.output.fasta,
10692
model = config["trained_model"],
10793
header = config["header_file"],
10894
reference = config["reference_fasta"]
@@ -154,7 +140,7 @@ rule add_failed_seqs:
154140

155141
rule type_variants_b117:
156142
input:
157-
fasta = rules.datafunk_trim_and_pad.output.fasta,
143+
fasta = rules.align_to_reference.output.fasta,
158144
variants = config["b117_variants"],
159145
reference = config["reference_fasta"]
160146
output:
@@ -171,7 +157,7 @@ rule type_variants_b117:
171157

172158
rule type_variants_b1351:
173159
input:
174-
fasta = rules.datafunk_trim_and_pad.output.fasta,
160+
fasta = rules.align_to_reference.output.fasta,
175161
variants = config["b1351_variants"],
176162
reference = config["reference_fasta"]
177163
output:
@@ -188,7 +174,7 @@ rule type_variants_b1351:
188174

189175
rule type_variants_p2:
190176
input:
191-
fasta = rules.datafunk_trim_and_pad.output.fasta,
177+
fasta = rules.align_to_reference.output.fasta,
192178
variants = config["p2_variants"],
193179
reference = config["reference_fasta"]
194180
output:
@@ -206,7 +192,7 @@ rule type_variants_p2:
206192

207193
rule type_variants_p1:
208194
input:
209-
fasta = rules.datafunk_trim_and_pad.output.fasta,
195+
fasta = rules.align_to_reference.output.fasta,
210196
variants = config["p1_variants"],
211197
reference = config["reference_fasta"]
212198
output:

0 commit comments

Comments
 (0)