-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathntedit-sealer
executable file
·129 lines (109 loc) · 4.03 KB
/
ntedit-sealer
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
#!/usr/bin/make -rRf
# Input files
seqs=None
seqs_basename=$(basename $(notdir $(seqs)))
reads=None
# Common parameters
k=None
t=8
# ntEdit parameters
X=0.5
Y=0.5
# abyss-bloom parameters
b=None
# Sealer parameters
L=100
P=10
# Set shell
SHELL=bash -e -o pipefail
ifeq ($(shell zsh -e -o pipefail -c 'true' 2>/dev/null; echo $$?), 0)
# Set pipefail to ensure that all commands of a pipe succeed.
SHELL=zsh -e -o pipefail
# Report run time and memory usage with zsh.
export REPORTTIME=1
export TIMEFMT=time user=%U system=%S elapsed=%E cpu=%P memory=%M job=%J
endif
# Parallelize compression
ifneq ($(shell command -v pigz),)
gzip=pigz -p$t
else
gzip=gzip
endif
# Record run time and memory usage in a file using GNU time
ifeq ($(time),true)
ifneq ($(shell command -v gtime),)
log_time=command gtime -v -o [email protected]
else
log_time=command time -v -o [email protected]
endif
else
log_time=
endif
# Determine path to executables
protocol=$(shell dirname $(realpath $(MAKEFILE_LIST)))
.PHONY: all abyss_bloom nthits ntedit sealer
.DELETE_ON_ERROR:
.SECONDARY:
help:
@echo "ntEdit+Sealer assembly finishing protocol v1.0.0"
@echo ""
@echo "Usage: ntedit-sealer finish [OPTION=VALUE]"
@echo ""
@echo "General options:"
@echo "seqs Draft assembly name [seqs]. File must have .fa extension"
@echo "reads Read file(s). All files must have .fq.gz extension. Must be separated by spaces and surrounded by quotes"
@echo "k K-mer sizes. List must be descending, separated by spaces and surrounded by quotes"
@echo "t Number of threads [8]"
@echo "time If True, will log the time for each step [False]"
@echo ""
@echo "ntEdit options:"
@echo "X Ratio of number of kmers in the k subset that should be missing in order to attempt fix (higher=stringent) [0.5]"
@echo "Y Ratio of number of kmers in the k subset that should be present to accept an edit (higher=stringent) [0.5]"
@echo ""
@echo "ABySS-bloom options:"
@echo "b Bloom filter size (e.g. 100M)"
@echo ""
@echo "Sealer options:"
@echo "L Length of flanks to be used as pseudoreads [100]"
@echo "P Maximum alternate paths to merge; use 'nolimit' for no limit [10]"
@echo ""
@echo "Notes:"
@echo " - Pass all parameter list values (reads, k) as space-separated values surrounded by quotation marks, e.g. k='80 65 50'"
@echo " - Ensure that all input files are in the current working directory, making soft-links if needed"
@echo " - K-mer lengths will be used in the order they are provided. Ensure that they are sorted in descending order (largest to smallest)"
# Prepare targets
nthits_bfs=$(patsubst %,sr_solid_k%.bf,$(k))
abyss_bloom_bfs=$(patsubst %,k%.bloom.z,$(k))
# Main targets
finish: ntedit_sealer
nthits: $(nthits_bfs)
abyss_bloom: $(abyss_bloom_bfs)
ntedit: nthits $(seqs_basename).ntedit_edited.fa
ntedit_sealer: abyss_bloom $(seqs_basename).ntedit_edited.prepd.sealer_scaffold.fa
# Create bloom filters
# ntCard - only needed for ntHits v1.0.0+
ntcard_hist_k%.hist: $(reads)
$(log_time) ntcard -k$* -t$t -p ntcard_hist $(reads)
# ntEdit BFs
ifeq ($(shell nthits --version 2>&1 |grep "0.1.0"),) # Indicates ntHits v1.0.0+
sr_solid_k%.bf: ntcard_hist_k%.hist $(reads)
$(log_time) nthits bf -f $< --solid -t $t -k $* -o sr_solid_k$*.bf $(reads)
else
sr_solid_k%.bf: $(reads)
$(log_time) nthits -b36 --outbloom --solid -p sr_solid -k$* -t$t $(reads)
endif
# Sealer BFs
k%.bloom.z: $(reads)
$(log_time) abyss-bloom build -v -v -k$* -j$t -b$b -l2 -q15 - $(reads) | $(gzip) -c > $@
# Run ntEdit iteratively
$(seqs_basename).ntedit_edited.fa: $(nthits_bfs)
$(log_time) $(protocol)/bin/run_ntedit.sh $(seqs_basename) "$(nthits_bfs)" "$(k)" $X $Y $t $@
# Consolidate soft masks
%.prepd.fa: %.fa
$(log_time) python3 $(protocol)/bin/mask_short_sequences.py -s -k$(lastword $(k)) $< > $@
# Run Sealer
%.sealer_scaffold.fa: %.fa $(abyss_bloom_bfs)
$(log_time) abyss-sealer -v -S $< -t $*-sealed-trace.txt \
-o $*.sealer -L$L -j$t -P$P --lower \
$(foreach val,$(k),$(subst %,$(val),-k% --input-bloom=<($(gzip) -d -c k%.bloom.z)))
@echo "ntEdit and Sealer polishing steps complete! Polished assembly can be found in: $@"