From 86d1ddc96f654d7d5b220389b13f20eb8aa768b8 Mon Sep 17 00:00:00 2001 From: wannesm Date: Fri, 7 Oct 2022 22:43:07 +0200 Subject: [PATCH 01/59] use deploy branch for dist --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index ee5ceaaa..5040a479 100644 --- a/Makefile +++ b/Makefile @@ -108,7 +108,7 @@ prepare_tag: @echo "Check whether repo is clean" git diff-index --quiet HEAD @echo "Check correct branch" - if [[ "$$(git rev-parse --abbrev-ref HEAD)" != "master" ]]; then echo 'Not master branch'; exit 1; fi + if [[ "$$(git rev-parse --abbrev-ref HEAD)" != "deploy" ]]; then echo 'Not deploy branch'; exit 1; fi @echo "Add tag" git tag "v$$(python3 setup.py --version)" git push --tags From c228842b6e0acc0501768bd8d36e51ee59a7d610 Mon Sep 17 00:00:00 2001 From: wannesm Date: Sat, 8 Oct 2022 00:40:40 +0200 Subject: [PATCH 02/59] double to float --- dtaidistance/jinja/dtw_cc_warpingpaths.jinja.pyx | 2 +- dtaidistance/jinja/generate.py | 13 ++++++++----- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/dtaidistance/jinja/dtw_cc_warpingpaths.jinja.pyx b/dtaidistance/jinja/dtw_cc_warpingpaths.jinja.pyx index 4cc070e8..e4a09861 100644 --- a/dtaidistance/jinja/dtw_cc_warpingpaths.jinja.pyx +++ b/dtaidistance/jinja/dtw_cc_warpingpaths.jinja.pyx @@ -51,7 +51,7 @@ def warping_paths{{ suffix }}( else: try: # Use cython.view.array to avoid numpy dependency - wps = cvarray(shape=shape, itemsize=sizeof({{seq_tpy}}), format="d") + wps = cvarray(shape=shape, itemsize=sizeof({{seq_tpy}}), format="{{seq_format}}") except MemoryError as exc: print("Cannot allocate memory for warping paths matrix. 
Trying " + str(shape) + ".") raise exc diff --git a/dtaidistance/jinja/generate.py b/dtaidistance/jinja/generate.py index 50e1815b..8ea11c0d 100755 --- a/dtaidistance/jinja/generate.py +++ b/dtaidistance/jinja/generate.py @@ -21,32 +21,35 @@ seq_t = "double" seq_tpy = "double" +seq_format = "d" # https://docs.python.org/3/library/array.html # Also change the type in lib/DTAIDistanceC/DTAIDistanceC/dd_globals.h +set_vars = {"seq_tpy": seq_tpy, "seq_t": seq_t, "seq_format": seq_format} + targets = { "dtw_cc.pyx": ["dtw_cc.jinja.pyx", - {"seq_tpy": seq_tpy, "seq_t": seq_t}, + set_vars, ["dtw_cc_warpingpaths.jinja.pyx", "dtw_cc_distancematrix.jinja.pyx", "dtw_cc_warpingpath.jinja.pyx", "dtw_cc_dba.jinja.pyx"]], "dtw_cc_omp.pyx": ["dtw_cc_omp.jinja.pyx", - {"seq_tpy": seq_tpy, "seq_t": seq_t}, + set_vars, []], "dtw_cc.pxd": ["dtw_cc.jinja.pxd", - {"seq_tpy": seq_tpy, "seq_t": seq_t}, + set_vars, []], "dtaidistancec_globals.pxd": ["dtaidistancec_globals.jinja.pxd", - {"seq_tpy": seq_tpy, "seq_t": seq_t}, + set_vars, []], "ed_cc.pyx": ["ed_cc.jinja.pyx", - {"seq_tpy": seq_tpy, "seq_t": seq_t}, + set_vars, []], } essential_targets = ['dtw_cc.pyx', 'dtw_cc.pxd', 'dtaidistancec_globals.pxd'] From 9aa77674b85a03440f9e0c8df3b3438df0531e0e Mon Sep 17 00:00:00 2001 From: wannesm Date: Sat, 8 Oct 2022 12:17:35 +0200 Subject: [PATCH 03/59] jinja: force regenerate all --- dtaidistance/jinja/Makefile | 11 +++++++++++ dtaidistance/jinja/generate.py | 3 ++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/dtaidistance/jinja/Makefile b/dtaidistance/jinja/Makefile index 814e07bb..2bc72021 100644 --- a/dtaidistance/jinja/Makefile +++ b/dtaidistance/jinja/Makefile @@ -5,6 +5,9 @@ DEPS_dtw_cc_pxd = $(shell python3 generate.py -dq dtw_cc.pxd) DEPS_ed_cc = $(shell python3 generate.py -dq ed_cc.pyx) +.PHONY: default +default: clean generate + .PHONY: generate generate: jinja replace @@ -55,3 +58,11 @@ debug: @echo $(DEPS_globals) @echo $(DEPS_dtw_cc_pxd) +.PHONY: clean +clean: + 
rm -f dtw_cc.pyx + rm -f dtw_cc_omp.pyx + rm -f dtw_cc.pxd + rm -f ed_cc.pyx + rm -f dtaidistancec_globals.pxd + diff --git a/dtaidistance/jinja/generate.py b/dtaidistance/jinja/generate.py index 8ea11c0d..0742dd79 100755 --- a/dtaidistance/jinja/generate.py +++ b/dtaidistance/jinja/generate.py @@ -52,7 +52,8 @@ set_vars, []], } -essential_targets = ['dtw_cc.pyx', 'dtw_cc.pxd', 'dtaidistancec_globals.pxd'] +essential_targets = ['dtw_cc.pyx', 'dtw_cc.pxd', 'dtaidistancec_globals.pxd', + 'dtw_cc_omp.pyx', 'ed_cc.pyx'] def generate(target): From 1907b4aab0b3eaecba98a5b792cffb2b4c154c4d Mon Sep 17 00:00:00 2001 From: wannesm Date: Sun, 9 Oct 2022 00:07:24 +0200 Subject: [PATCH 04/59] subseq lb --- Makefile | 4 ++ dtaidistance/dtw.py | 2 +- dtaidistance/subsequence/dtw.py | 18 +++++-- tests/test_subsequence.py | 85 +++++++++++++++++++++++---------- 4 files changed, 80 insertions(+), 29 deletions(-) diff --git a/Makefile b/Makefile index 5040a479..984423cf 100644 --- a/Makefile +++ b/Makefile @@ -57,6 +57,10 @@ benchmark-matrixc: benchmark-clustering: export PYTHONPATH=.;py.test -k cluster ${BENCHMARKSETTINGS} +.PHONY: benchmark-subseqsearch +benchmark-subseqsearch: + export PYTHONPATH=.;py.test -k subseqsearch_eeg ${BENCHMARKSETTINGS} + .PHONY: clean clean: diff --git a/dtaidistance/dtw.py b/dtaidistance/dtw.py index 947faf77..910aea18 100644 --- a/dtaidistance/dtw.py +++ b/dtaidistance/dtw.py @@ -152,7 +152,7 @@ def __str__(self): def lb_keogh(s1, s2, window=None, max_dist=None, - max_step=None, max_length_diff=None): + max_step=None, max_length_diff=None, use_c=False): """Lowerbound LB_KEOGH""" # TODO: This implementation slower than distance() in C if window is None: diff --git a/dtaidistance/subsequence/dtw.py b/dtaidistance/subsequence/dtw.py index be996362..5968d388 100644 --- a/dtaidistance/subsequence/dtw.py +++ b/dtaidistance/subsequence/dtw.py @@ -491,7 +491,7 @@ def best_path(self, row, col): return p -def subsequence_search(query, series, 
dists_options=None): +def subsequence_search(query, series, dists_options=None, use_lb=False): """See SubsequenceSearch. :param query: Time series to search for @@ -500,7 +500,7 @@ def subsequence_search(query, series, dists_options=None): :param dists_options: Options passed on to dtw.distance :return: SubsequenceSearch object """ - ss = SubsequenceSearch(query, series, dists_options=dists_options) + ss = SubsequenceSearch(query, series, dists_options=dists_options, use_lb=use_lb) return ss @@ -533,7 +533,7 @@ def __repr__(self): class SubsequenceSearch: - def __init__(self, query, s, dists_options=None): + def __init__(self, query, s, dists_options=None, use_lb=False): """Search the best matching (subsequence) time series compared to a given time series. :param query: Time series to search for @@ -544,12 +544,20 @@ def __init__(self, query, s, dists_options=None): self.query = query self.s = s self.distances = None + self.lbs = None self.k = None self.dists_options = {} if dists_options is None else dists_options + self.use_lb = use_lb def reset(self): self.distances = None + def compute_lbs(self): + self.lbs = np.zeros((len(self.s),)) + for idx, series in enumerate(self.s): + self.lbs[idx] = dtw.lb_keogh(self.query, series, **self.dists_options) + print(self.lbs) + def align_fast(self, k=None): self.dists_options['use_c'] = True return self.align(k=k) @@ -558,10 +566,14 @@ def align(self, k=None): if self.distances is not None and self.k >= k: return self.distances = np.zeros((len(self.s),)) + if self.use_lb: + self.compute_lbs() import heapq h = [-np.inf] max_dist = np.inf for idx, series in enumerate(self.s): + if self.use_lb and self.lbs[idx] > max_dist: + continue dist = dtw.distance(self.query, series, **self.dists_options) if k is not None: if len(h) < k: diff --git a/tests/test_subsequence.py b/tests/test_subsequence.py index 22c6666c..ab3408fc 100644 --- a/tests/test_subsequence.py +++ b/tests/test_subsequence.py @@ -211,32 +211,45 @@ def 
test_dtw_localconcurrences_short(): plt.close(fig) +def create_data_subseqsearch_eeg(np): + data_fn = Path(__file__).parent / 'rsrc' / 'EEGRat_10_1000.txt' + data = np.loadtxt(data_fn) + series = np.array(data[1500:1700]) + query = np.array(data[1331:1352]) + # print(f'{len(series)=}') + + k = 3 + s = [] + s_idx = [] + w = 22 # window size + ws = int(np.floor(w / 2)) # shift size + wn = int(np.floor((len(series) - (w - ws)) / ws)) + si, ei = 0, w + for i in range(wn): + s.append(series[si:ei]) + s_idx.append(si) + si += ws + ei += ws + return query, s, k, series, s_idx + + @numpyonly -def test_dtw_subseqsearch_eeg(): +@pytest.mark.benchmark(group="subseqsearch_eeg") +def test_dtw_subseqsearch_eeg(benchmark): with util_numpy.test_uses_numpy() as np: - data_fn = Path(__file__).parent / 'rsrc' / 'EEGRat_10_1000.txt' - data = np.loadtxt(data_fn) - series = np.array(data[1500:1700]) - query = np.array(data[1331:1352]) - # print(f'{len(series)=}') - - k = 3 - s = [] - s_idx = [] - w = 22 # window size - ws = int(np.floor(w/2)) # shift size - wn = int(np.floor((len(series) - (w - ws)) / ws)) - si, ei = 0, w - for i in range(wn): - s.append(series[si:ei]) - s_idx.append(si) - si += ws - ei += ws - tic = time.perf_counter() - sa = subsequence_search(query, s, dists_options={'use_c': True}) - best = sa.kbest_matches_fast(k=k) - toc = time.perf_counter() - print("Searching performed in {:0.4f} seconds".format(toc - tic)) + query, s, k, series, s_idx = create_data_subseqsearch_eeg(np) + + def run(): + sa = subsequence_search(query, s, dists_options={'use_c': True}) + best = sa.kbest_matches_fast(k=k) + return best + if benchmark is None: + tic = time.perf_counter() + best = run() + toc = time.perf_counter() + print("Searching performed in {:0.4f} seconds".format(toc - tic)) + else: + best = benchmark(run) # print(sa.distances) # print(best) @@ -274,6 +287,27 @@ def test_dtw_subseqsearch_eeg(): plt.close(fig) +@numpyonly +@pytest.mark.benchmark(group="subseqsearch_eeg") +def 
test_dtw_subseqsearch_eeg_ub(benchmark): + with util_numpy.test_uses_numpy() as np: + query, s, k, series, s_idx = create_data_subseqsearch_eeg(np) + + def run(): + sa = subsequence_search(query, s, dists_options={'use_c': True}, use_lb=True) + best = sa.kbest_matches_fast(k=k) + return best + if benchmark is None: + tic = time.perf_counter() + best = run() + toc = time.perf_counter() + print("Searching performed in {:0.4f} seconds".format(toc - tic)) + else: + best = benchmark(run) + # print(sa.distances) + # print(best) + + if __name__ == "__main__": directory = Path(os.environ.get('TESTDIR', Path(__file__).parent)) print("Saving files to {}".format(directory)) @@ -284,5 +318,6 @@ def test_dtw_subseqsearch_eeg(): # test_dtw_subseq_bug1() # test_dtw_subseq_ndim() # test_dtw_localconcurrences_eeg() - test_dtw_subseqsearch_eeg() + test_dtw_subseqsearch_eeg(benchmark=None) + # test_dtw_subseqsearch_eeg_ub(benchmark=None) # test_dtw_localconcurrences_short() From 01a1405783f2fb3fa0380049332f59d4bc83557c Mon Sep 17 00:00:00 2001 From: wannesm Date: Sun, 9 Oct 2022 23:43:07 +0200 Subject: [PATCH 05/59] test subsequence: diff type --- tests/test_subsequence.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_subsequence.py b/tests/test_subsequence.py index ab3408fc..22d8b3b1 100644 --- a/tests/test_subsequence.py +++ b/tests/test_subsequence.py @@ -211,11 +211,11 @@ def test_dtw_localconcurrences_short(): plt.close(fig) -def create_data_subseqsearch_eeg(np): +def create_data_subseqsearch_eeg(np, dtype=None): data_fn = Path(__file__).parent / 'rsrc' / 'EEGRat_10_1000.txt' data = np.loadtxt(data_fn) - series = np.array(data[1500:1700]) - query = np.array(data[1331:1352]) + series = np.array(data[1500:1700], dtype=dtype) + query = np.array(data[1331:1352], dtype=dtype) # print(f'{len(series)=}') k = 3 From fc4b5706be6a42b64973065138a88f6865b6455a Mon Sep 17 00:00:00 2001 From: wannesm Date: Mon, 10 Oct 2022 12:48:23 +0200 Subject: [PATCH 
06/59] subseqsearch: do not keep all distances --- dtaidistance/subsequence/dtw.py | 50 ++++++++++++++++++++++++--------- 1 file changed, 36 insertions(+), 14 deletions(-) diff --git a/dtaidistance/subsequence/dtw.py b/dtaidistance/subsequence/dtw.py index 5968d388..b12b73e5 100644 --- a/dtaidistance/subsequence/dtw.py +++ b/dtaidistance/subsequence/dtw.py @@ -491,7 +491,7 @@ def best_path(self, row, col): return p -def subsequence_search(query, series, dists_options=None, use_lb=False): +def subsequence_search(query, series, dists_options=None, use_lb=False, keep_all_distances=False): """See SubsequenceSearch. :param query: Time series to search for @@ -500,7 +500,8 @@ def subsequence_search(query, series, dists_options=None, use_lb=False): :param dists_options: Options passed on to dtw.distance :return: SubsequenceSearch object """ - ss = SubsequenceSearch(query, series, dists_options=dists_options, use_lb=use_lb) + ss = SubsequenceSearch(query, series, dists_options=dists_options, use_lb=use_lb, + keep_all_distances=keep_all_distances) return ss @@ -533,7 +534,7 @@ def __repr__(self): class SubsequenceSearch: - def __init__(self, query, s, dists_options=None, use_lb=False): + def __init__(self, query, s, dists_options=None, use_lb=False, keep_all_distances=False): """Search the best matching (subsequence) time series compared to a given time series. 
:param query: Time series to search for @@ -548,6 +549,9 @@ def __init__(self, query, s, dists_options=None, use_lb=False): self.k = None self.dists_options = {} if dists_options is None else dists_options self.use_lb = use_lb + self.keep_all_distances = keep_all_distances + if self.use_lb and not self.keep_all_distances: + raise ValueError("If use_lb is true, then keep_all_distances should also be true.") def reset(self): self.distances = None @@ -565,27 +569,33 @@ def align_fast(self, k=None): def align(self, k=None): if self.distances is not None and self.k >= k: return - self.distances = np.zeros((len(self.s),)) - if self.use_lb: - self.compute_lbs() + if self.keep_all_distances: + self.distances = np.zeros((len(self.s),)) + if self.use_lb: + self.compute_lbs() import heapq - h = [-np.inf] + h = [(-np.inf, -1)] max_dist = np.inf for idx, series in enumerate(self.s): if self.use_lb and self.lbs[idx] > max_dist: continue + print(f'{len(self.query)=} * {len(series)=} = {len(self.query)*len(series)*8/1024**2}MiB') + print(f'{len(self.query)=} * 2 = {len(self.query) * 2 * 8 / 1024 ** 2}MiB') dist = dtw.distance(self.query, series, **self.dists_options) if k is not None: if len(h) < k: if not np.isinf(dist): - heapq.heappush(h, -dist) - max_dist = -min(h) + heapq.heappush(h, (-dist, idx)) + max_dist = -min(h)[0] else: if not np.isinf(dist): - heapq.heappushpop(h, -dist) - max_dist = -min(h) + heapq.heappushpop(h, (-dist, idx)) + max_dist = -min(h)[0] self.dists_options['max_dist'] = max_dist - self.distances[idx] = dist + if self.keep_all_distances: + self.distances[idx] = dist + if not self.keep_all_distances: + self.distances = h def best_match_fast(self): self.dists_options['use_c'] = True @@ -601,8 +611,20 @@ def kbest_matches_fast(self, k=1): return self.kbest_matches(k=k) def kbest_matches(self, k=1): + """Return the k best matches. + + It is recommended to set k to a value, and not None. + If k is set to None, all comparisons are kept and returned. 
Also no early + stopping is applied in case k is None. + + :param k: Number of best matches to return (default is 1) + :return: List of SSMatch objects + """ self.align(k=k) if k is None: return [SSMatch(best_idx, self) for best_idx in range(len(self.distances))] - best_idxs = np.argpartition(self.distances, k) - return [SSMatch(best_idx, self) for best_idx in best_idxs[:k]] + if self.keep_all_distances: + best_idxs = np.argpartition(self.distances, k) + return [SSMatch(best_idx, self) for best_idx in best_idxs[:k]] + distances = reversed(sorted(self.distances)) + return [SSMatch(best_idx, self) for dist, best_idx in distances] From 26c39622a269cd2bff9ddc42683b21ae3b2533e5 Mon Sep 17 00:00:00 2001 From: wannesm Date: Mon, 10 Oct 2022 13:11:52 +0200 Subject: [PATCH 07/59] subseqsearch: improved --- dtaidistance/subsequence/dtw.py | 7 ++++--- tests/test_subsequence.py | 9 ++++++--- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/dtaidistance/subsequence/dtw.py b/dtaidistance/subsequence/dtw.py index b12b73e5..d42c0b7c 100644 --- a/dtaidistance/subsequence/dtw.py +++ b/dtaidistance/subsequence/dtw.py @@ -549,7 +549,10 @@ def __init__(self, query, s, dists_options=None, use_lb=False, keep_all_distance self.k = None self.dists_options = {} if dists_options is None else dists_options self.use_lb = use_lb - self.keep_all_distances = keep_all_distances + if self.k is None: + self.keep_all_distances = True + else: + self.keep_all_distances = keep_all_distances if self.use_lb and not self.keep_all_distances: raise ValueError("If use_lb is true, then keep_all_distances should also be true.") @@ -579,8 +582,6 @@ def align(self, k=None): for idx, series in enumerate(self.s): if self.use_lb and self.lbs[idx] > max_dist: continue - print(f'{len(self.query)=} * {len(series)=} = {len(self.query)*len(series)*8/1024**2}MiB') - print(f'{len(self.query)=} * 2 = {len(self.query) * 2 * 8 / 1024 ** 2}MiB') dist = dtw.distance(self.query, series, **self.dists_options) if k 
is not None: if len(h) < k: diff --git a/tests/test_subsequence.py b/tests/test_subsequence.py index 22d8b3b1..f1384c99 100644 --- a/tests/test_subsequence.py +++ b/tests/test_subsequence.py @@ -240,12 +240,13 @@ def test_dtw_subseqsearch_eeg(benchmark): query, s, k, series, s_idx = create_data_subseqsearch_eeg(np) def run(): - sa = subsequence_search(query, s, dists_options={'use_c': True}) + sa = subsequence_search(query, s, dists_options={'use_c': True}, + keep_all_distances=False) best = sa.kbest_matches_fast(k=k) - return best + return best, sa if benchmark is None: tic = time.perf_counter() - best = run() + best, sa = run() toc = time.perf_counter() print("Searching performed in {:0.4f} seconds".format(toc - tic)) else: @@ -253,6 +254,8 @@ def run(): # print(sa.distances) # print(best) + assert str(best) == "[SSMatch(15), SSMatch(7), SSMatch(4)]" + if directory and not dtwvis.test_without_visualization(): try: import matplotlib.pyplot as plt From 31f77034bd7687cbcb2820be008065bd8d1d6ff8 Mon Sep 17 00:00:00 2001 From: wannesm Date: Tue, 11 Oct 2022 15:45:47 +0200 Subject: [PATCH 08/59] subseqsearch: reduce memory --- dtaidistance/subsequence/dtw.py | 73 ++++++++++++++++++++++++++------- tests/test_subsequence.py | 5 ++- 2 files changed, 62 insertions(+), 16 deletions(-) diff --git a/dtaidistance/subsequence/dtw.py b/dtaidistance/subsequence/dtw.py index d42c0b7c..b15918a2 100644 --- a/dtaidistance/subsequence/dtw.py +++ b/dtaidistance/subsequence/dtw.py @@ -512,20 +512,24 @@ class SSMatch: series in the original list of series. The distance property returns the DTW distance between the query and the series at index idx. 
""" - def __init__(self, idx, ss): - self.idx = idx + def __init__(self, kidx, ss): + self.kidx = kidx self.ss = ss @property def distance(self): """DTW distance.""" - return self.ss.distances[self.idx] + return self.ss.kbest_distances[self.kidx][0] @property def value(self): """Normalized DTW distance.""" return self.distance / len(self.ss.query) + @property + def idx(self): + return self.ss.kbest_distances[self.kidx][1] + def __str__(self): return f'SSMatch({self.idx})' @@ -533,6 +537,29 @@ def __repr__(self): return self.__str__() +class SSMatches: + def __init__(self, ss): + self.ss = ss + + def __getitem__(self, key): + if isinstance(key, slice): + start = 0 if key.start is None else key.start + return [SSMatch(kip+start, self.ss) for kip, (v, i) in + enumerate(self.ss.kbest_distances[key])] + return SSMatch(key, self.ss) + + def __iter__(self): + for ki, (v, i) in enumerate(self.ss.kbest_distances): + yield SSMatch(ki, self.ss) + + def __str__(self): + if len(self.ss.kbest_distances) > 10: + return '[' + ', '.join(str(m) for m in self[:5]) + ' ... ' +\ + ', '.join(str(m) for m in self[-5:]) + ']' + return '[' + ', '.join(str(m) for m in self) + ']' + + + class SubsequenceSearch: def __init__(self, query, s, dists_options=None, use_lb=False, keep_all_distances=False): """Search the best matching (subsequence) time series compared to a given time series. 
@@ -545,6 +572,7 @@ def __init__(self, query, s, dists_options=None, use_lb=False, keep_all_distance self.query = query self.s = s self.distances = None + self.kbest_distances = None self.lbs = None self.k = None self.dists_options = {} if dists_options is None else dists_options @@ -558,12 +586,13 @@ def __init__(self, query, s, dists_options=None, use_lb=False, keep_all_distance def reset(self): self.distances = None + self.kbest_distances = None + self.lbs = None def compute_lbs(self): self.lbs = np.zeros((len(self.s),)) for idx, series in enumerate(self.s): self.lbs[idx] = dtw.lb_keogh(self.query, series, **self.dists_options) - print(self.lbs) def align_fast(self, k=None): self.dists_options['use_c'] = True @@ -595,8 +624,21 @@ def align(self, k=None): self.dists_options['max_dist'] = max_dist if self.keep_all_distances: self.distances[idx] = dist - if not self.keep_all_distances: - self.distances = h + self.kbest_distances = sorted((-v, i) for v, i in h) + + self.k = k + + def get_ith_value(self, i): + """Return the i-th value from the k-best values. 
+ + :param i: Return i-th best value (i < k) + :return: (distance, index) + """ + if self.distances is None or self.k is None: + raise ValueError('Align should be called before asking for the i-th value.') + if i > self.k: + raise ValueError('The i-th value is not available, i={}>k={}'.format(i, self.k)) + return self.kbest_distances[i] def best_match_fast(self): self.dists_options['use_c'] = True @@ -604,8 +646,8 @@ def best_match_fast(self): def best_match(self): self.align(k=1) - best_idx = np.argmin(self.distances) - return SSMatch(best_idx, self) + # _value, best_idx = self.kbest_distances[0] + return SSMatch(0, self) def kbest_matches_fast(self, k=1): self.dists_options['use_c'] = True @@ -622,10 +664,11 @@ def kbest_matches(self, k=1): :return: List of SSMatch objects """ self.align(k=k) - if k is None: - return [SSMatch(best_idx, self) for best_idx in range(len(self.distances))] - if self.keep_all_distances: - best_idxs = np.argpartition(self.distances, k) - return [SSMatch(best_idx, self) for best_idx in best_idxs[:k]] - distances = reversed(sorted(self.distances)) - return [SSMatch(best_idx, self) for dist, best_idx in distances] + # if k is None: + # return [SSMatch(best_idx, self) for best_idx in range(len(self.distances))] + # if self.keep_all_distances: + # best_idxs = np.argpartition(self.distances, k) + # return [SSMatch(best_idx, self) for best_idx in best_idxs[:k]] + # distances = reversed(sorted(self.h)) + # return [SSMatch(best_idx, self) for dist, best_idx in distances] + return SSMatches(self) diff --git a/tests/test_subsequence.py b/tests/test_subsequence.py index f1384c99..c0aea8f3 100644 --- a/tests/test_subsequence.py +++ b/tests/test_subsequence.py @@ -254,7 +254,10 @@ def run(): # print(sa.distances) # print(best) - assert str(best) == "[SSMatch(15), SSMatch(7), SSMatch(4)]" + assert str(best) == "[SSMatch(15), SSMatch(7), SSMatch(4)]", str(best) + assert str(best[0]) == str(sa.best_match()), '{} != {}'.format(best[0], sa.best_match()) + 
assert str(best[:]) == "[SSMatch(15), SSMatch(7), SSMatch(4)]", str(best[:]) + assert str(best[0:3]) == "[SSMatch(15), SSMatch(7), SSMatch(4)]", str(best[0:3]) if directory and not dtwvis.test_without_visualization(): try: From f36d69a4e98b3bf6b5f0685c40e605d1c46defd5 Mon Sep 17 00:00:00 2001 From: wannesm Date: Tue, 11 Oct 2022 23:06:03 +0200 Subject: [PATCH 09/59] jinja: use ctypedef --- dtaidistance/jinja/dtw_cc.jinja.pxd | 7 +-- dtaidistance/jinja/dtw_cc.jinja.pyx | 53 ++++++++++--------- dtaidistance/jinja/dtw_cc_dba.jinja.pyx | 8 +-- .../jinja/dtw_cc_warpingpath.jinja.pyx | 4 +- .../jinja/dtw_cc_warpingpaths.jinja.pyx | 20 +++---- dtaidistance/jinja/ed_cc.jinja.pyx | 5 +- 6 files changed, 50 insertions(+), 47 deletions(-) diff --git a/dtaidistance/jinja/dtw_cc.jinja.pxd b/dtaidistance/jinja/dtw_cc.jinja.pxd index 4b7d79d5..332d3892 100644 --- a/dtaidistance/jinja/dtw_cc.jinja.pxd +++ b/dtaidistance/jinja/dtw_cc.jinja.pxd @@ -1,4 +1,5 @@ cimport dtaidistancec_dtw +from dtaidistancec_dtw cimport seq_t cdef class DTWBlock: cdef dtaidistancec_dtw.DTWBlock _block @@ -10,13 +11,13 @@ cdef class DTWWps: cdef dtaidistancec_dtw.DTWWps _wps cdef class DTWSeriesPointers: - cdef {{seq_tpy}} **_ptrs + cdef seq_t **_ptrs cdef Py_ssize_t *_lengths cdef Py_ssize_t _nb_ptrs cdef class DTWSeriesMatrix: - cdef {{seq_tpy}}[:,::1] _data + cdef seq_t[:,::1] _data cdef class DTWSeriesMatrixNDim: - cdef {{seq_tpy}}[:,:,::1] _data + cdef seq_t[:,:,::1] _data diff --git a/dtaidistance/jinja/dtw_cc.jinja.pyx b/dtaidistance/jinja/dtw_cc.jinja.pyx index c0dd8f1a..54b9b0b1 100644 --- a/dtaidistance/jinja/dtw_cc.jinja.pyx +++ b/dtaidistance/jinja/dtw_cc.jinja.pyx @@ -20,6 +20,7 @@ from cpython.mem cimport PyMem_Malloc, PyMem_Free cimport dtaidistancec_dtw cimport dtaidistancec_globals +from dtaidistancec_dtw cimport seq_t cdef extern from "Python.h": @@ -196,7 +197,7 @@ cdef class DTWSettings: cdef class DTWSeriesPointers: def __cinit__(self, int nb_series): - self._ptrs = <{{seq_tpy}} **> 
malloc(nb_series * sizeof({{seq_tpy}}*)) + self._ptrs = malloc(nb_series * sizeof(seq_t*)) self._nb_ptrs = nb_series if not self._ptrs: self._ptrs = NULL @@ -214,7 +215,7 @@ cdef class DTWSeriesPointers: cdef class DTWSeriesMatrix: - def __cinit__(self, {{seq_tpy}}[:, ::1] data): + def __cinit__(self, seq_t[:, ::1] data): self._data = data @property @@ -227,7 +228,7 @@ cdef class DTWSeriesMatrix: cdef class DTWSeriesMatrixNDim: - def __cinit__(self, {{seq_tpy}}[:, :, ::1] data): + def __cinit__(self, seq_t[:, :, ::1] data): self._data = data @property @@ -251,7 +252,7 @@ def dtw_series_from_data(data, force_pointers=False): ptrs = DTWSeriesPointers(len(data)) for i in range(len(data)): ptr = data[i].ctypes.data # uniform for memoryviews and numpy - ptrs._ptrs[i] = <{{seq_tpy}} *> ptr + ptrs._ptrs[i] = ptr ptrs._lengths[i] = len(data[i]) return ptrs try: @@ -266,12 +267,12 @@ def dtw_series_from_data(data, force_pointers=False): raise ValueError(f"Cannot convert data of type {type(data)}") -def ub_euclidean({{seq_tpy}}[:] s1, {{seq_tpy}}[:] s2): +def ub_euclidean(seq_t[:] s1, seq_t[:] s2): """ See ed.euclidean_distance""" return dtaidistancec_dtw.ub_euclidean(&s1[0], len(s1), &s2[0], len(s2)) -def ub_euclidean_ndim({{seq_tpy}}[:, :] s1, {{seq_tpy}}[:, :] s2): +def ub_euclidean_ndim(seq_t[:, :] s1, seq_t[:, :] s2): """ See ed.euclidean_distance_ndim""" # Assumes C contiguous if s1.shape[1] != s2.shape[1]: @@ -280,20 +281,20 @@ def ub_euclidean_ndim({{seq_tpy}}[:, :] s1, {{seq_tpy}}[:, :] s2): return dtaidistancec_dtw.ub_euclidean_ndim(&s1[0,0], len(s1), &s2[0,0], len(s2), ndim) -def lb_keogh({{seq_tpy}}[:] s1, {{seq_tpy}}[:] s2, **kwargs): +def lb_keogh(seq_t[:] s1, seq_t[:] s2, **kwargs): # Assumes C contiguous settings = DTWSettings(**kwargs) return dtaidistancec_dtw.lb_keogh(&s1[0], len(s1), &s2[0], len(s2), &settings._settings) -def distance({{seq_tpy}}[:] s1, {{seq_tpy}}[:] s2, **kwargs): +def distance(seq_t[:] s1, seq_t[:] s2, **kwargs): """DTW distance. 
Assumes C-contiguous arrays. See distance(). - :param s1: First sequence (buffer of {{seq_tpy}}s) - :param s2: Second sequence (buffer of {{seq_tpy}}s) + :param s1: First sequence (buffer of seq_t-s) + :param s2: Second sequence (buffer of seq_t-s) :param kwargs: Settings (see DTWSettings) """ # Assumes C contiguous @@ -301,14 +302,14 @@ def distance({{seq_tpy}}[:] s1, {{seq_tpy}}[:] s2, **kwargs): return dtaidistancec_dtw.dtw_distance(&s1[0], len(s1), &s2[0], len(s2), &settings._settings) -def distance_ndim({{seq_tpy}}[:, :] s1, {{seq_tpy}}[:, :] s2, **kwargs): +def distance_ndim(seq_t[:, :] s1, seq_t[:, :] s2, **kwargs): """DTW distance for n-dimensional arrays. Assumes C-contiguous arrays. See distance(). - :param s1: First sequence (buffer of {{seq_tpy}}s) - :param s2: Second sequence (buffer of {{seq_tpy}}s) + :param s1: First sequence (buffer of seq_t-s) + :param s2: Second sequence (buffer of seq_t-s) :param ndim: Number of dimensions :param kwargs: Settings (see DTWSettings) """ @@ -320,14 +321,14 @@ def distance_ndim({{seq_tpy}}[:, :] s1, {{seq_tpy}}[:, :] s2, **kwargs): return dtaidistancec_dtw.dtw_distance_ndim(&s1[0,0], len(s1), &s2[0,0], len(s2), ndim, &settings._settings) -def distance_ndim_assinglearray({{seq_tpy}}[:] s1, {{seq_tpy}}[:] s2, int ndim, **kwargs): +def distance_ndim_assinglearray(seq_t[:] s1, seq_t[:] s2, int ndim, **kwargs): """DTW distance for n-dimensional arrays. Assumes C-contiguous arrays (with sequence item as first dimension). See distance(). 
- :param s1: First sequence (buffer of {{seq_tpy}}s) - :param s2: Second sequence (buffer of {{seq_tpy}}s) + :param s1: First sequence (buffer of seq_ts) + :param s2: Second sequence (buffer of seq_ts) :param ndim: Number of dimensions :param kwargs: Settings (see DTWSettings) """ @@ -361,34 +362,34 @@ def wps_width(Py_ssize_t l1, Py_ssize_t l2, **kwargs): {%- include 'dtw_cc_warpingpath.jinja.pyx' %} -def wps_negativize(DTWWps p, {{seq_tpy}}[:, :] wps, Py_ssize_t rb, Py_ssize_t re): +def wps_negativize(DTWWps p, seq_t[:, :] wps, Py_ssize_t rb, Py_ssize_t re): dtaidistancec_dtw.dtw_wps_negativize(&p._wps, &wps[0,0], rb, re) -def wps_positivize(DTWWps p, {{seq_tpy}}[:, :] wps, Py_ssize_t rb, Py_ssize_t re): +def wps_positivize(DTWWps p, seq_t[:, :] wps, Py_ssize_t rb, Py_ssize_t re): dtaidistancec_dtw.dtw_wps_positivize(&p._wps, &wps[0,0], rb, re) -def wps_max(DTWWps p, {{seq_tpy}}[:, :] wps): +def wps_max(DTWWps p, seq_t[:, :] wps): cdef Py_ssize_t r, c result = dtaidistancec_dtw.dtw_wps_max(&p._wps, &wps[0,0], &r, &c, wps.shape[0] - 1, wps.shape[1] - 1) return r, c -def wps_expand_slice({{seq_tpy}}[:, :] wps, {{seq_tpy}}[:, :] slice, Py_ssize_t l1, Py_ssize_t l2, +def wps_expand_slice(seq_t[:, :] wps, seq_t[:, :] slice, Py_ssize_t l1, Py_ssize_t l2, Py_ssize_t rb, Py_ssize_t re, Py_ssize_t cb, Py_ssize_t ce, DTWSettings settings): dtaidistancec_dtw.dtw_expand_wps_slice_affinity(&wps[0, 0], &slice[0, 0], l1, l2, rb, re, cb, ce, &settings._settings) -def wps_print({{seq_tpy}}[:, :] wps, **kwargs): +def wps_print(seq_t[:, :] wps, **kwargs): settings = DTWSettings(**kwargs) dtaidistancec_dtw.dtw_print_wps(&wps[0,0], wps.shape[0]-1, wps.shape[1]-1, &settings._settings) -def wps_print_compact({{seq_tpy}}[:, :] wps, **kwargs): +def wps_print_compact(seq_t[:, :] wps, **kwargs): settings = DTWSettings(**kwargs) dtaidistancec_dtw.dtw_print_wps_compact(&wps[0,0], wps.shape[0]-1, wps.shape[1]-1, &settings._settings) -def best_path_compact_affinity({{seq_tpy}}[:, :] wps, 
Py_ssize_t rs, Py_ssize_t cs, **kwargs): +def best_path_compact_affinity(seq_t[:, :] wps, Py_ssize_t rs, Py_ssize_t cs, **kwargs): cdef Py_ssize_t path_length; settings = DTWSettings(**kwargs) l1 = wps.shape[0] - 1 @@ -415,7 +416,7 @@ def best_path_compact_affinity({{seq_tpy}}[:, :] wps, Py_ssize_t rs, Py_ssize_t def srand(unsigned int seed): dtaidistancec_dtw.dtw_srand(seed) -def warping_path_prob({{seq_tpy}}[:] s1, {{seq_tpy}}[:] s2, {{seq_tpy}} avg, **kwargs): +def warping_path_prob(seq_t[:] s1, seq_t[:] s2, seq_t avg, **kwargs): # Assumes C contiguous cdef Py_ssize_t path_length; settings = DTWSettings(**kwargs) @@ -456,8 +457,8 @@ def distance_matrix_length(DTWBlock block, Py_ssize_t nb_series): {%- include 'dtw_cc_dba.jinja.pyx' %} -cdef dba_inner(cur, {{seq_tpy}} *c_ptr, Py_ssize_t c_len, unsigned char *mask_ptr, int nb_prob_samples, int ndim, DTWSettings settings): - cdef {{seq_tpy}} *matrix_ptr; +cdef dba_inner(cur, seq_t *c_ptr, Py_ssize_t c_len, unsigned char *mask_ptr, int nb_prob_samples, int ndim, DTWSettings settings): + cdef seq_t *matrix_ptr; cdef DTWSeriesMatrix matrix cdef DTWSeriesMatrixNDim matrix_ndim cdef DTWSeriesPointers ptrs diff --git a/dtaidistance/jinja/dtw_cc_dba.jinja.pyx b/dtaidistance/jinja/dtw_cc_dba.jinja.pyx index 45da25d6..8ac58b22 100644 --- a/dtaidistance/jinja/dtw_cc_dba.jinja.pyx +++ b/dtaidistance/jinja/dtw_cc_dba.jinja.pyx @@ -6,15 +6,15 @@ {%- endif %} def dba{{suffix}}(cur,{{s}} {%- if "ndim" in suffix -%} - {{seq_tpy}}[:, :] c, unsigned char[:] mask, int nb_prob_samples, int ndim,{{s}} + seq_t[:, :] c, unsigned char[:] mask, int nb_prob_samples, int ndim,{{s}} {%- else -%} - {{seq_tpy}}[:] c, unsigned char[:] mask, int nb_prob_samples,{{s}} + seq_t[:] c, unsigned char[:] mask, int nb_prob_samples,{{s}} {%- endif -%} **kwargs): {%- if "ndim" in suffix %} - cdef {{seq_tpy}} *c_ptr = &c[0, 0]; + cdef seq_t *c_ptr = &c[0, 0]; {%- else %} - cdef {{seq_tpy}} *c_ptr = &c[0]; + cdef seq_t *c_ptr = &c[0]; {%- endif %} cdef 
unsigned char *mask_ptr = &mask[0]; settings = DTWSettings(**kwargs) diff --git a/dtaidistance/jinja/dtw_cc_warpingpath.jinja.pyx b/dtaidistance/jinja/dtw_cc_warpingpath.jinja.pyx index 97e01603..d5401f31 100644 --- a/dtaidistance/jinja/dtw_cc_warpingpath.jinja.pyx +++ b/dtaidistance/jinja/dtw_cc_warpingpath.jinja.pyx @@ -1,9 +1,9 @@ {%- set s = " " %} def warping_path{{suffix}}( {%- if "ndim" in suffix -%} - {{seq_tpy}}[:, :] s1, {{seq_tpy}}[:, :] s2, int ndim=1,{{s}} + seq_t[:, :] s1, seq_t[:, :] s2, int ndim=1,{{s}} {%- else -%} - {{seq_tpy}}[:] s1, {{seq_tpy}}[:] s2,{{s}} + seq_t[:] s1, seq_t[:] s2,{{s}} {%- endif -%} **kwargs): # Assumes C contiguous diff --git a/dtaidistance/jinja/dtw_cc_warpingpaths.jinja.pyx b/dtaidistance/jinja/dtw_cc_warpingpaths.jinja.pyx index e4a09861..6506a627 100644 --- a/dtaidistance/jinja/dtw_cc_warpingpaths.jinja.pyx +++ b/dtaidistance/jinja/dtw_cc_warpingpaths.jinja.pyx @@ -26,12 +26,12 @@ def warping_paths{{ suffix }}( {%- if "ndim" in suffix -%} - {{seq_tpy}}[:, :] dtw, {{seq_tpy}}[:, :] s1, {{seq_tpy}}[:, :] s2,{{s}} + seq_t[:, :] dtw, seq_t[:, :] s1, seq_t[:, :] s2,{{s}} {%- else -%} - {{seq_tpy}}[:, :] dtw, {{seq_tpy}}[:] s1, {{seq_tpy}}[:] s2,{{s}} + seq_t[:, :] dtw, seq_t[:] s1, seq_t[:] s2,{{s}} {%- endif -%} {%- if "affinity" in suffix %} - bint only_triu, {{seq_tpy}} gamma, {{seq_tpy}} tau, {{seq_tpy}} delta, {{seq_tpy}} delta_factor, + bint only_triu, seq_t gamma, seq_t tau, seq_t delta, seq_t delta_factor, {%- endif %} bint psi_neg=False, **kwargs): {%- if "ndim" in suffix %} @@ -51,12 +51,12 @@ def warping_paths{{ suffix }}( else: try: # Use cython.view.array to avoid numpy dependency - wps = cvarray(shape=shape, itemsize=sizeof({{seq_tpy}}), format="{{seq_format}}") + wps = cvarray(shape=shape, itemsize=sizeof(seq_t), format="{{seq_format}}") except MemoryError as exc: print("Cannot allocate memory for warping paths matrix. 
Trying " + str(shape) + ".") raise exc - cdef {{seq_tpy}} [:, :] wps_view = wps - cdef {{seq_tpy}} d + cdef seq_t [:, :] wps_view = wps + cdef seq_t d {{ select_c_fn("wps_view")}} if not (req_length == dtw_length and req_width == dtw.shape[1]): {%- if "affinity" in suffix %} @@ -69,12 +69,12 @@ def warping_paths{{ suffix }}( def warping_paths_compact{{ suffix }}( {%- if "ndim" in suffix -%} - {{seq_tpy}}[:, :] dtw, {{seq_tpy}}[:, :] s1, {{seq_tpy}}[:, :] s2,{{s}} + seq_t[:, :] dtw, seq_t[:, :] s1, seq_t[:, :] s2,{{s}} {%- else -%} - {{seq_tpy}}[:, :] dtw, {{seq_tpy}}[:] s1, {{seq_tpy}}[:] s2,{{s}} + seq_t[:, :] dtw, seq_t[:] s1, seq_t[:] s2,{{s}} {%- endif -%} {%- if "affinity" in suffix %} - bint only_triu, {{seq_tpy}} gamma, {{seq_tpy}} tau, {{seq_tpy}} delta, {{seq_tpy}} delta_factor, + bint only_triu, seq_t gamma, seq_t tau, seq_t delta, seq_t delta_factor, {%- endif %} bint psi_neg=False, **kwargs): {%- if "ndim" in suffix %} @@ -84,6 +84,6 @@ def warping_paths_compact{{ suffix }}( {%- endif %} # Assumes C contiguous settings = DTWSettings(**kwargs) - cdef {{seq_tpy}} d + cdef seq_t d {{ select_c_fn("dtw") }} return d diff --git a/dtaidistance/jinja/ed_cc.jinja.pyx b/dtaidistance/jinja/ed_cc.jinja.pyx index 3f5e3696..75ca257c 100644 --- a/dtaidistance/jinja/ed_cc.jinja.pyx +++ b/dtaidistance/jinja/ed_cc.jinja.pyx @@ -11,12 +11,13 @@ Euclidean Distance (ED), C implementation. """ import logging cimport dtaidistancec_ed +from dtaidistancec_dtw cimport seq_t logger = logging.getLogger("be.kuleuven.dtai.distance") -def distance({{seq_tpy}}[:] s1, {{seq_tpy}}[:] s2): +def distance(seq_t[:] s1, seq_t[:] s2): """ Euclidean distance between two sequences. Supports different lengths. 
If the two series differ in length, compare the last element of the shortest series @@ -29,7 +30,7 @@ def distance({{seq_tpy}}[:] s1, {{seq_tpy}}[:] s2): return dtaidistancec_ed.euclidean_distance(&s1[0], len(s1), &s2[0], len(s2)) -def distance_ndim({{seq_tpy}}[:, :] s1, {{seq_tpy}}[:, :] s2): +def distance_ndim(seq_t[:, :] s1, seq_t[:, :] s2): """ Euclidean distance between two sequences. Supports different lengths. If the two series differ in length, compare the last element of the shortest series From 22596a84b9d4a1a71b3e59a1c1d13937daeb08cc Mon Sep 17 00:00:00 2001 From: wannesm Date: Tue, 11 Oct 2022 23:13:01 +0200 Subject: [PATCH 10/59] double to seq_t --- dtaidistance/dtw_cc.pxd | 7 +-- dtaidistance/dtw_cc.pyx | 105 ++++++++++++++++++++-------------------- dtaidistance/ed_cc.pyx | 5 +- 3 files changed, 60 insertions(+), 57 deletions(-) diff --git a/dtaidistance/dtw_cc.pxd b/dtaidistance/dtw_cc.pxd index 831c4515..3f75558d 100644 --- a/dtaidistance/dtw_cc.pxd +++ b/dtaidistance/dtw_cc.pxd @@ -1,4 +1,5 @@ cimport dtaidistancec_dtw +from dtaidistancec_dtw cimport seq_t cdef class DTWBlock: cdef dtaidistancec_dtw.DTWBlock _block @@ -10,12 +11,12 @@ cdef class DTWWps: cdef dtaidistancec_dtw.DTWWps _wps cdef class DTWSeriesPointers: - cdef double **_ptrs + cdef seq_t **_ptrs cdef Py_ssize_t *_lengths cdef Py_ssize_t _nb_ptrs cdef class DTWSeriesMatrix: - cdef double[:,::1] _data + cdef seq_t[:,::1] _data cdef class DTWSeriesMatrixNDim: - cdef double[:,:,::1] _data + cdef seq_t[:,:,::1] _data diff --git a/dtaidistance/dtw_cc.pyx b/dtaidistance/dtw_cc.pyx index 60f44d59..e849d4e3 100644 --- a/dtaidistance/dtw_cc.pyx +++ b/dtaidistance/dtw_cc.pyx @@ -20,6 +20,7 @@ from cpython.mem cimport PyMem_Malloc, PyMem_Free cimport dtaidistancec_dtw cimport dtaidistancec_globals +from dtaidistancec_dtw cimport seq_t cdef extern from "Python.h": @@ -196,7 +197,7 @@ cdef class DTWSettings: cdef class DTWSeriesPointers: def __cinit__(self, int nb_series): - self._ptrs = 
malloc(nb_series * sizeof(double*)) + self._ptrs = malloc(nb_series * sizeof(seq_t*)) self._nb_ptrs = nb_series if not self._ptrs: self._ptrs = NULL @@ -214,7 +215,7 @@ cdef class DTWSeriesPointers: cdef class DTWSeriesMatrix: - def __cinit__(self, double[:, ::1] data): + def __cinit__(self, seq_t[:, ::1] data): self._data = data @property @@ -227,7 +228,7 @@ cdef class DTWSeriesMatrix: cdef class DTWSeriesMatrixNDim: - def __cinit__(self, double[:, :, ::1] data): + def __cinit__(self, seq_t[:, :, ::1] data): self._data = data @property @@ -251,7 +252,7 @@ def dtw_series_from_data(data, force_pointers=False): ptrs = DTWSeriesPointers(len(data)) for i in range(len(data)): ptr = data[i].ctypes.data # uniform for memoryviews and numpy - ptrs._ptrs[i] = ptr + ptrs._ptrs[i] = ptr ptrs._lengths[i] = len(data[i]) return ptrs try: @@ -266,12 +267,12 @@ def dtw_series_from_data(data, force_pointers=False): raise ValueError(f"Cannot convert data of type {type(data)}") -def ub_euclidean(double[:] s1, double[:] s2): +def ub_euclidean(seq_t[:] s1, seq_t[:] s2): """ See ed.euclidean_distance""" return dtaidistancec_dtw.ub_euclidean(&s1[0], len(s1), &s2[0], len(s2)) -def ub_euclidean_ndim(double[:, :] s1, double[:, :] s2): +def ub_euclidean_ndim(seq_t[:, :] s1, seq_t[:, :] s2): """ See ed.euclidean_distance_ndim""" # Assumes C contiguous if s1.shape[1] != s2.shape[1]: @@ -280,20 +281,20 @@ def ub_euclidean_ndim(double[:, :] s1, double[:, :] s2): return dtaidistancec_dtw.ub_euclidean_ndim(&s1[0,0], len(s1), &s2[0,0], len(s2), ndim) -def lb_keogh(double[:] s1, double[:] s2, **kwargs): +def lb_keogh(seq_t[:] s1, seq_t[:] s2, **kwargs): # Assumes C contiguous settings = DTWSettings(**kwargs) return dtaidistancec_dtw.lb_keogh(&s1[0], len(s1), &s2[0], len(s2), &settings._settings) -def distance(double[:] s1, double[:] s2, **kwargs): +def distance(seq_t[:] s1, seq_t[:] s2, **kwargs): """DTW distance. Assumes C-contiguous arrays. See distance(). 
- :param s1: First sequence (buffer of doubles) - :param s2: Second sequence (buffer of doubles) + :param s1: First sequence (buffer of seq_t-s) + :param s2: Second sequence (buffer of seq_t-s) :param kwargs: Settings (see DTWSettings) """ # Assumes C contiguous @@ -301,14 +302,14 @@ def distance(double[:] s1, double[:] s2, **kwargs): return dtaidistancec_dtw.dtw_distance(&s1[0], len(s1), &s2[0], len(s2), &settings._settings) -def distance_ndim(double[:, :] s1, double[:, :] s2, **kwargs): +def distance_ndim(seq_t[:, :] s1, seq_t[:, :] s2, **kwargs): """DTW distance for n-dimensional arrays. Assumes C-contiguous arrays. See distance(). - :param s1: First sequence (buffer of doubles) - :param s2: Second sequence (buffer of doubles) + :param s1: First sequence (buffer of seq_t-s) + :param s2: Second sequence (buffer of seq_t-s) :param ndim: Number of dimensions :param kwargs: Settings (see DTWSettings) """ @@ -320,14 +321,14 @@ def distance_ndim(double[:, :] s1, double[:, :] s2, **kwargs): return dtaidistancec_dtw.dtw_distance_ndim(&s1[0,0], len(s1), &s2[0,0], len(s2), ndim, &settings._settings) -def distance_ndim_assinglearray(double[:] s1, double[:] s2, int ndim, **kwargs): +def distance_ndim_assinglearray(seq_t[:] s1, seq_t[:] s2, int ndim, **kwargs): """DTW distance for n-dimensional arrays. Assumes C-contiguous arrays (with sequence item as first dimension). See distance(). 
- :param s1: First sequence (buffer of doubles) - :param s2: Second sequence (buffer of doubles) + :param s1: First sequence (buffer of seq_ts) + :param s2: Second sequence (buffer of seq_ts) :param ndim: Number of dimensions :param kwargs: Settings (see DTWSettings) """ @@ -345,7 +346,7 @@ def wps_width(Py_ssize_t l1, Py_ssize_t l2, **kwargs): settings = DTWSettings(**kwargs) return dtaidistancec_dtw.dtw_settings_wps_width(l1, l2, &settings._settings) -def warping_paths(double[:, :] dtw, double[:] s1, double[:] s2, +def warping_paths(seq_t[:, :] dtw, seq_t[:] s1, seq_t[:] s2, bint psi_neg=False, **kwargs): # Assumes C contiguous settings = DTWSettings(**kwargs) @@ -359,12 +360,12 @@ def warping_paths(double[:, :] dtw, double[:] s1, double[:] s2, else: try: # Use cython.view.array to avoid numpy dependency - wps = cvarray(shape=shape, itemsize=sizeof(double), format="d") + wps = cvarray(shape=shape, itemsize=sizeof(seq_t), format="d") except MemoryError as exc: print("Cannot allocate memory for warping paths matrix. 
Trying " + str(shape) + ".") raise exc - cdef double [:, :] wps_view = wps - cdef double d + cdef seq_t [:, :] wps_view = wps + cdef seq_t d d = dtaidistancec_dtw.dtw_warping_paths(&wps_view[0,0], &s1[0], len(s1), &s2[0], len(s2), True, True, psi_neg, &settings._settings) @@ -373,17 +374,17 @@ def warping_paths(double[:, :] dtw, double[:] s1, double[:] s2, return d -def warping_paths_compact(double[:, :] dtw, double[:] s1, double[:] s2, +def warping_paths_compact(seq_t[:, :] dtw, seq_t[:] s1, seq_t[:] s2, bint psi_neg=False, **kwargs): # Assumes C contiguous settings = DTWSettings(**kwargs) - cdef double d + cdef seq_t d d = dtaidistancec_dtw.dtw_warping_paths(&dtw[0,0], &s1[0], len(s1), &s2[0], len(s2), True, True, psi_neg, &settings._settings) return d -def warping_paths_ndim(double[:, :] dtw, double[:, :] s1, double[:, :] s2, +def warping_paths_ndim(seq_t[:, :] dtw, seq_t[:, :] s1, seq_t[:, :] s2, bint psi_neg=False, **kwargs): ndim = s1.shape[1] if s1.shape[1] != s2.shape[1]: @@ -400,12 +401,12 @@ def warping_paths_ndim(double[:, :] dtw, double[:, :] s1, double[:, :] s2, else: try: # Use cython.view.array to avoid numpy dependency - wps = cvarray(shape=shape, itemsize=sizeof(double), format="d") + wps = cvarray(shape=shape, itemsize=sizeof(seq_t), format="d") except MemoryError as exc: print("Cannot allocate memory for warping paths matrix. 
Trying " + str(shape) + ".") raise exc - cdef double [:, :] wps_view = wps - cdef double d + cdef seq_t [:, :] wps_view = wps + cdef seq_t d d = dtaidistancec_dtw.dtw_warping_paths_ndim(&wps_view[0,0], &s1[0,0], len(s1), &s2[0,0], len(s2), True, True, psi_neg, ndim, &settings._settings) @@ -414,21 +415,21 @@ def warping_paths_ndim(double[:, :] dtw, double[:, :] s1, double[:, :] s2, return d -def warping_paths_compact_ndim(double[:, :] dtw, double[:, :] s1, double[:, :] s2, +def warping_paths_compact_ndim(seq_t[:, :] dtw, seq_t[:, :] s1, seq_t[:, :] s2, bint psi_neg=False, **kwargs): if s1.shape[1] != s2.shape[1]: raise Exception("Dimension of sequence entries needs to be the same: {} != {}".format(s1.shape[1], s2.shape[1])) ndim = s1.shape[1] # Assumes C contiguous settings = DTWSettings(**kwargs) - cdef double d + cdef seq_t d d = dtaidistancec_dtw.dtw_warping_paths_ndim(&dtw[0,0], &s1[0,0], len(s1), &s2[0,0], len(s2), True, True, psi_neg, ndim, &settings._settings) return d -def warping_paths_affinity(double[:, :] dtw, double[:] s1, double[:] s2, - bint only_triu, double gamma, double tau, double delta, double delta_factor, +def warping_paths_affinity(seq_t[:, :] dtw, seq_t[:] s1, seq_t[:] s2, + bint only_triu, seq_t gamma, seq_t tau, seq_t delta, seq_t delta_factor, bint psi_neg=False, **kwargs): # Assumes C contiguous settings = DTWSettings(**kwargs) @@ -442,12 +443,12 @@ def warping_paths_affinity(double[:, :] dtw, double[:] s1, double[:] s2, else: try: # Use cython.view.array to avoid numpy dependency - wps = cvarray(shape=shape, itemsize=sizeof(double), format="d") + wps = cvarray(shape=shape, itemsize=sizeof(seq_t), format="d") except MemoryError as exc: print("Cannot allocate memory for warping paths matrix. 
Trying " + str(shape) + ".") raise exc - cdef double [:, :] wps_view = wps - cdef double d + cdef seq_t [:, :] wps_view = wps + cdef seq_t d d = dtaidistancec_dtw.dtw_warping_paths_affinity(&wps_view[0,0], &s1[0], len(s1), &s2[0], len(s2), True, False, psi_neg, only_triu, @@ -458,12 +459,12 @@ def warping_paths_affinity(double[:, :] dtw, double[:] s1, double[:] s2, return d -def warping_paths_compact_affinity(double[:, :] dtw, double[:] s1, double[:] s2, - bint only_triu, double gamma, double tau, double delta, double delta_factor, +def warping_paths_compact_affinity(seq_t[:, :] dtw, seq_t[:] s1, seq_t[:] s2, + bint only_triu, seq_t gamma, seq_t tau, seq_t delta, seq_t delta_factor, bint psi_neg=False, **kwargs): # Assumes C contiguous settings = DTWSettings(**kwargs) - cdef double d + cdef seq_t d d = dtaidistancec_dtw.dtw_warping_paths_affinity(&dtw[0,0], &s1[0], len(s1), &s2[0], len(s2), True, False, psi_neg, only_triu, @@ -472,7 +473,7 @@ def warping_paths_compact_affinity(double[:, :] dtw, double[:] s1, double[:] s2, return d -def warping_path(double[:] s1, double[:] s2, **kwargs): +def warping_path(seq_t[:] s1, seq_t[:] s2, **kwargs): # Assumes C contiguous cdef Py_ssize_t path_length; settings = DTWSettings(**kwargs) @@ -494,7 +495,7 @@ def warping_path(double[:] s1, double[:] s2, **kwargs): return path -def warping_path_ndim(double[:, :] s1, double[:, :] s2, int ndim=1, **kwargs): +def warping_path_ndim(seq_t[:, :] s1, seq_t[:, :] s2, int ndim=1, **kwargs): # Assumes C contiguous cdef Py_ssize_t path_length; settings = DTWSettings(**kwargs) @@ -516,34 +517,34 @@ def warping_path_ndim(double[:, :] s1, double[:, :] s2, int ndim=1, **kwargs): return path -def wps_negativize(DTWWps p, double[:, :] wps, Py_ssize_t rb, Py_ssize_t re): +def wps_negativize(DTWWps p, seq_t[:, :] wps, Py_ssize_t rb, Py_ssize_t re): dtaidistancec_dtw.dtw_wps_negativize(&p._wps, &wps[0,0], rb, re) -def wps_positivize(DTWWps p, double[:, :] wps, Py_ssize_t rb, Py_ssize_t re): +def 
wps_positivize(DTWWps p, seq_t[:, :] wps, Py_ssize_t rb, Py_ssize_t re): dtaidistancec_dtw.dtw_wps_positivize(&p._wps, &wps[0,0], rb, re) -def wps_max(DTWWps p, double[:, :] wps): +def wps_max(DTWWps p, seq_t[:, :] wps): cdef Py_ssize_t r, c result = dtaidistancec_dtw.dtw_wps_max(&p._wps, &wps[0,0], &r, &c, wps.shape[0] - 1, wps.shape[1] - 1) return r, c -def wps_expand_slice(double[:, :] wps, double[:, :] slice, Py_ssize_t l1, Py_ssize_t l2, +def wps_expand_slice(seq_t[:, :] wps, seq_t[:, :] slice, Py_ssize_t l1, Py_ssize_t l2, Py_ssize_t rb, Py_ssize_t re, Py_ssize_t cb, Py_ssize_t ce, DTWSettings settings): dtaidistancec_dtw.dtw_expand_wps_slice_affinity(&wps[0, 0], &slice[0, 0], l1, l2, rb, re, cb, ce, &settings._settings) -def wps_print(double[:, :] wps, **kwargs): +def wps_print(seq_t[:, :] wps, **kwargs): settings = DTWSettings(**kwargs) dtaidistancec_dtw.dtw_print_wps(&wps[0,0], wps.shape[0]-1, wps.shape[1]-1, &settings._settings) -def wps_print_compact(double[:, :] wps, **kwargs): +def wps_print_compact(seq_t[:, :] wps, **kwargs): settings = DTWSettings(**kwargs) dtaidistancec_dtw.dtw_print_wps_compact(&wps[0,0], wps.shape[0]-1, wps.shape[1]-1, &settings._settings) -def best_path_compact_affinity(double[:, :] wps, Py_ssize_t rs, Py_ssize_t cs, **kwargs): +def best_path_compact_affinity(seq_t[:, :] wps, Py_ssize_t rs, Py_ssize_t cs, **kwargs): cdef Py_ssize_t path_length; settings = DTWSettings(**kwargs) l1 = wps.shape[0] - 1 @@ -570,7 +571,7 @@ def best_path_compact_affinity(double[:, :] wps, Py_ssize_t rs, Py_ssize_t cs, * def srand(unsigned int seed): dtaidistancec_dtw.dtw_srand(seed) -def warping_path_prob(double[:] s1, double[:] s2, double avg, **kwargs): +def warping_path_prob(seq_t[:] s1, seq_t[:] s2, seq_t avg, **kwargs): # Assumes C contiguous cdef Py_ssize_t path_length; settings = DTWSettings(**kwargs) @@ -734,24 +735,24 @@ def distance_matrix_length(DTWBlock block, Py_ssize_t nb_series): return length -def dba(cur, double[:] c, unsigned char[:] 
mask, int nb_prob_samples, **kwargs): - cdef double *c_ptr = &c[0]; +def dba(cur, seq_t[:] c, unsigned char[:] mask, int nb_prob_samples, **kwargs): + cdef seq_t *c_ptr = &c[0]; cdef unsigned char *mask_ptr = &mask[0]; settings = DTWSettings(**kwargs) dba_inner(cur, c_ptr, len(c), mask_ptr, nb_prob_samples, 1, settings) return c -def dba_ndim(cur, double[:, :] c, unsigned char[:] mask, int nb_prob_samples, int ndim, **kwargs): - cdef double *c_ptr = &c[0, 0]; +def dba_ndim(cur, seq_t[:, :] c, unsigned char[:] mask, int nb_prob_samples, int ndim, **kwargs): + cdef seq_t *c_ptr = &c[0, 0]; cdef unsigned char *mask_ptr = &mask[0]; settings = DTWSettings(**kwargs) dba_inner(cur, c_ptr, len(c), mask_ptr, nb_prob_samples, ndim, settings) return c -cdef dba_inner(cur, double *c_ptr, Py_ssize_t c_len, unsigned char *mask_ptr, int nb_prob_samples, int ndim, DTWSettings settings): - cdef double *matrix_ptr; +cdef dba_inner(cur, seq_t *c_ptr, Py_ssize_t c_len, unsigned char *mask_ptr, int nb_prob_samples, int ndim, DTWSettings settings): + cdef seq_t *matrix_ptr; cdef DTWSeriesMatrix matrix cdef DTWSeriesMatrixNDim matrix_ndim cdef DTWSeriesPointers ptrs diff --git a/dtaidistance/ed_cc.pyx b/dtaidistance/ed_cc.pyx index 05ea329e..e9f70e3c 100644 --- a/dtaidistance/ed_cc.pyx +++ b/dtaidistance/ed_cc.pyx @@ -11,12 +11,13 @@ Euclidean Distance (ED), C implementation. """ import logging cimport dtaidistancec_ed +from dtaidistancec_dtw cimport seq_t logger = logging.getLogger("be.kuleuven.dtai.distance") -def distance(double[:] s1, double[:] s2): +def distance(seq_t[:] s1, seq_t[:] s2): """ Euclidean distance between two sequences. Supports different lengths. 
If the two series differ in length, compare the last element of the shortest series @@ -29,7 +30,7 @@ def distance(double[:] s1, double[:] s2): return dtaidistancec_ed.euclidean_distance(&s1[0], len(s1), &s2[0], len(s2)) -def distance_ndim(double[:, :] s1, double[:, :] s2): +def distance_ndim(seq_t[:, :] s1, seq_t[:, :] s2): """ Euclidean distance between two sequences. Supports different lengths. If the two series differ in length, compare the last element of the shortest series From f530e47b87cf352e17be6ce7a3b80d489c548d0c Mon Sep 17 00:00:00 2001 From: wannesm Date: Wed, 12 Oct 2022 01:10:00 +0200 Subject: [PATCH 11/59] affinity: negativize --- dtaidistance/dtaidistancec_dtw.pxd | 4 +- dtaidistance/dtw_cc.pyx | 8 +- dtaidistance/jinja/dtw_cc.jinja.pyx | 8 +- .../DTAIDistanceC/dd_benchmark.c | 47 +++++- .../lib/DTAIDistanceC/DTAIDistanceC/dd_dtw.c | 157 +++++++++++++++++- .../lib/DTAIDistanceC/DTAIDistanceC/dd_dtw.h | 5 +- .../DTAIDistanceC/jinja/dd_dtw.jinja.c | 157 +++++++++++++++++- dtaidistance/lib/DTAIDistanceC/Makefile | 6 +- 8 files changed, 357 insertions(+), 35 deletions(-) diff --git a/dtaidistance/dtaidistancec_dtw.pxd b/dtaidistance/dtaidistancec_dtw.pxd index f20120ec..dea3bf88 100644 --- a/dtaidistance/dtaidistancec_dtw.pxd +++ b/dtaidistance/dtaidistancec_dtw.pxd @@ -68,8 +68,8 @@ cdef extern from "dd_dtw.h": Py_ssize_t ce, DTWSettings *settings) void dtw_wps_negativize_value(DTWWps *p, seq_t *wps, Py_ssize_t l1, Py_ssize_t l2, Py_ssize_t r, Py_ssize_t c) void dtw_wps_positivize_value(DTWWps *p, seq_t *wps, Py_ssize_t l1, Py_ssize_t l2, Py_ssize_t r, Py_ssize_t c) - void dtw_wps_positivize(DTWWps *p, seq_t *wps, Py_ssize_t rb, Py_ssize_t re) - void dtw_wps_negativize(DTWWps *p, seq_t *wps, Py_ssize_t rb, Py_ssize_t re) + void dtw_wps_positivize(DTWWps *p, seq_t *wps, Py_ssize_t l1, Py_ssize_t l2, Py_ssize_t rb, Py_ssize_t re, Py_ssize_t cb, Py_ssize_t ce) + void dtw_wps_negativize(DTWWps *p, seq_t *wps, Py_ssize_t l1, Py_ssize_t l2, Py_ssize_t rb, 
Py_ssize_t re, Py_ssize_t cb, Py_ssize_t ce) Py_ssize_t dtw_wps_loc(DTWWps *p, Py_ssize_t r, Py_ssize_t c, Py_ssize_t l1, Py_ssize_t l2) Py_ssize_t dtw_wps_max(DTWWps * p, seq_t *wps, Py_ssize_t *r, Py_ssize_t *c, Py_ssize_t l1, Py_ssize_t l2) Py_ssize_t dtw_best_path(seq_t *wps, Py_ssize_t *i1, Py_ssize_t *i2, Py_ssize_t l1, Py_ssize_t l2, diff --git a/dtaidistance/dtw_cc.pyx b/dtaidistance/dtw_cc.pyx index e849d4e3..990810db 100644 --- a/dtaidistance/dtw_cc.pyx +++ b/dtaidistance/dtw_cc.pyx @@ -517,11 +517,11 @@ def warping_path_ndim(seq_t[:, :] s1, seq_t[:, :] s2, int ndim=1, **kwargs): return path -def wps_negativize(DTWWps p, seq_t[:, :] wps, Py_ssize_t rb, Py_ssize_t re): - dtaidistancec_dtw.dtw_wps_negativize(&p._wps, &wps[0,0], rb, re) +def wps_negativize(DTWWps p, seq_t[:, :] wps, Py_ssize_t l1, Py_ssize_t l2, Py_ssize_t rb, Py_ssize_t re, Py_ssize_t cb, Py_ssize_t ce): + dtaidistancec_dtw.dtw_wps_negativize(&p._wps, &wps[0,0], l1, l2, rb, re, cb, ce) -def wps_positivize(DTWWps p, seq_t[:, :] wps, Py_ssize_t rb, Py_ssize_t re): - dtaidistancec_dtw.dtw_wps_positivize(&p._wps, &wps[0,0], rb, re) +def wps_positivize(DTWWps p, seq_t[:, :] wps, Py_ssize_t l1, Py_ssize_t l2, Py_ssize_t rb, Py_ssize_t re, Py_ssize_t cb, Py_ssize_t ce): + dtaidistancec_dtw.dtw_wps_positivize(&p._wps, &wps[0,0], l1, l2, rb, re, cb, ce) def wps_max(DTWWps p, seq_t[:, :] wps): cdef Py_ssize_t r, c diff --git a/dtaidistance/jinja/dtw_cc.jinja.pyx b/dtaidistance/jinja/dtw_cc.jinja.pyx index 54b9b0b1..8b2ee7b2 100644 --- a/dtaidistance/jinja/dtw_cc.jinja.pyx +++ b/dtaidistance/jinja/dtw_cc.jinja.pyx @@ -362,11 +362,11 @@ def wps_width(Py_ssize_t l1, Py_ssize_t l2, **kwargs): {%- include 'dtw_cc_warpingpath.jinja.pyx' %} -def wps_negativize(DTWWps p, seq_t[:, :] wps, Py_ssize_t rb, Py_ssize_t re): - dtaidistancec_dtw.dtw_wps_negativize(&p._wps, &wps[0,0], rb, re) +def wps_negativize(DTWWps p, seq_t[:, :] wps, Py_ssize_t l1, Py_ssize_t l2, Py_ssize_t rb, Py_ssize_t re, Py_ssize_t cb, 
Py_ssize_t ce): + dtaidistancec_dtw.dtw_wps_negativize(&p._wps, &wps[0,0], l1, l2, rb, re, cb, ce) -def wps_positivize(DTWWps p, seq_t[:, :] wps, Py_ssize_t rb, Py_ssize_t re): - dtaidistancec_dtw.dtw_wps_positivize(&p._wps, &wps[0,0], rb, re) +def wps_positivize(DTWWps p, seq_t[:, :] wps, Py_ssize_t l1, Py_ssize_t l2, Py_ssize_t rb, Py_ssize_t re, Py_ssize_t cb, Py_ssize_t ce): + dtaidistancec_dtw.dtw_wps_positivize(&p._wps, &wps[0,0], l1, l2, rb, re, cb, ce) def wps_max(DTWWps p, seq_t[:, :] wps): cdef Py_ssize_t r, c diff --git a/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/dd_benchmark.c b/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/dd_benchmark.c index 0709e478..f622d0b3 100644 --- a/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/dd_benchmark.c +++ b/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/dd_benchmark.c @@ -395,20 +395,49 @@ void benchmark_affinity() { } printf("]\n"); + dtw_print_wps(wps, l1, l2, &settings); DTWWps p = dtw_wps_parts(l1, l2, &settings); +// idx_t rb = 2; +// idx_t re = 8; +// idx_t cb = 4; +// idx_t ce = 8; +// seq_t * wps_slice = (seq_t *)malloc(sizeof(seq_t) * (re-rb)*(ce-cb)); +// for (idx_t i=0; i<(re-rb)*(ce-cb); i++) { +// wps_slice[i] = -INFINITY; +// } +// +// dtw_expand_wps_slice_affinity(wps, wps_slice, l1, l2, rb, re, cb, ce, &settings); +// +// idx_t wpsi = 0; +// for (idx_t r=0; r<(re-rb); r++) { +// printf("[ "); +// for (idx_t c=0; c<(ce-cb); c++) { +// printf("%.2f ", wps_slice[wpsi]); +// wpsi++; +// } +// printf("]\n"); +// } + // dtw_wps_negativize(&p, wps, 2, 5); // dtw_wps_positivize(&p, wps, 3, 4); - dtw_print_wps(wps, l1, l2, &settings); - idx_t r, c, wps_i; - r = l1-3; c = l2-2; - wps_i = dtw_wps_loc(&p, r, c, l1, l2); - printf("wps_full[%zu,%zu] = wps[%zu] = %.3f\n", r, c, wps_i, wps[wps_i]); - idx_t maxr, maxc; - idx_t maxidx = dtw_wps_max(&p, wps, &maxr, &maxc, l1, l2); - printf("Max = %.3f @ [%zu]=[%zu,%zu]\n", wps[maxidx], maxidx, maxr, maxc); +// idx_t r, c, wps_i; +// r = l1-3; c = l2-2; +// wps_i = 
dtw_wps_loc(&p, r, c, l1, l2); +// printf("wps_full[%zu,%zu] = wps[%zu] = %.3f\n", r, c, wps_i, wps[wps_i]); + +// idx_t maxr, maxc; +// idx_t maxidx = dtw_wps_max(&p, wps, &maxr, &maxc, l1, l2); +// printf("Max = %.3f @ [%zu]=[%zu,%zu]\n", wps[maxidx], maxidx, maxr, maxc); + + printf("Negativize\n"); + dtw_wps_negativize(&p, wps, l1, l2, 4, 6, 4, 5); + dtw_print_wps(wps, l1, l2, &settings); + dtw_print_wps_compact(wps, l1, l2, &settings); +// maxidx = dtw_wps_max(&p, wps, &maxr, &maxc, l1, l2); +// printf("Max = %.3f @ [%zu]=[%zu,%zu]\n", wps[maxidx], maxidx, maxr, maxc); free(wps); printf("d = %.2f\n", d); @@ -431,7 +460,7 @@ int main(int argc, const char * argv[]) { // benchmark5(); // benchmark6(); // benchmark7(); -// benchmark8(); + //benchmark8(); // benchmark9(); // benchmark10(); // benchmark11(); diff --git a/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/dd_dtw.c b/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/dd_dtw.c index 1578990b..30cedad9 100644 --- a/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/dd_dtw.c +++ b/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/dd_dtw.c @@ -1577,10 +1577,45 @@ void dtw_wps_positivize_value(DTWWps* p, seq_t *wps, idx_t l1, idx_t l2, idx_t r @param rb Row begin @param re Row end */ -void dtw_wps_negativize(DTWWps* p, seq_t *wps, idx_t rb, idx_t re) { +void dtw_wps_negativize(DTWWps* p, seq_t *wps, idx_t l1, idx_t l2, idx_t rb, idx_t re, idx_t cb, idx_t ce) { + idx_t i, j, wpsi, cbp, cep, cbs, ces; idx_t idx = rb*p->width;; - for (idx_t i=rb; iwidth; j++) { + for (i=rb; iwidth; j++) { + if (wps[idx] > 0 && wps[idx] != INFINITY) { + wps[idx] = -wps[idx]; + } + idx++; + } + } + // above + for (i=1; i [%zu,%zu] -- %zu + %zu\n", cbp, cep, wpsi, cb-cbs); */ + idx = wpsi + (cb - cbs); + for (j=cbp; j 0 && wps[idx] != INFINITY) { + wps[idx] = -wps[idx]; + } + idx++; + } + } + // below + for (i=re; i [%zu,%zu] -- %zu + %zu\n", cbp, cep, wpsi, cb-cbs); */ + idx = wpsi + (cb - cbs); + for (j=cbp; j 0 && wps[idx] != INFINITY) { wps[idx] = 
-wps[idx]; } @@ -1590,16 +1625,51 @@ void dtw_wps_negativize(DTWWps* p, seq_t *wps, idx_t rb, idx_t re) { } -void dtw_wps_positivize(DTWWps* p, seq_t *wps, idx_t rb, idx_t re) { +void dtw_wps_positivize(DTWWps* p, seq_t *wps, idx_t l1, idx_t l2, idx_t rb, idx_t re, idx_t cb, idx_t ce) { + idx_t i, j, wpsi, cbp, cep, cbs, ces; idx_t idx = rb*p->width;; - for (idx_t i=rb; iwidth; j++) { + for (i=rb; iwidth; j++) { if (wps[idx] < 0 && wps[idx] != -INFINITY) { wps[idx] = -wps[idx]; } idx++; } } + // above + for (i=1; i [%zu,%zu] -- %zu + %zu\n", cbp, cep, wpsi, cb-cbs); */ + idx = wpsi + (cb - cbs); + for (j=cbp; j [%zu,%zu] -- %zu + %zu\n", cbp, cep, wpsi, cb-cbs); */ + idx = wpsi + (cb - cbs); + for (j=cbp; jwidth; + idx_t min_ci, max_ci; + + // First row is inf + ri_width = p->width; + + // A. + min_ci = 0; + max_ci = p->window + p->ldiffc + 1; + for (ri=1; riri1+1; ri++) { + ci = min_ci; + if (ri == r) { + *cb = min_ci; + *ce = max_ci; + return ri_width; + } + max_ci++; + ri_width += p->width; + } + + // B. + min_ci = 0; + max_ci = l2 + 1; + for (ri=p->ri1+1; riri2+1; ri++) { + ci = min_ci; + if (ri == r) { + *cb = min_ci; + *ce = max_ci; + return ri_width; + } + ri_width += p->width; + } + + // C. + min_ci = 1; + max_ci = 1 + 2 * p->window - 1 + p->ldiff + 1; + for (ri=p->ri2+1; riri3+1; ri++) { + ci = min_ci; + if (ri == r) { + *cb = min_ci; + *ce = max_ci; + return ri_width; + } + min_ci++; + max_ci++; + ri_width += p->width; + } + + // D. + min_ci = MAX(0, p->ri3 + 1 - p->window - p->ldiff); + max_ci = l2 + 1; + wpsi_start = 2; + if (p->ri2 == p->ri3) { + // C is skipped + wpsi_start = min_ci + 1; + } + for (ri=p->ri3+1; riwidth; + } + + return 0; +} + + /*! 
Get maximal value in matrix diff --git a/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/dd_dtw.h b/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/dd_dtw.h index 65e92a24..ad8701a7 100644 --- a/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/dd_dtw.h +++ b/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/dd_dtw.h @@ -131,9 +131,10 @@ void dtw_expand_wps_affinity(seq_t *wps, seq_t *full, idx_t l1, idx_t l2, DTWSet void dtw_expand_wps_slice_affinity(seq_t *wps, seq_t *full, idx_t l1, idx_t l2, idx_t rb, idx_t re, idx_t cb, idx_t ce, DTWSettings *settings); void dtw_wps_negativize_value(DTWWps* p, seq_t *wps, idx_t l1, idx_t l2, idx_t r, idx_t c); void dtw_wps_positivize_value(DTWWps* p, seq_t *wps, idx_t l1, idx_t l2, idx_t r, idx_t c); -void dtw_wps_positivize(DTWWps* p, seq_t *wps, idx_t rb, idx_t re); -void dtw_wps_negativize(DTWWps* p, seq_t *wps, idx_t rb, idx_t re); +void dtw_wps_positivize(DTWWps* p, seq_t *wps, idx_t l1, idx_t l2, idx_t rb, idx_t re, idx_t cb, idx_t ce); +void dtw_wps_negativize(DTWWps* p, seq_t *wps, idx_t l1, idx_t l2, idx_t rb, idx_t re, idx_t cb, idx_t ce); idx_t dtw_wps_loc(DTWWps* p, idx_t r, idx_t c, idx_t l1, idx_t l2); +idx_t dtw_wps_loc_columns(DTWWps* p, idx_t r, idx_t *cb, idx_t *ce, idx_t l1, idx_t l2); idx_t dtw_wps_max(DTWWps* p, seq_t *wps, idx_t *r, idx_t *c, idx_t l1, idx_t l2); idx_t dtw_best_path(seq_t *wps, idx_t *i1, idx_t *i2, idx_t l1, idx_t l2, DTWSettings *settings); idx_t dtw_best_path_affinity(seq_t *wps, idx_t *i1, idx_t *i2, idx_t l1, idx_t l2, idx_t s1, idx_t s2, DTWSettings *settings); diff --git a/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/jinja/dd_dtw.jinja.c b/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/jinja/dd_dtw.jinja.c index 60075d5b..e33b9a9b 100644 --- a/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/jinja/dd_dtw.jinja.c +++ b/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/jinja/dd_dtw.jinja.c @@ -160,10 +160,45 @@ void dtw_wps_positivize_value(DTWWps* p, seq_t *wps, idx_t l1, idx_t l2, idx_t r @param rb Row begin 
@param re Row end */ -void dtw_wps_negativize(DTWWps* p, seq_t *wps, idx_t rb, idx_t re) { +void dtw_wps_negativize(DTWWps* p, seq_t *wps, idx_t l1, idx_t l2, idx_t rb, idx_t re, idx_t cb, idx_t ce) { + idx_t i, j, wpsi, cbp, cep, cbs, ces; idx_t idx = rb*p->width;; - for (idx_t i=rb; iwidth; j++) { + for (i=rb; iwidth; j++) { + if (wps[idx] > 0 && wps[idx] != INFINITY) { + wps[idx] = -wps[idx]; + } + idx++; + } + } + // above + for (i=1; i [%zu,%zu] -- %zu + %zu\n", cbp, cep, wpsi, cb-cbs); */ + idx = wpsi + (cb - cbs); + for (j=cbp; j 0 && wps[idx] != INFINITY) { + wps[idx] = -wps[idx]; + } + idx++; + } + } + // below + for (i=re; i [%zu,%zu] -- %zu + %zu\n", cbp, cep, wpsi, cb-cbs); */ + idx = wpsi + (cb - cbs); + for (j=cbp; j 0 && wps[idx] != INFINITY) { wps[idx] = -wps[idx]; } @@ -173,16 +208,51 @@ void dtw_wps_negativize(DTWWps* p, seq_t *wps, idx_t rb, idx_t re) { } -void dtw_wps_positivize(DTWWps* p, seq_t *wps, idx_t rb, idx_t re) { +void dtw_wps_positivize(DTWWps* p, seq_t *wps, idx_t l1, idx_t l2, idx_t rb, idx_t re, idx_t cb, idx_t ce) { + idx_t i, j, wpsi, cbp, cep, cbs, ces; idx_t idx = rb*p->width;; - for (idx_t i=rb; iwidth; j++) { + for (i=rb; iwidth; j++) { if (wps[idx] < 0 && wps[idx] != -INFINITY) { wps[idx] = -wps[idx]; } idx++; } } + // above + for (i=1; i [%zu,%zu] -- %zu + %zu\n", cbp, cep, wpsi, cb-cbs); */ + idx = wpsi + (cb - cbs); + for (j=cbp; j [%zu,%zu] -- %zu + %zu\n", cbp, cep, wpsi, cb-cbs); */ + idx = wpsi + (cb - cbs); + for (j=cbp; jwidth; + idx_t min_ci, max_ci; + + // First row is inf + ri_width = p->width; + + // A. + min_ci = 0; + max_ci = p->window + p->ldiffc + 1; + for (ri=1; riri1+1; ri++) { + ci = min_ci; + if (ri == r) { + *cb = min_ci; + *ce = max_ci; + return ri_width; + } + max_ci++; + ri_width += p->width; + } + + // B. + min_ci = 0; + max_ci = l2 + 1; + for (ri=p->ri1+1; riri2+1; ri++) { + ci = min_ci; + if (ri == r) { + *cb = min_ci; + *ce = max_ci; + return ri_width; + } + ri_width += p->width; + } + + // C. 
+ min_ci = 1; + max_ci = 1 + 2 * p->window - 1 + p->ldiff + 1; + for (ri=p->ri2+1; riri3+1; ri++) { + ci = min_ci; + if (ri == r) { + *cb = min_ci; + *ce = max_ci; + return ri_width; + } + min_ci++; + max_ci++; + ri_width += p->width; + } + + // D. + min_ci = MAX(0, p->ri3 + 1 - p->window - p->ldiff); + max_ci = l2 + 1; + wpsi_start = 2; + if (p->ri2 == p->ri3) { + // C is skipped + wpsi_start = min_ci + 1; + } + for (ri=p->ri3+1; riwidth; + } + + return 0; +} + + /*! Get maximal value in matrix diff --git a/dtaidistance/lib/DTAIDistanceC/Makefile b/dtaidistance/lib/DTAIDistanceC/Makefile index 44e5dc2a..d89ff368 100644 --- a/dtaidistance/lib/DTAIDistanceC/Makefile +++ b/dtaidistance/lib/DTAIDistanceC/Makefile @@ -18,8 +18,10 @@ CC=gcc # CFLAGS=-Wall -g -Xpreprocessor -fopenmp -lomp -I/opt/homebrew/include -L/opt/homebrew/lib CFLAGS=-Wall -g -Xpreprocessor -fopenmp -lomp -I/opt/homebrew/include # CFLAGS="-Wall -g -Xpreprocessor -fopenmp -lomp -I/opt/homebrew/opt/llvm/include -I/opt/homebrew/include" -# LDFLAGS="-L/opt/homebrew/opt/llvm/lib -L/opt/homebrew/lib" -LDFLAGS=/opt/homebrew/lib/libomp.a -Wall -g +# LDFLAGS=-L/opt/homebrew/opt/llvm/lib -L/opt/homebrew/lib -lomp +LDFLAGS=-L/opt/homebrew/lib -lomp +# LDFLAGS=/opt/homebrew/lib/libomp.a -Wall -g +# LDFLAGS=-L/opt/homebrew/lib -lomp -Xlinker -rpath -Xlinker /usr/local/lib2 CPPFLAGS= DEPS = DTAIDistanceC/dd_globals.h jinja OBJ = DTAIDistanceC/dd_benchmark.o \ From 74d7340c5bd3ffc8bd0465ba9ad8cda18ddfdfa6 Mon Sep 17 00:00:00 2001 From: wannesm Date: Sun, 16 Oct 2022 23:19:53 +0200 Subject: [PATCH 12/59] gcc pragma --- dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/dd_dtw.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/dd_dtw.h b/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/dd_dtw.h index ad8701a7..b1b93263 100644 --- a/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/dd_dtw.h +++ b/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/dd_dtw.h @@ -31,10 +31,13 @@ static 
volatile int keepRunning = 1; @var printPrecision @abstract Number of decimals to print when printing (partial) distances. */ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-variable" static int printPrecision = 3; static int printDigits = 7; // 3+4 static char printBuffer[20]; static char printFormat[5]; +#pragma GCC diagnostic pop /** From 921b6fd50f03f225f4b8cc9eeb07db5180efb8b2 Mon Sep 17 00:00:00 2001 From: wannesm Date: Sat, 22 Oct 2022 14:08:22 +0200 Subject: [PATCH 13/59] Make it easier to use float/int --- dtaidistance/jinja/Makefile | 20 ++++++-- dtaidistance/jinja/generate.py | 89 +++++++++++++++++++++------------- 2 files changed, 71 insertions(+), 38 deletions(-) diff --git a/dtaidistance/jinja/Makefile b/dtaidistance/jinja/Makefile index 2bc72021..b964f96c 100644 --- a/dtaidistance/jinja/Makefile +++ b/dtaidistance/jinja/Makefile @@ -3,11 +3,21 @@ DEPS_dtw_cc_omp = $(shell python3 generate.py -dq dtw_cc_omp.pyx) DEPS_globals = $(shell python3 generate.py -dq dtaidistancec_globals.pxd) DEPS_dtw_cc_pxd = $(shell python3 generate.py -dq dtw_cc.pxd) DEPS_ed_cc = $(shell python3 generate.py -dq ed_cc.pyx) +ARGS := .PHONY: default default: clean generate +.PHONY: float +float: ARGS := --seqt=float +float: clean generate + +.PHONY: int +int: ARGS := --seqt=int +int: clean generate + + .PHONY: generate generate: jinja replace @@ -16,35 +26,35 @@ jinja: dtw_cc.pyx dtw_cc_omp.pyx dtaidistancec_globals.pxd dtw_cc.pxd dtw_cc.pyx: $(DEPS_dtw_cc) @echo "Changed:" $? - python3 generate.py $@ + python3 generate.py $(ARGS) $@ ../dtw_cc.pyx: dtw_cc.pyx cp dtw_cc.pyx ../ dtw_cc_omp.pyx: $(DEPS_dtw_cc_omp) @echo "Changed:" $? - python3 generate.py $@ + python3 generate.py $(ARGS) $@ ../dtw_cc_omp.pyx: dtw_cc_omp.pyx cp dtw_cc_omp.pyx ../ dtw_cc.pxd: $(DEPS_dtw_cc_pxd) @echo "Changed:" $? 
- python3 generate.py $@ + python3 generate.py $(ARGS) $@ ../dtw_cc.pxd: dtw_cc.pxd cp dtw_cc.pxd ../ dtaidistancec_globals.pxd: $(DEPS_globals) @echo "Changed:" $? - python3 generate.py $@ + python3 generate.py $(ARGS) $@ ../dtaidistancec_globals.pxd: dtaidistancec_globals.pxd cp dtaidistancec_globals.pxd ../ ed_cc.pyx: $(DEPS_ed_cc) @echo "Changed:" $? - python3 generate.py $@ + python3 generate.py $(ARGS) $@ ../ed_cc.pyx: ed_cc.pyx cp ed_cc.pyx ../ diff --git a/dtaidistance/jinja/generate.py b/dtaidistance/jinja/generate.py index 0742dd79..e2cb19a3 100755 --- a/dtaidistance/jinja/generate.py +++ b/dtaidistance/jinja/generate.py @@ -24,34 +24,37 @@ seq_format = "d" # https://docs.python.org/3/library/array.html # Also change the type in lib/DTAIDistanceC/DTAIDistanceC/dd_globals.h -set_vars = {"seq_tpy": seq_tpy, "seq_t": seq_t, "seq_format": seq_format} - - -targets = { - "dtw_cc.pyx": - ["dtw_cc.jinja.pyx", - set_vars, - ["dtw_cc_warpingpaths.jinja.pyx", - "dtw_cc_distancematrix.jinja.pyx", - "dtw_cc_warpingpath.jinja.pyx", - "dtw_cc_dba.jinja.pyx"]], - "dtw_cc_omp.pyx": - ["dtw_cc_omp.jinja.pyx", - set_vars, - []], - "dtw_cc.pxd": - ["dtw_cc.jinja.pxd", - set_vars, - []], - "dtaidistancec_globals.pxd": - ["dtaidistancec_globals.jinja.pxd", - set_vars, - []], - "ed_cc.pyx": - ["ed_cc.jinja.pyx", - set_vars, - []], -} + +def set_vars(): + return {"seq_tpy": seq_tpy, "seq_t": seq_t, "seq_format": seq_format} + + +def targets(): + return { + "dtw_cc.pyx": + ["dtw_cc.jinja.pyx", + set_vars(), + ["dtw_cc_warpingpaths.jinja.pyx", + "dtw_cc_distancematrix.jinja.pyx", + "dtw_cc_warpingpath.jinja.pyx", + "dtw_cc_dba.jinja.pyx"]], + "dtw_cc_omp.pyx": + ["dtw_cc_omp.jinja.pyx", + set_vars(), + []], + "dtw_cc.pxd": + ["dtw_cc.jinja.pxd", + set_vars(), + []], + "dtaidistancec_globals.pxd": + ["dtaidistancec_globals.jinja.pxd", + set_vars(), + []], + "ed_cc.pyx": + ["ed_cc.jinja.pyx", + set_vars(), + []], + } essential_targets = ['dtw_cc.pyx', 'dtw_cc.pxd', 
'dtaidistancec_globals.pxd', 'dtw_cc_omp.pyx', 'ed_cc.pyx'] @@ -59,7 +62,7 @@ def generate(target): logger.info(f'Generating: {target}') fno = target - fni, kwargs, _deps = targets[target] + fni, kwargs, _deps = targets()[target] template = templateEnv.get_template(fni) outputText = template.render(**kwargs) with open(fno, 'w') as o: @@ -68,17 +71,22 @@ def generate(target): def dependencies(target): logger.info(f'Dependencies for: {target}') - fni, _kwargs, deps = targets[target] + fni, _kwargs, deps = targets()[target] return [fni] + deps def main(argv=None): + global seq_t + global seq_tpy + global seq_format + global set_vars + parser = argparse.ArgumentParser(description='Generate source code files from templates') parser.add_argument('--verbose', '-v', action='count', default=0, help='Verbose output') parser.add_argument('--quiet', '-q', action='count', default=0, help='Quiet output') parser.add_argument('--deps', '-d', action='store_true', help='Print dependencies') parser.add_argument('--targets', '-t', action='store_true', help='Print available targets') - # parser.add_argument('--output', '-o', required=True, help='Output file') + parser.add_argument('--seqt', help='Data type to use for values in the sequence (default double)') # parser.add_argument('--version', action='version', version='%(prog)s 1.0') parser.add_argument('input', nargs='*', help='List of target files to generate') args = parser.parse_args(argv) @@ -88,15 +96,30 @@ def main(argv=None): if args.targets: print('Targets:') - for k in targets.keys(): + for k in targets().keys(): e = ' (default)' if k in essential_targets else '' print(f'- {k}{e}') return 0 + if args.seqt: + if args.seqt == "double": + pass + elif args.seqt == "float": + seq_t = "float" + seq_tpy = "float" + seq_format = "f" + elif args.seqt == "int": + seq_t = "int" + seq_tpy = "int" + seq_format = "i" + else: + raise TypeError("seqt should be one of double/float/int") + + if args.input is None or len(args.input) == 0: 
inputs = essential_targets elif args.input[0] == "all": - inputs = targets.keys() + inputs = targets().keys() else: inputs = args.input From f5cd0373a85e2bb1b755356c7f7c24f953758eb1 Mon Sep 17 00:00:00 2001 From: wannesm Date: Sat, 22 Oct 2022 14:13:56 +0200 Subject: [PATCH 14/59] Make it easier to use float/int --- dtaidistance/jinja/Makefile | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dtaidistance/jinja/Makefile b/dtaidistance/jinja/Makefile index b964f96c..f6b86442 100644 --- a/dtaidistance/jinja/Makefile +++ b/dtaidistance/jinja/Makefile @@ -12,14 +12,16 @@ default: clean generate .PHONY: float float: ARGS := --seqt=float float: clean generate + sed -i '' 's/^typedef .* seq_t;$$/typedef float seq_t;/g' ../lib/DTAIDistanceC/DTAIDistanceC/dd_globals.h .PHONY: int int: ARGS := --seqt=int int: clean generate - + sed -i '' 's/^typedef .* seq_t;$$/typedef int seq_t;/g' ../lib/DTAIDistanceC/DTAIDistanceC/dd_globals.h .PHONY: generate generate: jinja replace + sed -i '' 's/^typedef .* seq_t;$$/typedef double seq_t;/g' ../lib/DTAIDistanceC/DTAIDistanceC/dd_globals.h .PHONY: jinja jinja: dtw_cc.pyx dtw_cc_omp.pyx dtaidistancec_globals.pxd dtw_cc.pxd From 7c7459b3d1990c5a0b8f685517c59937a605ed83 Mon Sep 17 00:00:00 2001 From: wannesm Date: Tue, 25 Oct 2022 12:40:04 +0200 Subject: [PATCH 15/59] msm implementation --- dtaidistance/msm.py | 57 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 dtaidistance/msm.py diff --git a/dtaidistance/msm.py b/dtaidistance/msm.py new file mode 100644 index 00000000..a2e2313b --- /dev/null +++ b/dtaidistance/msm.py @@ -0,0 +1,57 @@ +# -*- coding: UTF-8 -*- +""" +dtaidistance.msm +~~~~~~~~~~~~~~~~ + +Move-Split-Merge (MSM) + +:author: Wannes Meert +:copyright: Copyright 2022 KU Leuven, DTAI Research Group. +:license: Apache License, Version 2.0, see LICENSE for details. 
+ +""" +import logging +import math + +import numpy as np + + +def distance(x, y, sm_cost=0.1): + """MSM distance + + A. Stefan, V. Athitsos, and G. Das. + The move-split-merge metric for time series. + IEEE transactions on Knowledge and Data Engineering, + 25(6):1425–1438, 2012. + + :param x: first time series + :param y: second time series + :param sm_cost: Split-Merge cost + :return: MSM distance + """ + # setup + def c(a, b, c): + if (b <= a <= c) or (b >= a >= c): + return sm_cost + return sm_cost + min(abs(a - b), abs(a - c)) + m = len(x) + n = len(y) + cost = np.zeros((m, n)) + + # initialization + cost[0, 0] = abs(x[0] - y[0]) + for i in range(1, m): + cost[i, 0] = cost[i-1, 0] + c(x[i], x[i-1], y[0]) + for j in range(1, n): + cost[0, j] = cost[0, j-1] + c(y[j], x[0], y[j-1]) + + # main loop + for i in range(1, n): + for j in range(1, m): + d = [cost[i - 1][j - 1] + abs(x[i] - y[j]), + cost[i - 1][j] + c(x[i], x[i - 1], y[j]), + cost[i][j - 1] + c(y[j], x[i], y[j - 1])] + # print(d) + cost[i, j] = np.min(d) + + return cost[m-1, n-1] From 35b6d8e03acc81b360a7b28f634e50da5720522c Mon Sep 17 00:00:00 2001 From: wannesm Date: Tue, 25 Oct 2022 12:55:58 +0200 Subject: [PATCH 16/59] distance to similarity --- dtaidistance/similarity.py | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 dtaidistance/similarity.py diff --git a/dtaidistance/similarity.py b/dtaidistance/similarity.py new file mode 100644 index 00000000..b0bd21b0 --- /dev/null +++ b/dtaidistance/similarity.py @@ -0,0 +1,36 @@ +try: + import numpy as np +except ImportError: + np = None + + +def distance_to_similarity(D, r=None, method='exponential'): + """Transform a distance matrix to a similarity matrix. 
+ + The avaiable methods are: + - Exponential: e^(-D / r) + r is 1 if not given + - Reciprocal: 1 / (r + D) + r is 0.0000001 if not given + - Reverse: r - D + r is min(D) + max(D) if not given + + :param D: The distance matrix + :param r: A scaling or smoothing parameter. + :param method: One of 'exponential', 'reciprocal', 'reverse' + :return: Similarity matrix S + """ + method = method.lower() + if method == 'exponential': + if r is None: + r = 1 + S = np.exp(-D / r) + elif method == 'reciprocal': + if r is None: + r = 0.0000001 + S = 1 / (r + D) + elif method == 'reverse': + if r is None: + r = np.min(D) + np.max(D) + S = r - D + return S From e010034907cfb32bf4d2adfa50b007089a1186f5 Mon Sep 17 00:00:00 2001 From: wannesm Date: Tue, 25 Oct 2022 14:18:04 +0200 Subject: [PATCH 17/59] similarity --- dtaidistance/similarity.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/dtaidistance/similarity.py b/dtaidistance/similarity.py index b0bd21b0..b977cfdd 100644 --- a/dtaidistance/similarity.py +++ b/dtaidistance/similarity.py @@ -9,7 +9,7 @@ def distance_to_similarity(D, r=None, method='exponential'): The avaiable methods are: - Exponential: e^(-D / r) - r is 1 if not given + r is max(D) if not given - Reciprocal: 1 / (r + D) r is 0.0000001 if not given - Reverse: r - D @@ -23,8 +23,12 @@ def distance_to_similarity(D, r=None, method='exponential'): method = method.lower() if method == 'exponential': if r is None: - r = 1 + r = np.max(D) S = np.exp(-D / r) + elif method == 'gaussian': + if r is None: + r = np.max(D) + S = np.exp(-np.power(D, 2) / r) elif method == 'reciprocal': if r is None: r = 0.0000001 @@ -33,4 +37,6 @@ def distance_to_similarity(D, r=None, method='exponential'): if r is None: r = np.min(D) + np.max(D) S = r - D + else: + raise ValueError("method={} is not supported".format(method)) return S From 2a5d4dd4f848f2a76dfbd0abcb9b79b9c980eff2 Mon Sep 17 00:00:00 2001 From: wannesm Date: Tue, 25 Oct 2022 16:15:06 +0200 
Subject: [PATCH 18/59] similarity --- dtaidistance/similarity.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/dtaidistance/similarity.py b/dtaidistance/similarity.py index b977cfdd..5f6d56c1 100644 --- a/dtaidistance/similarity.py +++ b/dtaidistance/similarity.py @@ -11,7 +11,7 @@ def distance_to_similarity(D, r=None, method='exponential'): - Exponential: e^(-D / r) r is max(D) if not given - Reciprocal: 1 / (r + D) - r is 0.0000001 if not given + r is 1 if not given - Reverse: r - D r is min(D) + max(D) if not given @@ -31,12 +31,21 @@ def distance_to_similarity(D, r=None, method='exponential'): S = np.exp(-np.power(D, 2) / r) elif method == 'reciprocal': if r is None: - r = 0.0000001 + r = 1 S = 1 / (r + D) elif method == 'reverse': if r is None: r = np.min(D) + np.max(D) - S = r - D + S = (r - D) / r else: raise ValueError("method={} is not supported".format(method)) return S + + +def squash(X, r=None, base=None): + """Squash a function monotonically to a range between 0 and 1.""" + if r is None: + r = 1 + if base is None: + return 1 - np.exp(-np.power(X, 2) / r**2) + return 1 - np.power(base, -np.power(X, 2) / r**2) From c202c923a4f0e98ef0fe4b0f2c90025d95889b12 Mon Sep 17 00:00:00 2001 From: wannesm Date: Tue, 25 Oct 2022 16:17:03 +0200 Subject: [PATCH 19/59] similarity --- dtaidistance/similarity.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/dtaidistance/similarity.py b/dtaidistance/similarity.py index 5f6d56c1..f979d661 100644 --- a/dtaidistance/similarity.py +++ b/dtaidistance/similarity.py @@ -43,7 +43,13 @@ def distance_to_similarity(D, r=None, method='exponential'): def squash(X, r=None, base=None): - """Squash a function monotonically to a range between 0 and 1.""" + """Squash a function monotonically to a range between 0 and 1. + + Based on: + Vercruyssen, V., Meert, W., Verbruggen, G., Maes, K., Baumer, R., & Davis, J. + (2018). 
Semi-supervised anomaly detection with an application to water analytics. + In 2018 IEEE international conference on data mining (ICDM) (Vol. 2018, pp. 527-536) + """ if r is None: r = 1 if base is None: From 392dfa24c3074fbc0dfb13eb068714aac9c7b6bf Mon Sep 17 00:00:00 2001 From: wannesm Date: Tue, 25 Oct 2022 16:27:12 +0200 Subject: [PATCH 20/59] squashing --- dtaidistance/similarity.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/dtaidistance/similarity.py b/dtaidistance/similarity.py index f979d661..229dbe93 100644 --- a/dtaidistance/similarity.py +++ b/dtaidistance/similarity.py @@ -42,7 +42,7 @@ def distance_to_similarity(D, r=None, method='exponential'): return S -def squash(X, r=None, base=None): +def squash(X, r=None, base=None, x0=0, method="logistic"): """Squash a function monotonically to a range between 0 and 1. Based on: @@ -50,8 +50,16 @@ def squash(X, r=None, base=None): (2018). Semi-supervised anomaly detection with an application to water analytics. In 2018 IEEE international conference on data mining (ICDM) (Vol. 2018, pp. 
527-536) """ - if r is None: - r = 1 - if base is None: - return 1 - np.exp(-np.power(X, 2) / r**2) - return 1 - np.power(base, -np.power(X, 2) / r**2) + if method == "gaussian": + x0 = 0 # not supported for gaussian + if r is None: + r = 1 + if base is None: + return 1 - np.exp(-np.power(X - x0, 2) / r**2) + return 1 - np.power(base, -np.power(X - x0, 2) / r**2) + elif method == "logistic": + if r is None: + r = 1 + if base is None: + return 1 / (1 + np.exp(-(X - x0) / r)) + return 1 / (1 + np.power(base, -(X - x0) / r)) From dd8dffa42e78a52945c8138388cb65e0e8e3ebc3 Mon Sep 17 00:00:00 2001 From: wannesm Date: Tue, 25 Oct 2022 16:34:25 +0200 Subject: [PATCH 21/59] similarity --- dtaidistance/similarity.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/dtaidistance/similarity.py b/dtaidistance/similarity.py index 229dbe93..11b0539f 100644 --- a/dtaidistance/similarity.py +++ b/dtaidistance/similarity.py @@ -10,6 +10,8 @@ def distance_to_similarity(D, r=None, method='exponential'): The avaiable methods are: - Exponential: e^(-D / r) r is max(D) if not given + - Gaussian: e^(-D^2 / r^2) + r is max(D) if not given - Reciprocal: 1 / (r + D) r is 1 if not given - Reverse: r - D @@ -28,7 +30,7 @@ def distance_to_similarity(D, r=None, method='exponential'): elif method == 'gaussian': if r is None: r = np.max(D) - S = np.exp(-np.power(D, 2) / r) + S = np.exp(-np.power(D, 2) / r**2) elif method == 'reciprocal': if r is None: r = 1 @@ -45,6 +47,10 @@ def distance_to_similarity(D, r=None, method='exponential'): def squash(X, r=None, base=None, x0=0, method="logistic"): """Squash a function monotonically to a range between 0 and 1. + The available methods are: + - Logistic: 1 / (1 + e^(-(X-x0) / r) + - Gaussian: e^(-(X-x0)^2 / r^2) + Based on: Vercruyssen, V., Meert, W., Verbruggen, G., Maes, K., Baumer, R., & Davis, J. (2018). Semi-supervised anomaly detection with an application to water analytics. 
From 1e73305870297c71110abb9d1dd9d40e4bc7ba5b Mon Sep 17 00:00:00 2001 From: wannesm Date: Mon, 31 Oct 2022 22:56:03 +0100 Subject: [PATCH 22/59] subsequence fix --- dtaidistance/subsequence/dtw.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/dtaidistance/subsequence/dtw.py b/dtaidistance/subsequence/dtw.py index b15918a2..20522200 100644 --- a/dtaidistance/subsequence/dtw.py +++ b/dtaidistance/subsequence/dtw.py @@ -622,9 +622,12 @@ def align(self, k=None): heapq.heappushpop(h, (-dist, idx)) max_dist = -min(h)[0] self.dists_options['max_dist'] = max_dist - if self.keep_all_distances: + if k is None or self.keep_all_distances: self.distances[idx] = dist + if k is not None: self.kbest_distances = sorted((-v, i) for v, i in h) + else: + self.kbest_distances = sorted((self.distances[i], i) for i in np.argsort(self.distances)) self.k = k From 192025bb4a6d1a270ec90febef7e04b1fcd7ed4d Mon Sep 17 00:00:00 2001 From: wannesm Date: Mon, 31 Oct 2022 23:24:40 +0100 Subject: [PATCH 23/59] subsequence fix --- dtaidistance/subsequence/dtw.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dtaidistance/subsequence/dtw.py b/dtaidistance/subsequence/dtw.py index 20522200..b4a0cac3 100644 --- a/dtaidistance/subsequence/dtw.py +++ b/dtaidistance/subsequence/dtw.py @@ -625,9 +625,11 @@ def align(self, k=None): if k is None or self.keep_all_distances: self.distances[idx] = dist if k is not None: + # hh = np.array([-v for v, _ in h]) + # self.kbest_distances = [(-h[i][0], h[i][1]) for i in np.argsort(hh)] self.kbest_distances = sorted((-v, i) for v, i in h) else: - self.kbest_distances = sorted((self.distances[i], i) for i in np.argsort(self.distances)) + self.kbest_distances = [(self.distances[i], i) for i in np.argsort(self.distances)] self.k = k From 45be3b2457b78620acdc2d9c5b3d86c2ef3f39e8 Mon Sep 17 00:00:00 2001 From: wannesm Date: Mon, 31 Oct 2022 23:32:40 +0100 Subject: [PATCH 24/59] subsequence fix --- 
dtaidistance/subsequence/dtw.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dtaidistance/subsequence/dtw.py b/dtaidistance/subsequence/dtw.py index b4a0cac3..f9f2bfb2 100644 --- a/dtaidistance/subsequence/dtw.py +++ b/dtaidistance/subsequence/dtw.py @@ -612,7 +612,7 @@ def align(self, k=None): if self.use_lb and self.lbs[idx] > max_dist: continue dist = dtw.distance(self.query, series, **self.dists_options) - if k is not None: + if k is not None and not self.keep_all_distances: if len(h) < k: if not np.isinf(dist): heapq.heappush(h, (-dist, idx)) @@ -624,7 +624,7 @@ def align(self, k=None): self.dists_options['max_dist'] = max_dist if k is None or self.keep_all_distances: self.distances[idx] = dist - if k is not None: + if k is not None and not self.keep_all_distances: # hh = np.array([-v for v, _ in h]) # self.kbest_distances = [(-h[i][0], h[i][1]) for i in np.argsort(hh)] self.kbest_distances = sorted((-v, i) for v, i in h) From 34b98dff68703c6918758e69853031736414864c Mon Sep 17 00:00:00 2001 From: wannesm Date: Mon, 31 Oct 2022 23:43:42 +0100 Subject: [PATCH 25/59] subsequence fix --- dtaidistance/subsequence/dtw.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dtaidistance/subsequence/dtw.py b/dtaidistance/subsequence/dtw.py index f9f2bfb2..af01d374 100644 --- a/dtaidistance/subsequence/dtw.py +++ b/dtaidistance/subsequence/dtw.py @@ -601,7 +601,7 @@ def align_fast(self, k=None): def align(self, k=None): if self.distances is not None and self.k >= k: return - if self.keep_all_distances: + if k is None or self.keep_all_distances: self.distances = np.zeros((len(self.s),)) if self.use_lb: self.compute_lbs() @@ -612,7 +612,7 @@ def align(self, k=None): if self.use_lb and self.lbs[idx] > max_dist: continue dist = dtw.distance(self.query, series, **self.dists_options) - if k is not None and not self.keep_all_distances: + if k is not None: if len(h) < k: if not np.isinf(dist): heapq.heappush(h, (-dist, idx)) @@ 
-622,9 +622,9 @@ def align(self, k=None): heapq.heappushpop(h, (-dist, idx)) max_dist = -min(h)[0] self.dists_options['max_dist'] = max_dist - if k is None or self.keep_all_distances: + if self.keep_all_distances: self.distances[idx] = dist - if k is not None and not self.keep_all_distances: + if k is not None: # hh = np.array([-v for v, _ in h]) # self.kbest_distances = [(-h[i][0], h[i][1]) for i in np.argsort(hh)] self.kbest_distances = sorted((-v, i) for v, i in h) From beff2cc29e0c7407f92a31371b8d4f34a96b8efc Mon Sep 17 00:00:00 2001 From: wannesm Date: Tue, 1 Nov 2022 10:44:57 +0100 Subject: [PATCH 26/59] Improvements for subsequence search --- dtaidistance/subsequence/dtw.py | 45 +++++++++++++++++++++------------ tests/test_subsequence.py | 42 +++++++++++++++++++++++++++++- 2 files changed, 70 insertions(+), 17 deletions(-) diff --git a/dtaidistance/subsequence/dtw.py b/dtaidistance/subsequence/dtw.py index af01d374..0de65017 100644 --- a/dtaidistance/subsequence/dtw.py +++ b/dtaidistance/subsequence/dtw.py @@ -491,17 +491,21 @@ def best_path(self, row, col): return p -def subsequence_search(query, series, dists_options=None, use_lb=False, keep_all_distances=False): +def subsequence_search(query, series, dists_options=None, use_lb=False, keep_all_distances=False, + max_dist=None, max_value=None): """See SubsequenceSearch. :param query: Time series to search for :param series: Iterator over time series to perform search on. This can be for example windows over a long time series. 
- :param dists_options: Options passed on to dtw.distance + :param dists_options: Options passed on to `dtw.distance` + :param max_dist: Ignore DTW distances larger than this value + :param max_value: Ignore normalized DTW distances larger than this value :return: SubsequenceSearch object """ ss = SubsequenceSearch(query, series, dists_options=dists_options, use_lb=use_lb, - keep_all_distances=keep_all_distances) + keep_all_distances=keep_all_distances, + max_dist=max_dist, max_value=max_value) return ss @@ -561,13 +565,18 @@ def __str__(self): class SubsequenceSearch: - def __init__(self, query, s, dists_options=None, use_lb=False, keep_all_distances=False): + def __init__(self, query, s, dists_options=None, use_lb=False, keep_all_distances=False, + max_dist=None, max_value=None): """Search the best matching (subsequence) time series compared to a given time series. :param query: Time series to search for :param s: Iterator over time series to perform search on. This can be for example windows over a long time series. 
- :param dists_options: Options passed on to dtw.distance + :param dists_options: Options passed on to `dtw.distance` + :param max_dist: Ignore DTW distances larger than this value + if max_dist is also given in dists_options, then the one in dists_options is ignored + if both max_dist and max_value are given, the smallest is used + :param max_value: Ignore normalized DTW distances larger than this value """ self.query = query self.s = s @@ -576,11 +585,15 @@ def __init__(self, query, s, dists_options=None, use_lb=False, keep_all_distance self.lbs = None self.k = None self.dists_options = {} if dists_options is None else dists_options - self.use_lb = use_lb - if self.k is None: - self.keep_all_distances = True + if max_dist is None: + self.max_dist = self.dists_options.get('max_dist', np.inf) else: - self.keep_all_distances = keep_all_distances + self.max_dist = max_dist + if max_value is not None: + self.max_dist = min(self.max_dist, max_value * len(self.query)) + self.dists_options['max_dist'] = self.max_dist + self.use_lb = use_lb + self.keep_all_distances = keep_all_distances if self.use_lb and not self.keep_all_distances: raise ValueError("If use_lb is true, then keep_all_distances should also be true.") @@ -607,27 +620,27 @@ def align(self, k=None): self.compute_lbs() import heapq h = [(-np.inf, -1)] - max_dist = np.inf + max_dist = self.max_dist for idx, series in enumerate(self.s): if self.use_lb and self.lbs[idx] > max_dist: continue dist = dtw.distance(self.query, series, **self.dists_options) if k is not None: if len(h) < k: - if not np.isinf(dist): + if not np.isinf(dist) and dist <= max_dist: heapq.heappush(h, (-dist, idx)) - max_dist = -min(h)[0] + max_dist = min(max_dist, -h[0][0]) else: - if not np.isinf(dist): + if not np.isinf(dist) and dist <= max_dist: heapq.heappushpop(h, (-dist, idx)) - max_dist = -min(h)[0] + max_dist = min(max_dist, -h[0][0]) self.dists_options['max_dist'] = max_dist - if self.keep_all_distances: + if self.keep_all_distances 
or k is None: self.distances[idx] = dist if k is not None: # hh = np.array([-v for v, _ in h]) # self.kbest_distances = [(-h[i][0], h[i][1]) for i in np.argsort(hh)] - self.kbest_distances = sorted((-v, i) for v, i in h) + self.kbest_distances = sorted((-v, i) for v, i in h if i != -1) else: self.kbest_distances = [(self.distances[i], i) for i in np.argsort(self.distances)] diff --git a/tests/test_subsequence.py b/tests/test_subsequence.py index c0aea8f3..7d655622 100644 --- a/tests/test_subsequence.py +++ b/tests/test_subsequence.py @@ -233,6 +233,45 @@ def create_data_subseqsearch_eeg(np, dtype=None): return query, s, k, series, s_idx +@numpyonly +def test_dtw_subseqsearch_eeg2(): + with util_numpy.test_uses_numpy() as np: + query, s, k, series, s_idx = create_data_subseqsearch_eeg(np) + sa = subsequence_search(query, s, dists_options={'use_c': True}, + keep_all_distances=False) + best = sa.kbest_matches_fast(k=k) + assert str(best) == "[SSMatch(15), SSMatch(7), SSMatch(4)]", str(best) + assert sa.distances is None + + sa = subsequence_search(query, s, dists_options={'use_c': True}, + keep_all_distances=False) + best = sa.kbest_matches_fast(k=1) + assert str(best) == "[SSMatch(15)]", str(best) + + sa = subsequence_search(query, s, dists_options={'use_c': True}, + keep_all_distances=False) + best = sa.kbest_matches_fast(k=None) + assert str(best) == "[SSMatch(15), SSMatch(7), SSMatch(4), SSMatch(11), SSMatch(6) ... 
SSMatch(14), SSMatch(10), SSMatch(9), SSMatch(1), SSMatch(3)]", str(best) + + assert best[0].value == pytest.approx(0.08045349583339727) + + sa = subsequence_search(query, s, dists_options={'use_c': True, 'max_dist': 0.0805 * len(query)}, + keep_all_distances=False) + best = sa.kbest_matches_fast(k=k) + assert str(best) == "[SSMatch(15)]", str(best) + + sa = subsequence_search(query, s, max_value=0.0805, + keep_all_distances=False) + best = sa.kbest_matches_fast(k=k) + assert str(best) == "[SSMatch(15)]", str(best) + + sa = subsequence_search(query, s, max_dist=0.0805 * len(query), + keep_all_distances=False) + best = sa.kbest_matches_fast(k=k) + assert str(best) == "[SSMatch(15)]", str(best) + + + @numpyonly @pytest.mark.benchmark(group="subseqsearch_eeg") def test_dtw_subseqsearch_eeg(benchmark): @@ -324,6 +363,7 @@ def run(): # test_dtw_subseq_bug1() # test_dtw_subseq_ndim() # test_dtw_localconcurrences_eeg() - test_dtw_subseqsearch_eeg(benchmark=None) + test_dtw_subseqsearch_eeg2() + # test_dtw_subseqsearch_eeg(benchmark=None) # test_dtw_subseqsearch_eeg_ub(benchmark=None) # test_dtw_localconcurrences_short() From af9b1fbd37172f48b3f248793c897f029b9c1800 Mon Sep 17 00:00:00 2001 From: wannesm Date: Tue, 1 Nov 2022 11:42:21 +0100 Subject: [PATCH 27/59] jinja makefile --- dtaidistance/jinja/Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dtaidistance/jinja/Makefile b/dtaidistance/jinja/Makefile index f6b86442..e0968f08 100644 --- a/dtaidistance/jinja/Makefile +++ b/dtaidistance/jinja/Makefile @@ -9,6 +9,9 @@ ARGS := .PHONY: default default: clean generate +.PHONY: double +double: default + .PHONY: float float: ARGS := --seqt=float float: clean generate From 0194878fde99eb4d562f616028b5a56ae8036147 Mon Sep 17 00:00:00 2001 From: wannesm Date: Fri, 4 Nov 2022 13:58:58 +0100 Subject: [PATCH 28/59] docs --- dtaidistance/similarity.py | 12 ++++++++++++ dtaidistance/subsequence/dtw.py | 15 +++++++++------ 2 files changed, 21 insertions(+), 6 deletions(-) 
diff --git a/dtaidistance/similarity.py b/dtaidistance/similarity.py index 11b0539f..10639f55 100644 --- a/dtaidistance/similarity.py +++ b/dtaidistance/similarity.py @@ -17,6 +17,12 @@ def distance_to_similarity(D, r=None, method='exponential'): - Reverse: r - D r is min(D) + max(D) if not given + Example usage:: + + dist_matrix = dtw.distance_matrix(series) + sim_matrix = distance_to_similarity(dist_matrix) + + :param D: The distance matrix :param r: A scaling or smoothing parameter. :param method: One of 'exponential', 'reciprocal', 'reverse' @@ -51,6 +57,12 @@ def squash(X, r=None, base=None, x0=0, method="logistic"): - Logistic: 1 / (1 + e^(-(X-x0) / r) - Gaussian: e^(-(X-x0)^2 / r^2) + Example usage:: + + dist_matrix = dtw.distance_matrix(series) + dist_matrix_sq = squash(dist_matrix) + + Based on: Vercruyssen, V., Meert, W., Verbruggen, G., Maes, K., Baumer, R., & Davis, J. (2018). Semi-supervised anomaly detection with an application to water analytics. diff --git a/dtaidistance/subsequence/dtw.py b/dtaidistance/subsequence/dtw.py index 0de65017..db484ece 100644 --- a/dtaidistance/subsequence/dtw.py +++ b/dtaidistance/subsequence/dtw.py @@ -108,12 +108,13 @@ def __init__(self, query, series, penalty=0.1, use_c=False): Based on Fundamentals of Music Processing, Meinard Müller, Springer, 2015. 
- Example: - query = np.array([1., 2, 0]) - series = np.array([1., 0, 1, 2, 1, 0, 2, 0, 3, 0, 0]) - sa = subsequence_search(query, series) - mf = sa.matching_function() - sa.kbest_matches(k=2) + Example:: + + query = np.array([1., 2, 0]) + series = np.array([1., 0, 1, 2, 1, 0, 2, 0, 3, 0, 0]) + sa = subsequence_search(query, series) + mf = sa.matching_function() + sa.kbest_matches(k=2) :param query: Subsequence to search for @@ -299,6 +300,8 @@ def __init__(self, lc, row=None, col=None): def path(self): if self._path is not None: return self._path + # TODO: always storing the path might be memory hungry + # but recomputing is impossible since the values are negated/masked afterwards self._path = self.lc.best_path(self.row, self.col) return self._path From 4acdf0cf98fd617d3d0deefb07385e2fa57851e9 Mon Sep 17 00:00:00 2001 From: wannesm Date: Wed, 16 Nov 2022 12:02:55 +0100 Subject: [PATCH 29/59] linux support for jinja --- dtaidistance/jinja/Makefile | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/dtaidistance/jinja/Makefile b/dtaidistance/jinja/Makefile index e0968f08..7c67a2b3 100644 --- a/dtaidistance/jinja/Makefile +++ b/dtaidistance/jinja/Makefile @@ -5,6 +5,16 @@ DEPS_dtw_cc_pxd = $(shell python3 generate.py -dq dtw_cc.pxd) DEPS_ed_cc = $(shell python3 generate.py -dq ed_cc.pyx) ARGS := +ifeq ($(OS),Windows_NT) + detected_OS := Windows +else + detected_OS := $(shell sh -c 'uname 2>/dev/null || echo Unknown') +endif +SED = sed +ifeq ($(detected_OS),Darwin) + SED = sed -i +endif + .PHONY: default default: clean generate @@ -15,16 +25,16 @@ double: default .PHONY: float float: ARGS := --seqt=float float: clean generate - sed -i '' 's/^typedef .* seq_t;$$/typedef float seq_t;/g' ../lib/DTAIDistanceC/DTAIDistanceC/dd_globals.h + $(SED) '' 's/^typedef .* seq_t;$$/typedef float seq_t;/g' ../lib/DTAIDistanceC/DTAIDistanceC/dd_globals.h .PHONY: int int: ARGS := --seqt=int int: clean generate - sed -i '' 's/^typedef .* seq_t;$$/typedef 
int seq_t;/g' ../lib/DTAIDistanceC/DTAIDistanceC/dd_globals.h + $(SED) '' 's/^typedef .* seq_t;$$/typedef int seq_t;/g' ../lib/DTAIDistanceC/DTAIDistanceC/dd_globals.h .PHONY: generate generate: jinja replace - sed -i '' 's/^typedef .* seq_t;$$/typedef double seq_t;/g' ../lib/DTAIDistanceC/DTAIDistanceC/dd_globals.h + $(SED) '' 's/^typedef .* seq_t;$$/typedef double seq_t;/g' ../lib/DTAIDistanceC/DTAIDistanceC/dd_globals.h .PHONY: jinja jinja: dtw_cc.pyx dtw_cc_omp.pyx dtaidistancec_globals.pxd dtw_cc.pxd From 8b959d418f8f4bfa5c56fc0715bdd4f9d6bf2104 Mon Sep 17 00:00:00 2001 From: wannesm Date: Wed, 16 Nov 2022 12:44:59 +0100 Subject: [PATCH 30/59] float: platform independent --- dtaidistance/jinja/Makefile | 25 +++++++++++++------------ dtaidistance/jinja/generate.py | 31 ++++++++++++++++++++++++------- 2 files changed, 37 insertions(+), 19 deletions(-) diff --git a/dtaidistance/jinja/Makefile b/dtaidistance/jinja/Makefile index 7c67a2b3..f0343d34 100644 --- a/dtaidistance/jinja/Makefile +++ b/dtaidistance/jinja/Makefile @@ -5,15 +5,15 @@ DEPS_dtw_cc_pxd = $(shell python3 generate.py -dq dtw_cc.pxd) DEPS_ed_cc = $(shell python3 generate.py -dq ed_cc.pyx) ARGS := -ifeq ($(OS),Windows_NT) - detected_OS := Windows -else - detected_OS := $(shell sh -c 'uname 2>/dev/null || echo Unknown') -endif -SED = sed -ifeq ($(detected_OS),Darwin) - SED = sed -i -endif +# ifeq ($(OS),Windows_NT) +# detected_OS := Windows +# else +# detected_OS := $(shell sh -c 'uname 2>/dev/null || echo Unknown') +# endif +# SED = sed +# ifeq ($(detected_OS),Darwin) +# SED = sed -i +# endif .PHONY: default @@ -25,19 +25,20 @@ double: default .PHONY: float float: ARGS := --seqt=float float: clean generate - $(SED) '' 's/^typedef .* seq_t;$$/typedef float seq_t;/g' ../lib/DTAIDistanceC/DTAIDistanceC/dd_globals.h + @# $(SED) '' 's/^typedef .* seq_t;$$/typedef float seq_t;/g' ../lib/DTAIDistanceC/DTAIDistanceC/dd_globals.h .PHONY: int int: ARGS := --seqt=int int: clean generate - $(SED) '' 's/^typedef 
.* seq_t;$$/typedef int seq_t;/g' ../lib/DTAIDistanceC/DTAIDistanceC/dd_globals.h + @# $(SED) '' 's/^typedef .* seq_t;$$/typedef int seq_t;/g' ../lib/DTAIDistanceC/DTAIDistanceC/dd_globals.h .PHONY: generate generate: jinja replace - $(SED) '' 's/^typedef .* seq_t;$$/typedef double seq_t;/g' ../lib/DTAIDistanceC/DTAIDistanceC/dd_globals.h + @# $(SED) '' 's/^typedef .* seq_t;$$/typedef double seq_t;/g' ../lib/DTAIDistanceC/DTAIDistanceC/dd_globals.h .PHONY: jinja jinja: dtw_cc.pyx dtw_cc_omp.pyx dtaidistancec_globals.pxd dtw_cc.pxd + python3 generate.py $(ARGS) dd_globals.h dtw_cc.pyx: $(DEPS_dtw_cc) @echo "Changed:" $? diff --git a/dtaidistance/jinja/generate.py b/dtaidistance/jinja/generate.py index e2cb19a3..1fffd04c 100755 --- a/dtaidistance/jinja/generate.py +++ b/dtaidistance/jinja/generate.py @@ -11,18 +11,29 @@ import argparse import logging import jinja2 +from pathlib import Path +from fileinput import FileInput logger = logging.getLogger(__name__) templateLoader = jinja2.FileSystemLoader(searchpath="./") templateEnv = jinja2.Environment(loader=templateLoader) +thisdir = Path(__file__).parent - +# Variables for the Jinja scripts seq_t = "double" seq_tpy = "double" seq_format = "d" # https://docs.python.org/3/library/array.html -# Also change the type in lib/DTAIDistanceC/DTAIDistanceC/dd_globals.h + +def change_dd_globals_h(): + # Also change the type in lib/DTAIDistanceC/DTAIDistanceC/dd_globals.h + dd_globals_h = thisdir.parent / "lib" / "DTAIDistanceC" / "DTAIDistanceC" / "dd_globals.h" + with FileInput(files=[dd_globals_h], inplace=True) as f: + for line in f: + if "typedef" in line and "seq_t" in line: + line = "typedef {} seq_t;\n".format(seq_t) + print(line, end='') def set_vars(): @@ -54,19 +65,25 @@ def targets(): ["ed_cc.jinja.pyx", set_vars(), []], + "dd_globals.h": + [change_dd_globals_h, + {}, []], } essential_targets = ['dtw_cc.pyx', 'dtw_cc.pxd', 'dtaidistancec_globals.pxd', - 'dtw_cc_omp.pyx', 'ed_cc.pyx'] + 'dtw_cc_omp.pyx', 'ed_cc.pyx', 
'dd_globals.h'] def generate(target): logger.info(f'Generating: {target}') fno = target fni, kwargs, _deps = targets()[target] - template = templateEnv.get_template(fni) - outputText = template.render(**kwargs) - with open(fno, 'w') as o: - o.write(outputText) + if callable(fni): + fni() + else: + template = templateEnv.get_template(fni) + outputText = template.render(**kwargs) + with open(fno, 'w') as o: + o.write(outputText) def dependencies(target): From cd60f1164947d9b61bafd52f1f954a9068e81e3e Mon Sep 17 00:00:00 2001 From: wannesm Date: Wed, 28 Dec 2022 00:43:16 +0100 Subject: [PATCH 31/59] Fix for warping paths in c --- dtaidistance/dtaidistancec_dtw.pxd | 12 +- dtaidistance/dtw.py | 18 +- dtaidistance/dtw_barycenter.py | 2 +- dtaidistance/dtw_cc.pyx | 70 +++++- .../DTAIDistanceC.xcodeproj/project.pbxproj | 2 - .../DTAIDistanceC/dd_benchmark.c | 222 +++++++++++++---- .../lib/DTAIDistanceC/DTAIDistanceC/dd_dtw.c | 98 +++++--- .../lib/DTAIDistanceC/DTAIDistanceC/dd_dtw.h | 14 +- .../DTAIDistanceC/dd_tests_dtw.c | 235 ++++++++++++++++-- .../DTAIDistanceC/jinja/dd_dtw.jinja.c | 62 +++-- .../DTAIDistanceC/jinja/dtw_bestpath.jinja.c | 4 +- .../DTAIDistanceC/jinja/dtw_distance.jinja.c | 6 +- .../DTAIDistanceC/jinja/dtw_expandwps.jinja.c | 2 + .../jinja/dtw_warpingpaths.jinja.c | 7 +- dtaidistance/lib/DTAIDistanceC/Makefile | 13 +- tests/test_barycenter.py | 4 +- tests/test_dtw2d.py | 19 +- tests/test_warping.py | 53 +++- 18 files changed, 665 insertions(+), 178 deletions(-) diff --git a/dtaidistance/dtaidistancec_dtw.pxd b/dtaidistance/dtaidistancec_dtw.pxd index dea3bf88..b0d929bd 100644 --- a/dtaidistance/dtaidistancec_dtw.pxd +++ b/dtaidistance/dtaidistancec_dtw.pxd @@ -79,13 +79,13 @@ cdef extern from "dd_dtw.h": DTWSettings *settings) Py_ssize_t dtw_best_path_prob(seq_t *wps, Py_ssize_t *i1, Py_ssize_t *i2, Py_ssize_t l1, Py_ssize_t l2, seq_t avg, DTWSettings *settings); - Py_ssize_t warping_path(seq_t *from_s, Py_ssize_t from_l, seq_t* to_s, Py_ssize_t to_l, - 
Py_ssize_t *from_i, Py_ssize_t *to_i, DTWSettings * settings) - Py_ssize_t warping_path_ndim(seq_t *from_s, Py_ssize_t from_l, seq_t * to_s, Py_ssize_t to_l, - Py_ssize_t *from_i, Py_ssize_t *to_i, int ndim, DTWSettings * settings) + seq_t dtw_warping_path(seq_t *from_s, Py_ssize_t from_l, seq_t* to_s, Py_ssize_t to_l, + Py_ssize_t *from_i, Py_ssize_t *to_i, Py_ssize_t *length_i, DTWSettings * settings) + seq_t dtw_warping_path_ndim(seq_t *from_s, Py_ssize_t from_l, seq_t * to_s, Py_ssize_t to_l, + Py_ssize_t *from_i, Py_ssize_t *to_i, Py_ssize_t *length_i, int ndim, DTWSettings * settings) void dtw_srand(unsigned int seed) - Py_ssize_t warping_path_prob_ndim(seq_t *from_s, Py_ssize_t from_l, seq_t* to_s, Py_ssize_t to_l, - Py_ssize_t *from_i, Py_ssize_t *to_i, seq_t avg, int ndim, DTWSettings * settings) + seq_t dtw_warping_path_prob_ndim(seq_t *from_s, Py_ssize_t from_l, seq_t* to_s, Py_ssize_t to_l, + Py_ssize_t *from_i, Py_ssize_t *to_i, Py_ssize_t *length_i, seq_t avg, int ndim, DTWSettings * settings) DTWWps dtw_wps_parts(Py_ssize_t l1, Py_ssize_t l2, DTWSettings * settings) seq_t ub_euclidean(seq_t *s1, Py_ssize_t l1, seq_t *s2, Py_ssize_t l2) diff --git a/dtaidistance/dtw.py b/dtaidistance/dtw.py index 910aea18..5b828c61 100644 --- a/dtaidistance/dtw.py +++ b/dtaidistance/dtw.py @@ -939,27 +939,31 @@ def distance_matrix_fast(s, max_dist=None, use_pruning=False, max_length_diff=No only_triu=only_triu) -def warping_path(from_s, to_s, **kwargs): +def warping_path(from_s, to_s, include_distance=False, **kwargs): """Compute warping path between two sequences.""" dist, paths = warping_paths(from_s, to_s, **kwargs) path = best_path(paths) + if include_distance: + return path, dist return path -def warping_path_fast(from_s, to_s, **kwargs): +def warping_path_fast(from_s, to_s, include_distance=False, **kwargs): """Compute warping path between two sequences.""" from_s, to_s, settings_kwargs = warping_path_args_to_c(from_s, to_s, **kwargs) - path = 
dtw_cc.warping_path(from_s, to_s, **settings_kwargs) - return path + result = dtw_cc.warping_path(from_s, to_s, include_distance=include_distance, + **settings_kwargs) + return result -def warping_path_prob(from_s, to_s, avg, use_c=True, **kwargs): +def warping_path_prob(from_s, to_s, avg, include_distance=False, use_c=True, **kwargs): """Compute warping path between two sequences.""" if not use_c: raise AttributeError('warping_path_prob with use_c=False not yet supported') from_s, to_s, settings_kwargs = warping_path_args_to_c(from_s, to_s, **kwargs) - path = dtw_cc.warping_path_prob(from_s, to_s, avg, **settings_kwargs) - return path + result = dtw_cc.warping_path_prob(from_s, to_s, avg, + include_distance=include_distance, **settings_kwargs) + return result def warping_amount(path): diff --git a/dtaidistance/dtw_barycenter.py b/dtaidistance/dtw_barycenter.py index 095e332d..dae1eca1 100644 --- a/dtaidistance/dtw_barycenter.py +++ b/dtaidistance/dtw_barycenter.py @@ -214,7 +214,7 @@ def dba(s, c, mask=None, samples=None, use_c=False, nb_initial_samples=None, **k if ndim == 1: m = dtw_cc.warping_path(c, seq, **kwargs) else: - m = dtw_cc.warping_path_ndim(c, seq, ndim, **kwargs) + m = dtw_cc.warping_path_ndim(c, seq, ndim=ndim, **kwargs) else: if ndim == 1: m = warping_path(c, seq, **kwargs) diff --git a/dtaidistance/dtw_cc.pyx b/dtaidistance/dtw_cc.pyx index 990810db..78800ddf 100644 --- a/dtaidistance/dtw_cc.pyx +++ b/dtaidistance/dtw_cc.pyx @@ -78,6 +78,18 @@ cdef class DTWWps: def __init__(self, l1, l2, DTWSettings settings): self._wps = dtaidistancec_dtw.dtw_wps_parts(l1, l2, &settings._settings) + @property + def ri1(self): + return self._wps.ri1 + + @property + def ri2(self): + return self._wps.ri2 + + @property + def ri3(self): + return self._wps.ri3 + cdef class DTWSettings: def __cinit__(self): @@ -473,7 +485,7 @@ def warping_paths_compact_affinity(seq_t[:, :] dtw, seq_t[:] s1, seq_t[:] s2, return d -def warping_path(seq_t[:] s1, seq_t[:] s2, **kwargs): 
+def warping_path(seq_t[:] s1, seq_t[:] s2, include_distance=False, **kwargs): # Assumes C contiguous cdef Py_ssize_t path_length; settings = DTWSettings(**kwargs) @@ -483,8 +495,9 @@ def warping_path(seq_t[:] s1, seq_t[:] s2, **kwargs): cdef Py_ssize_t *i2 = PyMem_Malloc((len(s1) + len(s2)) * sizeof(Py_ssize_t)) if not i2: raise MemoryError() + d = None try: - path_length = dtaidistancec_dtw.warping_path(&s1[0], len(s1), &s2[0], len(s2), i1, i2, &settings._settings) + d = dtaidistancec_dtw.dtw_warping_path(&s1[0], len(s1), &s2[0], len(s2), i1, i2, &path_length, &settings._settings) path = [] for i in range(path_length): path.append((i1[i], i2[i])) @@ -492,10 +505,12 @@ def warping_path(seq_t[:] s1, seq_t[:] s2, **kwargs): finally: PyMem_Free(i1) PyMem_Free(i2) + if include_distance is True: + return path, d return path -def warping_path_ndim(seq_t[:, :] s1, seq_t[:, :] s2, int ndim=1, **kwargs): +def warping_path_ndim(seq_t[:, :] s1, seq_t[:, :] s2, include_distance=False, int ndim=1, **kwargs): # Assumes C contiguous cdef Py_ssize_t path_length; settings = DTWSettings(**kwargs) @@ -505,8 +520,10 @@ def warping_path_ndim(seq_t[:, :] s1, seq_t[:, :] s2, int ndim=1, **kwargs): cdef Py_ssize_t *i2 = PyMem_Malloc((len(s1) + len(s2)) * sizeof(Py_ssize_t)) if not i2: raise MemoryError() + d = None try: - path_length = dtaidistancec_dtw.warping_path_ndim(&s1[0, 0], len(s1), &s2[0, 0], len(s2), i1, i2, ndim, &settings._settings) + d = dtaidistancec_dtw.dtw_warping_path_ndim(&s1[0, 0], len(s1), &s2[0, 0], len(s2), + i1, i2, &path_length, ndim, &settings._settings) path = [] for i in range(path_length): path.append((i1[i], i2[i])) @@ -514,6 +531,8 @@ def warping_path_ndim(seq_t[:, :] s1, seq_t[:, :] s2, int ndim=1, **kwargs): finally: PyMem_Free(i1) PyMem_Free(i2) + if include_distance is True: + return path, d return path @@ -536,13 +555,38 @@ def wps_expand_slice(seq_t[:, :] wps, seq_t[:, :] slice, Py_ssize_t l1, Py_ssize l1, l2, rb, re, cb, ce, &settings._settings) -def 
wps_print(seq_t[:, :] wps, **kwargs): +def wps_print(seq_t[:, :] wps, Py_ssize_t l1, Py_ssize_t l2, **kwargs): settings = DTWSettings(**kwargs) - dtaidistancec_dtw.dtw_print_wps(&wps[0,0], wps.shape[0]-1, wps.shape[1]-1, &settings._settings) + dtaidistancec_dtw.dtw_print_wps(&wps[0,0], l1, l2, &settings._settings) -def wps_print_compact(seq_t[:, :] wps, **kwargs): +def wps_print_compact(seq_t[:, :] wps, Py_ssize_t l1, Py_ssize_t l2, **kwargs): settings = DTWSettings(**kwargs) - dtaidistancec_dtw.dtw_print_wps_compact(&wps[0,0], wps.shape[0]-1, wps.shape[1]-1, &settings._settings) + dtaidistancec_dtw.dtw_print_wps_compact(&wps[0,0], l1, l2, &settings._settings) + +def wps_parts(Py_ssize_t l1, Py_ssize_t l2, **kwargs): + settings = DTWSettings(**kwargs) + return DTWWps(l1, l2, settings) + +def best_path_compact(seq_t[:, :] wps, Py_ssize_t l1, Py_ssize_t l2, **kwargs): + cdef Py_ssize_t path_length; + settings = DTWSettings(**kwargs) + cdef Py_ssize_t *i1 = PyMem_Malloc((l1 + l2) * sizeof(Py_ssize_t)) + if not i1: + raise MemoryError() + cdef Py_ssize_t *i2 = PyMem_Malloc((l1 + l2) * sizeof(Py_ssize_t)) + if not i2: + raise MemoryError() + try: + path_length = dtaidistancec_dtw.dtw_best_path(&wps[0, 0], i1, i2, l1, l2, + &settings._settings) + path = [] + for i in range(path_length): + path.append((i1[i], i2[i])) + path.reverse() + finally: + PyMem_Free(i1) + PyMem_Free(i2) + return path def best_path_compact_affinity(seq_t[:, :] wps, Py_ssize_t rs, Py_ssize_t cs, **kwargs): cdef Py_ssize_t path_length; @@ -571,7 +615,7 @@ def best_path_compact_affinity(seq_t[:, :] wps, Py_ssize_t rs, Py_ssize_t cs, ** def srand(unsigned int seed): dtaidistancec_dtw.dtw_srand(seed) -def warping_path_prob(seq_t[:] s1, seq_t[:] s2, seq_t avg, **kwargs): +def warping_path_prob(seq_t[:] s1, seq_t[:] s2, seq_t avg, include_distance=False, **kwargs): # Assumes C contiguous cdef Py_ssize_t path_length; settings = DTWSettings(**kwargs) @@ -581,9 +625,11 @@ def warping_path_prob(seq_t[:] s1, 
seq_t[:] s2, seq_t avg, **kwargs): cdef Py_ssize_t *i2 = PyMem_Malloc((len(s1) + len(s2)) * sizeof(Py_ssize_t)) if not i2: raise MemoryError() + d = None try: - path_length = dtaidistancec_dtw.warping_path_prob_ndim(&s1[0], len(s1), &s2[0], len(s2), i1, i2, - avg, 1, &settings._settings) + d = dtaidistancec_dtw.dtw_warping_path_prob_ndim(&s1[0], len(s1), &s2[0], len(s2), + i1, i2, &path_length, + avg, 1, &settings._settings) path = [] for i in range(path_length): path.append((i1[i], i2[i])) @@ -591,6 +637,8 @@ def warping_path_prob(seq_t[:] s1, seq_t[:] s2, seq_t avg, **kwargs): finally: PyMem_Free(i1) PyMem_Free(i2) + if include_distance: + return path, d return path diff --git a/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC.xcodeproj/project.pbxproj b/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC.xcodeproj/project.pbxproj index 330b1d4b..b29d68cf 100644 --- a/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC.xcodeproj/project.pbxproj +++ b/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC.xcodeproj/project.pbxproj @@ -712,7 +712,6 @@ OTHER_LDFLAGS = ( "-lcriterion", "-lomp", - "-fopenmp", ); PRODUCT_NAME = "$(TARGET_NAME)"; }; @@ -741,7 +740,6 @@ OTHER_LDFLAGS = ( "-lcriterion", "-lomp", - "-fopenmp", ); PRODUCT_NAME = "$(TARGET_NAME)"; }; diff --git a/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/dd_benchmark.c b/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/dd_benchmark.c index f622d0b3..0e41e2c1 100644 --- a/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/dd_benchmark.c +++ b/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/dd_benchmark.c @@ -32,18 +32,18 @@ void benchmark13(void); void benchmark1() { int size=10000; - double ra1[size], ra2[size]; - int i; - for (i=0; i=0; i--) { + printf("(%zd, %zd), ", i1[i], i2[i]); + } + printf("]\n"); + + DTWWps p = dtw_wps_parts(l1, l2, &settings); + seq_t * wps = (seq_t *)malloc(sizeof(seq_t) * p.length); + d = dtw_warping_paths(wps, s1, l1, s2, l2, true, true, true, &settings); + dtw_print_wps_compact(wps, l1, l2, &settings); + } void 
benchmark11() { @@ -398,26 +414,27 @@ void benchmark_affinity() { dtw_print_wps(wps, l1, l2, &settings); DTWWps p = dtw_wps_parts(l1, l2, &settings); -// idx_t rb = 2; -// idx_t re = 8; -// idx_t cb = 4; -// idx_t ce = 8; -// seq_t * wps_slice = (seq_t *)malloc(sizeof(seq_t) * (re-rb)*(ce-cb)); -// for (idx_t i=0; i<(re-rb)*(ce-cb); i++) { -// wps_slice[i] = -INFINITY; -// } -// -// dtw_expand_wps_slice_affinity(wps, wps_slice, l1, l2, rb, re, cb, ce, &settings); -// -// idx_t wpsi = 0; -// for (idx_t r=0; r<(re-rb); r++) { -// printf("[ "); -// for (idx_t c=0; c<(ce-cb); c++) { -// printf("%.2f ", wps_slice[wpsi]); -// wpsi++; -// } -// printf("]\n"); -// } + printf("Slice:\n"); + idx_t rb = 4; + idx_t re = 7; + idx_t cb = 3; + idx_t ce = 6; + seq_t * wps_slice = (seq_t *)malloc(sizeof(seq_t) * (re-rb)*(ce-cb)); + for (idx_t i=0; i<(re-rb)*(ce-cb); i++) { + wps_slice[i] = -INFINITY; + } + + dtw_expand_wps_slice_affinity(wps, wps_slice, l1, l2, rb, re, cb, ce, &settings); + + idx_t wpsi = 0; + for (idx_t r=0; r<(re-rb); r++) { + printf("[ "); + for (idx_t c=0; c<(ce-cb); c++) { + printf("%.2f ", wps_slice[wpsi]); + wpsi++; + } + printf("]\n"); + } // dtw_wps_negativize(&p, wps, 2, 5); // dtw_wps_positivize(&p, wps, 3, 4); @@ -444,6 +461,116 @@ void benchmark_affinity() { dtw_printprecision_reset(); } +void wps_test(void) { + dtw_printprecision_set(0); + + idx_t l1 = 8065; + idx_t l2 = 8065; + idx_t idx; + + DTWSettings settings = dtw_settings_default(); + settings.window = 50; + settings.penalty = 0.0018315638888734178; + idx_t wps_width = dtw_settings_wps_width(l1, l2, &settings); + printf("wps_width=%zu\n", wps_width); + seq_t * wps = (seq_t *)malloc(sizeof(seq_t) * (l2+1)*wps_width); + seq_t * series1 = (seq_t *)malloc(sizeof(seq_t) * l1); + seq_t * series2 = (seq_t *)malloc(sizeof(seq_t) * l2); + + FILE *in_file; + double number; + + // read series + in_file = fopen("/Users/wannes/Projects/Research/2016-DTW/repo_dtw/tests/rsrc/series1.txt", "r"); + if (in_file 
== NULL) { + printf("Can't open file for reading.\n"); + return; + } + idx = 0; + for (idx_t i=0; ipsi_1b < l1 && settings->psi_1e < l1 && - settings->psi_2b < l2 && settings->psi_2e < l2); + assert(settings->psi_1b <= l1 && settings->psi_1e <= l1 && + settings->psi_2b <= l2 && settings->psi_2e <= l2); idx_t ldiff; idx_t dl; // DTWPruned @@ -258,7 +258,7 @@ seq_t dtw_distance(seq_t *s1, idx_t l1, ec = ec_next; // Deal with Psi-relaxation in last column if (settings->psi_1e != 0 && minj == l2 && l1 - 1 - i <= settings->psi_1e) { - assert((i1 + 1)*length - 1 == curidx); + assert(!(settings->window == 0 || settings->window == l2) || (i1 + 1)*length - 1 == curidx); if (dtw[curidx] < psi_shortest) { // curidx is the last value psi_shortest = dtw[curidx]; @@ -304,8 +304,8 @@ Compute the DTW between two n-dimensional series. seq_t dtw_distance_ndim(seq_t *s1, idx_t l1, seq_t *s2, idx_t l2, int ndim, DTWSettings *settings) { - assert(settings->psi_1b < l1 && settings->psi_1e < l1 && - settings->psi_2b < l2 && settings->psi_2e < l2); + assert(settings->psi_1b <= l1 && settings->psi_1e <= l1 && + settings->psi_2b <= l2 && settings->psi_2e <= l2); idx_t ldiff; idx_t dl; // DTWPruned @@ -493,7 +493,7 @@ seq_t dtw_distance_ndim(seq_t *s1, idx_t l1, ec = ec_next; // Deal with Psi-relaxation in last column if (settings->psi_1e != 0 && minj == l2 && l1 - 1 - i <= settings->psi_1e) { - assert((i1 + 1)*length - 1 == curidx); + assert(!(settings->window == 0 || settings->window == l2) || (i1 + 1)*length - 1 == curidx); if (dtw[curidx] < psi_shortest) { // curidx is the last value psi_shortest = dtw[curidx]; @@ -771,11 +771,13 @@ seq_t dtw_warping_paths_ndim(seq_t *wps, // D. 
Rows: MAX(overlap_left_ri, overlap_right_ri) < ri <= l1 // [x 0 0 0 0] // [x x 0 0 0] - min_ci = MAX(0, p.ri3 + 1 - p.window - p.ldiff); + min_ci = MAX(0, p.ri3 + 1 - p.window - p.ldiff ); wpsi_start = 2; if (p.ri2 == p.ri3) { // C is skipped wpsi_start = min_ci + 1; + } else { + min_ci = 1 + p.ri3 - p.ri2; } for (ri=p.ri3; riri2 == p->ri3) { // C is skipped wpsi_start = min_ci + 1; + } else { + min_ci = 1 + p->ri3 - p->ri2; } for (ri=p->ri3+1; riwidth; idx_t min_ci, max_ci; @@ -1811,7 +1827,6 @@ idx_t dtw_wps_loc_columns(DTWWps* p, idx_t r, idx_t *cb, idx_t *ce, idx_t l1, id min_ci = 0; max_ci = p->window + p->ldiffc + 1; for (ri=1; riri1+1; ri++) { - ci = min_ci; if (ri == r) { *cb = min_ci; *ce = max_ci; @@ -1825,7 +1840,6 @@ idx_t dtw_wps_loc_columns(DTWWps* p, idx_t r, idx_t *cb, idx_t *ce, idx_t l1, id min_ci = 0; max_ci = l2 + 1; for (ri=p->ri1+1; riri2+1; ri++) { - ci = min_ci; if (ri == r) { *cb = min_ci; *ce = max_ci; @@ -1838,7 +1852,6 @@ idx_t dtw_wps_loc_columns(DTWWps* p, idx_t r, idx_t *cb, idx_t *ce, idx_t l1, id min_ci = 1; max_ci = 1 + 2 * p->window - 1 + p->ldiff + 1; for (ri=p->ri2+1; riri3+1; ri++) { - ci = min_ci; if (ri == r) { *cb = min_ci; *ce = max_ci; @@ -1856,9 +1869,10 @@ idx_t dtw_wps_loc_columns(DTWWps* p, idx_t r, idx_t *cb, idx_t *ce, idx_t l1, id if (p->ri2 == p->ri3) { // C is skipped wpsi_start = min_ci + 1; + } else { + min_ci = 1 + p->ri3 - p->ri2; } for (ri=p->ri3+1; riri2 == p->ri3) { // C is skipped wpsi_start = min_ci + 1; + } else { + min_ci = 1 + p->ri3 - p->ri2; } for (ri=p->ri3+1; ri p.ri3 && cip > 0) { @@ -2029,7 +2047,7 @@ idx_t dtw_best_path(seq_t *wps, idx_t *i1, idx_t *i2, idx_t l1, idx_t l2, // Go diagonal cip--; rip--; - wpsi--; + wpsi = wpsi - 1; ri_width = ri_widthp; ri_widthp -= p.width; } else if (wps[ri_width + wpsi - 1] <= wps[ri_widthp + wpsi]) { @@ -2146,7 +2164,7 @@ idx_t dtw_best_path_affinity(seq_t *wps, idx_t *i1, idx_t *i2, idx_t l1, idx_t l // Go diagonal cip--; rip--; - wpsi--; + wpsi = wpsi - 1; 
ri_width = ri_widthp; ri_widthp -= p.width; } else if (wps[ri_width + wpsi - 1] >= wps[ri_widthp + wpsi]) { @@ -2273,7 +2291,10 @@ idx_t dtw_best_path_prob(seq_t *wps, idx_t *i1, idx_t *i2, idx_t l1, idx_t l2, s min_ci = p.ri3 + 1 - p.window - p.ldiff; wpsi_start = 2; if (p.ri2 == p.ri3) { + // C is skipped wpsi_start = min_ci + 1; + } else { + min_ci = 1 + p.ri3 - p.ri2; } wpsi = wpsi_start + (l2 - min_ci) - 1; while (rip > p.ri3 && cip > 0) { @@ -2414,18 +2435,18 @@ idx_t dtw_best_path_prob(seq_t *wps, idx_t *i1, idx_t *i2, idx_t l1, idx_t l2, s @return length of path */ -idx_t warping_path(seq_t *from_s, idx_t from_l, seq_t* to_s, idx_t to_l, idx_t *from_i, idx_t *to_i, DTWSettings * settings) { - return warping_path_ndim(from_s, from_l, to_s, to_l, from_i, to_i, 1, settings); +seq_t dtw_warping_path(seq_t *from_s, idx_t from_l, seq_t* to_s, idx_t to_l, idx_t *from_i, idx_t *to_i, idx_t * length_i, DTWSettings * settings) { + return dtw_warping_path_ndim(from_s, from_l, to_s, to_l, from_i, to_i, length_i, 1, settings); } -idx_t warping_path_ndim(seq_t *from_s, idx_t from_l, seq_t* to_s, idx_t to_l, idx_t *from_i, idx_t *to_i, int ndim, DTWSettings * settings) { - idx_t path_length; +seq_t dtw_warping_path_ndim(seq_t *from_s, idx_t from_l, seq_t* to_s, idx_t to_l, idx_t *from_i, idx_t *to_i, idx_t * length_i, int ndim, DTWSettings * settings) { idx_t wps_length = dtw_settings_wps_length(from_l, to_l, settings); seq_t *wps = (seq_t *)malloc(wps_length * sizeof(seq_t)); - dtw_warping_paths_ndim(wps, from_s, from_l, to_s, to_l, false, false, true, ndim, settings); - path_length = dtw_best_path(wps, from_i, to_i, from_l, to_l, settings); + seq_t d = dtw_warping_paths_ndim(wps, from_s, from_l, to_s, to_l, true, false, true, ndim, settings); + d = sqrt(d); + *length_i = dtw_best_path(wps, from_i, to_i, from_l, to_l, settings); free(wps); - return path_length; + return d; } /*! 
@@ -2433,14 +2454,13 @@ idx_t warping_path_ndim(seq_t *from_s, idx_t from_l, seq_t* to_s, idx_t to_l, id @return length of path */ -idx_t warping_path_prob_ndim(seq_t *from_s, idx_t from_l, seq_t* to_s, idx_t to_l, idx_t *from_i, idx_t *to_i, seq_t avg, int ndim, DTWSettings * settings) { - idx_t path_length; +seq_t dtw_warping_path_prob_ndim(seq_t *from_s, idx_t from_l, seq_t* to_s, idx_t to_l, idx_t *from_i, idx_t *to_i, idx_t *length_i, seq_t avg, int ndim, DTWSettings * settings) { idx_t wps_length = dtw_settings_wps_length(from_l, to_l, settings); seq_t *wps = (seq_t *)malloc(wps_length * sizeof(seq_t)); - dtw_warping_paths_ndim(wps, from_s, from_l, to_s, to_l, false, false, true, ndim, settings); - path_length = dtw_best_path_prob(wps, from_i, to_i, from_l, to_l, avg, settings); + seq_t d = dtw_warping_paths_ndim(wps, from_s, from_l, to_s, to_l, false, false, true, ndim, settings); + *length_i = dtw_best_path_prob(wps, from_i, to_i, from_l, to_l, avg, settings); free(wps); - return path_length; + return d; } @@ -3297,10 +3317,18 @@ void dtw_printprecision_reset(void) { /* Helper function for debugging. 
*/ void dtw_print_wps_compact(seq_t * wps, idx_t l1, idx_t l2, DTWSettings* settings) { DTWWps p = dtw_wps_parts(l1, l2, settings); - for (idx_t ri=0; ri<(l1+1); ri++) { + for (idx_t wpsi=0; wpsifn(s1, 9, s2, 9, ¶m->settings); + double d = get_function(param->fn)(s1, 9, s2, 9, ¶m->settings); // printf("d=%f\n", d); cr_assert_float_eq(d, sqrt(2), 0.001); } @@ -61,12 +80,12 @@ ParameterizedTest(struct dtw_test_params *param, dtw, test_series1) { ParameterizedTestParameters(dtw, test_series2) { static struct dtw_test_params params[] = { - {.fn = dtw_distance, .settings={.window=0}, .id=0}, - {.fn = dtw_warping_paths_distance, .settings={.window=0}, .id=1}, - {.fn = dtw_distance, .settings={.window=0, .use_pruning=true}, .id=2}, - {.fn = dtw_distance, .settings={.window=3}, .id=3}, - {.fn = dtw_warping_paths_distance, .settings={.window=3}, .id=4}, - {.fn = dtw_distance, .settings={.window=3, .use_pruning=true}, .id=5} + {.fn = fn_dtw_distance, .settings={.window=0}, .id=0}, + {.fn = fn_dtw_warping_paths_distance, .settings={.window=0}, .id=1}, + {.fn = fn_dtw_distance, .settings={.window=0, .use_pruning=true}, .id=2}, + {.fn = fn_dtw_distance, .settings={.window=3}, .id=3}, + {.fn = fn_dtw_warping_paths_distance, .settings={.window=3}, .id=4}, + {.fn = fn_dtw_distance, .settings={.window=3, .use_pruning=true}, .id=5} }; idx_t nb_params = sizeof (params) / sizeof (struct dtw_test_params); return cr_make_param_array(struct dtw_test_params, params, nb_params); @@ -79,7 +98,7 @@ ParameterizedTest(struct dtw_test_params *param, dtw, test_series2) { dtw_printprecision_set(6); double s1[] = {0., 0.01, 0., 0.01, 0., 0., 0., 0.01, 0.01, 0.02, 0., 0.}; double s2[] = {0., 0.02, 0.02, 0., 0., 0.01, 0.01, 0., 0., 0., 0.}; - double d = param->fn(s1, 12, s2, 11, ¶m->settings); + double d = get_function(param->fn)(s1, 12, s2, 11, ¶m->settings); cr_assert_float_eq(d, 0.02, 0.001); dtw_printprecision_reset(); } @@ -218,12 +237,12 @@ Test(dtwp, test_d_a) { 
ParameterizedTestParameters(dtw, test_e) { static struct dtw_test_params params[] = { - {.fn = dtw_distance, .settings={.window=0}, .id=0}, - {.fn = dtw_warping_paths_distance, .settings={.window=0}, .id=1}, - {.fn = dtw_distance, .settings={.window=0, .use_pruning=true, .max_dist=0.2}, .id=2}, - {.fn = dtw_warping_paths_distance, .settings={.window=0, .use_pruning=true, .max_dist=0.2}, .id=3} + {.fn = fn_dtw_distance, .settings={.window=0}, .id=0}, + {.fn = fn_dtw_warping_paths_distance, .settings={.window=0}, .id=1}, + {.fn = fn_dtw_distance, .settings={.window=0, .use_pruning=true, .max_dist=0.2}, .id=2}, + {.fn = fn_dtw_warping_paths_distance, .settings={.window=0, .use_pruning=true, .max_dist=0.2}, .id=3} }; - idx_t nb_params = sizeof (params) / sizeof (struct dtw_test_params); + size_t nb_params = sizeof (params) / sizeof (struct dtw_test_params); return cr_make_param_array(struct dtw_test_params, params, nb_params); } @@ -233,7 +252,9 @@ ParameterizedTest(struct dtw_test_params *param, dtw, test_e) { #endif double s1[] = {5.005335029629605081e-01, 5.157722489130834864e-01, 4.804319657333316340e-01, 4.520537745752661318e-01, 4.867408184050183717e-01, 4.806534229629605415e-01, 4.530552579964135518e-01, 4.667067057333316171e-01, 4.567955137333316040e-01, 4.414902037333315876e-01, 4.240597964014319321e-01, 4.225263829008334970e-01, 4.030970017333316280e-01, 4.404482984865574768e-01, 3.852339312962939077e-01, 3.634947117333316435e-01, 3.861488867383516266e-01, 3.413363679008334928e-01, 3.451913457333316004e-01, 3.695692377333316680e-01, 3.434781337333315809e-01, 3.063217006568062506e-01, 2.845283817333316145e-01, 2.955394357333315791e-01, 3.151374838781335619e-01, 2.561411067352764026e-01, 2.301194263297469400e-01, 2.478605028202762184e-01, 1.972828198566299318e-01, 2.150545617333316228e-01, 2.232865857333316273e-01, 2.492665580680986370e-01, 2.144049374050155388e-01, 2.079081117333316520e-01, 1.879600957333316391e-01, 1.638555197333316227e-01, 
1.425566689000865583e-01, 2.016327177333316067e-01, 2.290943870240647606e-01, 1.900932117333316296e-01, 1.503233018025057766e-01, 1.970833717333316248e-01, 1.999393777333316191e-01, 2.018818837333316019e-01, 2.554168153357214144e-01, 2.345002377333316179e-01, 2.407103957333316113e-01, 2.762874997333316096e-01, 3.059693477333316203e-01, 3.328774862341668528e-01, 3.583867537333316200e-01, 3.743879884050183016e-01, 4.266385131705089373e-01, 4.445410410742424712e-01, 4.642271795675002033e-01, 4.402678696630802357e-01, 4.814591396296271641e-01, 5.317886460815400840e-01, 5.548714817383517683e-01, 5.062713000716849709e-01, 5.431524597333317050e-01, 5.537961812962939323e-01, 5.720852595675002261e-01, 5.933977447347652534e-01, 5.845479257333316969e-01, 6.133363017333317568e-01, 6.276481431102108877e-01, 6.132085097333317414e-01, 5.922371597333316862e-01, 5.778388756463566089e-01}; double s2[] = {5.584292601075275808e-01, 5.214504501075275522e-01, 4.877978901075275542e-01, 5.078206201075274873e-01, 4.769738701075275644e-01, 4.478925501075275428e-01, 4.242528301075275676e-01, 4.307546401075275644e-01, 4.370594201075275187e-01, 4.331284101075275617e-01, 4.810766301075275475e-01, 4.250942801075275335e-01, 3.973955801075275684e-01, 4.380910701075275693e-01, 3.786794801075275552e-01, 3.850050201075275180e-01, 3.576176301075275621e-01, 2.987050201075275302e-01, 3.377542001075275468e-01, 3.262601401075275187e-01, 3.278248801075275276e-01, 3.347294101075275474e-01, 3.222199801075275594e-01, 3.372712101075275304e-01, 2.526810801075275448e-01, 1.774206901075275622e-01, 2.384015601075275825e-01, 2.419624201075275816e-01, 1.694136001075275677e-01, 1.983933401075275715e-01, 2.272449101075275646e-01, 1.490059201075275563e-01, 1.416013701075275744e-01, 1.997542401075275698e-01, 1.791462801075275613e-01, 1.712680901075275819e-01, 1.851759601075275707e-01, 1.450854801075275591e-01, 1.041379601075275718e-01, 9.028068310752757064e-02, 1.358144301075275839e-01, 2.006444701075275616e-01, 
2.003521501075275768e-01, 2.100136501075275663e-01, 2.521797401075275280e-01, 2.364524601075275734e-01, 2.236850301075275771e-01, 2.873612101075275205e-01, 3.358473801075275156e-01, 3.288144201075275386e-01, 3.195859301075275605e-01, 3.482947201075275445e-01, 4.032929801075275655e-01, 4.566962501075275682e-01, 5.173766201075274962e-01, 5.463256501075275384e-01, 5.172673701075275465e-01, 5.054312901075275200e-01, 5.344046101075274890e-01, 5.389180101075274898e-01, 5.188896901075275014e-01, 5.484243401075274971e-01, 5.899157901075275934e-01, 5.987863201075275255e-01, 6.357147701075275270e-01, 6.277379101075275525e-01, 5.519873201075274904e-01, 5.634240801075275362e-01, 6.307956401075275332e-01, 6.488636001075275272e-01}; - double d = param->fn(s1, 70, s2, 70, ¶m->settings); + DTWFnPtr fn = get_function(param->fn); + DTWSettings settings = param->settings; + double d = fn(s1, 70, s2, 70, &settings); // printf("d=%f\n", d); cr_assert_float_eq(d, 0.19430270196116387, 0.001); } @@ -411,11 +432,12 @@ Test(wps, test_e_a) { DTWSettings settings = dtw_settings_default(); idx_t i1[l1+l2]; idx_t i2[l1+l2]; + idx_t length_i; for (idx_t i=0; i=0; i--) { +// printf("(%zd, %zd), ", i1[i], i2[i]); +// } +// printf("]\n"); + for (int i=0; i=0; i--) { +// printf("(%zd, %zd), ", i1[i], i2[i]); +// } +// printf("]\n"); + for (int i=0; i=0; i--) { +// printf("(%zd, %zd), ", i1[i], i2[i]); +// } +// printf("]\n"); + for (int i=0; iri2 == p->ri3) { // C is skipped wpsi_start = min_ci + 1; + } else { + min_ci = 1 + p->ri3 - p->ri2; } for (ri=p->ri3+1; riwidth; idx_t min_ci, max_ci; @@ -394,7 +396,6 @@ idx_t dtw_wps_loc_columns(DTWWps* p, idx_t r, idx_t *cb, idx_t *ce, idx_t l1, id min_ci = 0; max_ci = p->window + p->ldiffc + 1; for (ri=1; riri1+1; ri++) { - ci = min_ci; if (ri == r) { *cb = min_ci; *ce = max_ci; @@ -408,7 +409,6 @@ idx_t dtw_wps_loc_columns(DTWWps* p, idx_t r, idx_t *cb, idx_t *ce, idx_t l1, id min_ci = 0; max_ci = l2 + 1; for (ri=p->ri1+1; riri2+1; ri++) { - ci = min_ci; 
if (ri == r) { *cb = min_ci; *ce = max_ci; @@ -421,7 +421,6 @@ idx_t dtw_wps_loc_columns(DTWWps* p, idx_t r, idx_t *cb, idx_t *ce, idx_t l1, id min_ci = 1; max_ci = 1 + 2 * p->window - 1 + p->ldiff + 1; for (ri=p->ri2+1; riri3+1; ri++) { - ci = min_ci; if (ri == r) { *cb = min_ci; *ce = max_ci; @@ -439,9 +438,10 @@ idx_t dtw_wps_loc_columns(DTWWps* p, idx_t r, idx_t *cb, idx_t *ce, idx_t l1, id if (p->ri2 == p->ri3) { // C is skipped wpsi_start = min_ci + 1; + } else { + min_ci = 1 + p->ri3 - p->ri2; } for (ri=p->ri3+1; riri2 == p->ri3) { // C is skipped wpsi_start = min_ci + 1; + } else { + min_ci = 1 + p->ri3 - p->ri2; } for (ri=p->ri3+1; ri p.ri3 && cip > 0) { @@ -764,18 +769,18 @@ idx_t dtw_best_path_prob(seq_t *wps, idx_t *i1, idx_t *i2, idx_t l1, idx_t l2, s @return length of path */ -idx_t warping_path(seq_t *from_s, idx_t from_l, seq_t* to_s, idx_t to_l, idx_t *from_i, idx_t *to_i, DTWSettings * settings) { - return warping_path_ndim(from_s, from_l, to_s, to_l, from_i, to_i, 1, settings); +seq_t dtw_warping_path(seq_t *from_s, idx_t from_l, seq_t* to_s, idx_t to_l, idx_t *from_i, idx_t *to_i, idx_t * length_i, DTWSettings * settings) { + return dtw_warping_path_ndim(from_s, from_l, to_s, to_l, from_i, to_i, length_i, 1, settings); } -idx_t warping_path_ndim(seq_t *from_s, idx_t from_l, seq_t* to_s, idx_t to_l, idx_t *from_i, idx_t *to_i, int ndim, DTWSettings * settings) { - idx_t path_length; +seq_t dtw_warping_path_ndim(seq_t *from_s, idx_t from_l, seq_t* to_s, idx_t to_l, idx_t *from_i, idx_t *to_i, idx_t * length_i, int ndim, DTWSettings * settings) { idx_t wps_length = dtw_settings_wps_length(from_l, to_l, settings); seq_t *wps = (seq_t *)malloc(wps_length * sizeof(seq_t)); - dtw_warping_paths_ndim(wps, from_s, from_l, to_s, to_l, false, false, true, ndim, settings); - path_length = dtw_best_path(wps, from_i, to_i, from_l, to_l, settings); + seq_t d = dtw_warping_paths_ndim(wps, from_s, from_l, to_s, to_l, true, false, true, ndim, settings); + d = 
sqrt(d); + *length_i = dtw_best_path(wps, from_i, to_i, from_l, to_l, settings); free(wps); - return path_length; + return d; } /*! @@ -783,14 +788,13 @@ idx_t warping_path_ndim(seq_t *from_s, idx_t from_l, seq_t* to_s, idx_t to_l, id @return length of path */ -idx_t warping_path_prob_ndim(seq_t *from_s, idx_t from_l, seq_t* to_s, idx_t to_l, idx_t *from_i, idx_t *to_i, seq_t avg, int ndim, DTWSettings * settings) { - idx_t path_length; +seq_t dtw_warping_path_prob_ndim(seq_t *from_s, idx_t from_l, seq_t* to_s, idx_t to_l, idx_t *from_i, idx_t *to_i, idx_t *length_i, seq_t avg, int ndim, DTWSettings * settings) { idx_t wps_length = dtw_settings_wps_length(from_l, to_l, settings); seq_t *wps = (seq_t *)malloc(wps_length * sizeof(seq_t)); - dtw_warping_paths_ndim(wps, from_s, from_l, to_s, to_l, false, false, true, ndim, settings); - path_length = dtw_best_path_prob(wps, from_i, to_i, from_l, to_l, avg, settings); + seq_t d = dtw_warping_paths_ndim(wps, from_s, from_l, to_s, to_l, false, false, true, ndim, settings); + *length_i = dtw_best_path_prob(wps, from_i, to_i, from_l, to_l, avg, settings); free(wps); - return path_length; + return d; } @@ -1146,10 +1150,18 @@ void dtw_printprecision_reset(void) { /* Helper function for debugging. 
*/ void dtw_print_wps_compact(seq_t * wps, idx_t l1, idx_t l2, DTWSettings* settings) { DTWWps p = dtw_wps_parts(l1, l2, settings); - for (idx_t ri=0; ri<(l1+1); ri++) { + for (idx_t wpsi=0; wpsipsi_1b < l1 && settings->psi_1e < l1 && - settings->psi_2b < l2 && settings->psi_2e < l2); + assert(settings->psi_1b <= l1 && settings->psi_1e <= l1 && + settings->psi_2b <= l2 && settings->psi_2e <= l2); idx_t ldiff; idx_t dl; // DTWPruned @@ -227,7 +227,7 @@ seq_t dtw_distance{{ suffix }}(seq_t *s1, idx_t l1, ec = ec_next; // Deal with Psi-relaxation in last column if (settings->psi_1e != 0 && minj == l2 && l1 - 1 - i <= settings->psi_1e) { - assert((i1 + 1)*length - 1 == curidx); + assert(!(settings->window == 0 || settings->window == l2) || (i1 + 1)*length - 1 == curidx); if (dtw[curidx] < psi_shortest) { // curidx is the last value psi_shortest = dtw[curidx]; diff --git a/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/jinja/dtw_expandwps.jinja.c b/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/jinja/dtw_expandwps.jinja.c index 05ea0f22..c22d8c89 100644 --- a/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/jinja/dtw_expandwps.jinja.c +++ b/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/jinja/dtw_expandwps.jinja.c @@ -130,6 +130,8 @@ void dtw_expand_wps_slice{{suffix}}(seq_t *wps, seq_t *full, if (p.ri2 == p.ri3) { // C is skipped wpsi_start = min_ci + 1; + } else { + min_ci = 1 + p.ri3 - p.ri2; } for (ri=MAX(rbs, p.ri3); ri Date: Wed, 28 Dec 2022 23:49:45 +0100 Subject: [PATCH 32/59] add tests --- tests/test_warping.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/test_warping.py b/tests/test_warping.py index 5fea3ba8..c54fcc95 100644 --- a/tests/test_warping.py +++ b/tests/test_warping.py @@ -258,12 +258,18 @@ def test_twoleadecg_1(): wps_fast_best_path = dtw.best_path(wps_fast) d_fast, wps_fast_c = dtw.warping_paths_fast(s1, s2, compact=True, **kwargs) wps_fast_c_best_path = dtw.dtw_cc.best_path_compact(wps_fast_c, len(s1), len(s2), **kwargs) + path4, d4 = 
dtw.warping_path(s1, s2, include_distance=True, **kwargs) + path5, d5 = dtw.warping_path_fast(s1, s2, include_distance=True, **kwargs) assert str(wps_best_path) == str(wps_fast_best_path) assert str(wps_best_path) == str(wps_fast_c_best_path) + assert str(wps_best_path) == str(path4) + assert str(wps_best_path) == str(path5) np.testing.assert_allclose(wps, wps_fast) assert str(path) == str(path_fast) assert d == pytest.approx(d_fast) + assert d == pytest.approx(d4) + assert d == pytest.approx(d5) @numpyonly def test_subsequence(): From af476ed81c34f86262d069fae1ce070a5b00c6af Mon Sep 17 00:00:00 2001 From: wannesm Date: Wed, 18 Jan 2023 11:07:32 +0100 Subject: [PATCH 33/59] Update CITATION.cff --- CITATION.cff | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CITATION.cff b/CITATION.cff index 5b9f35cd..0bd8675d 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -13,6 +13,12 @@ authors: - family-names: "Robberechts" given-names: "Pieter" orcid: "https://orcid.org/0000-0002-3734-0047" +- family-names: "Blockeel" + given-names: "Hendrik" + orcid: "https://orcid.org/0000-0003-0378-3699" +- family-names: "Davis" + given-names: "Jesse" + orcid: "https://orcid.org/0000-0002-3748-9263" title: "DTAIDistance" version: 2 doi: 10.5281/zenodo.3981067 From 35bae22b3b28511a477cf73f41b6176c5f0af844 Mon Sep 17 00:00:00 2001 From: wannesm Date: Wed, 18 Jan 2023 11:08:35 +0100 Subject: [PATCH 34/59] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index e2191020..96de4a4e 100644 --- a/README.md +++ b/README.md @@ -23,7 +23,7 @@ Example: Citing this work: -> Wannes Meert, Kilian Hendrickx, Toon Van Craenendonck & Pieter Robberechts. +> Wannes Meert, Kilian Hendrickx, Toon Van Craenendonck, Pieter Robberechts, Hendrik Blockeel & Jesse Davis. > DTAIDistance (Version v2). Zenodo. 
> http://doi.org/10.5281/zenodo.5901139 From c9881d1d119f90a1c2733e5d671451cd60d3c393 Mon Sep 17 00:00:00 2001 From: wannesm Date: Wed, 18 Jan 2023 11:11:06 +0100 Subject: [PATCH 35/59] Update AUTHORS --- AUTHORS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/AUTHORS b/AUTHORS index b305a612..d273574b 100644 --- a/AUTHORS +++ b/AUTHORS @@ -5,7 +5,9 @@ Other contributors, listed alphabetically, are: * Aras Yurtman (KU Leuven) * Erlend Kvinge Jørgensen (Equanostic.com) * Gust Verbruggen (KU Leuven) +* Hendrik Blockeel (KU Leuven) * HendrikHuel (github.com/HendrikHuel) +* Jesse Davis (KU Leuven) * Killian Hendrickx (Siemens PLM Software, KU Leuven) * Lars Haalck (University of Münster) * Marco Rossi (github.com/m-rossi) From 8e2b8a94a9fb6bf67afa52aabe4c9a0238a23cd7 Mon Sep 17 00:00:00 2001 From: wannesm Date: Mon, 20 Feb 2023 15:29:56 +0100 Subject: [PATCH 36/59] Improved lower bounds and knn --- Makefile | 2 +- docs/usage/subsequence.rst | 6 +- dtaidistance/dtw.py | 15 ++-- .../DTAIDistanceC.xcodeproj/project.pbxproj | 6 ++ .../DTAIDistanceC/dd_benchmark.c | 52 +++++++----- .../lib/DTAIDistanceC/DTAIDistanceC/dd_dtw.c | 38 ++++----- .../DTAIDistanceC/dd_tests_dtw.c | 16 ++++ dtaidistance/subsequence/dtw.py | 42 ++++++---- tests/test_subsequence.py | 84 ++++++++++++++----- 9 files changed, 168 insertions(+), 93 deletions(-) diff --git a/Makefile b/Makefile index 984423cf..1930b4a5 100644 --- a/Makefile +++ b/Makefile @@ -59,7 +59,7 @@ benchmark-clustering: .PHONY: benchmark-subseqsearch benchmark-subseqsearch: - export PYTHONPATH=.;py.test -k subseqsearch_eeg ${BENCHMARKSETTINGS} + export PYTHONPATH=.;py.test -k test_dtw_subseqsearch_eeg_lb ${BENCHMARKSETTINGS} .PHONY: clean diff --git a/docs/usage/subsequence.rst b/docs/usage/subsequence.rst index aa772175..6e49652d 100644 --- a/docs/usage/subsequence.rst +++ b/docs/usage/subsequence.rst @@ -52,11 +52,11 @@ If you want to find all matches (or the k best): :alt: Subsequence alignment k-best matches -DTW 
subsequence search -~~~~~~~~~~~~~~~~~~~~~~ +DTW subsequence search (KNN) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Similar to using alignment, we can also iterate over a sequence of series or windows -to search for the best match: +to search for the best match, or best k matches (k-Nearest Neighbors): :: diff --git a/dtaidistance/dtw.py b/dtaidistance/dtw.py index 5b828c61..98b6c176 100644 --- a/dtaidistance/dtw.py +++ b/dtaidistance/dtw.py @@ -154,24 +154,27 @@ def __str__(self): def lb_keogh(s1, s2, window=None, max_dist=None, max_step=None, max_length_diff=None, use_c=False): """Lowerbound LB_KEOGH""" - # TODO: This implementation slower than distance() in C + if use_c: + return dtw_cc.lb_keogh(s1, s2, window=window, max_dist=max_dist, max_step=max_step) if window is None: window = max(len(s1), len(s2)) t = 0 + imin_diff = max(0, len(s1) - len(s2)) + window - 1 + imax_diff = max(0, len(s2) - len(s1)) + window for i in range(len(s1)): - imin = max(0, i - max(0, len(s1) - len(s2)) - window + 1) - imax = min(len(s2), i + max(0, len(s2) - len(s1)) + window) + imin = max(0, i - imin_diff) + imax = min(len(s2), i + imax_diff) ui = array_max(s2[imin:imax]) li = array_min(s2[imin:imax]) ci = s1[i] if ci > ui: - t += abs(ci - ui) + t += (ci - ui)**2 elif ci < li: - t += abs(ci - li) + t += (ci - li)**2 else: pass - return t + return math.sqrt(t) def ub_euclidean(s1, s2): diff --git a/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC.xcodeproj/project.pbxproj b/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC.xcodeproj/project.pbxproj index b29d68cf..208091bf 100644 --- a/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC.xcodeproj/project.pbxproj +++ b/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC.xcodeproj/project.pbxproj @@ -84,6 +84,7 @@ /* End PBXCopyFilesBuildPhase section */ /* Begin PBXFileReference section */ + C01B248429A3B0C00050C980 /* DTAIDistanceCBenchmark.entitlements */ = {isa = PBXFileReference; lastKnownFileType = text.plist.entitlements; path = DTAIDistanceCBenchmark.entitlements; 
sourceTree = ""; }; C043A85224BC69AC00BFCF3E /* dd_ed.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = dd_ed.h; sourceTree = ""; }; C043A85324BC69AC00BFCF3E /* dd_ed.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = dd_ed.c; sourceTree = ""; }; C043A85924BC6A2D00BFCF3E /* dd_globals.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = dd_globals.h; sourceTree = ""; }; @@ -146,6 +147,7 @@ C06FD1CA24A652B400892537 = { isa = PBXGroup; children = ( + C01B248429A3B0C00050C980 /* DTAIDistanceCBenchmark.entitlements */, C06FD1D524A652B400892537 /* DTAIDistanceC */, C06FD1D424A652B400892537 /* Products */, ); @@ -588,7 +590,9 @@ C06FD1FB24A94A2600892537 /* Debug */ = { isa = XCBuildConfiguration; buildSettings = { + CODE_SIGN_ENTITLEMENTS = DTAIDistanceCBenchmark.entitlements; CODE_SIGN_IDENTITY = ""; + "CODE_SIGN_IDENTITY[sdk=macosx*]" = "-"; CODE_SIGN_STYLE = Automatic; DEVELOPMENT_TEAM = 2462T87J45; ENABLE_HARDENED_RUNTIME = YES; @@ -612,7 +616,9 @@ C06FD1FC24A94A2600892537 /* Release */ = { isa = XCBuildConfiguration; buildSettings = { + CODE_SIGN_ENTITLEMENTS = DTAIDistanceCBenchmark.entitlements; CODE_SIGN_IDENTITY = ""; + "CODE_SIGN_IDENTITY[sdk=macosx*]" = "-"; CODE_SIGN_STYLE = Automatic; DEVELOPMENT_TEAM = 2462T87J45; ENABLE_HARDENED_RUNTIME = YES; diff --git a/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/dd_benchmark.c b/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/dd_benchmark.c index 0e41e2c1..736febe6 100644 --- a/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/dd_benchmark.c +++ b/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/dd_benchmark.c @@ -31,16 +31,22 @@ void benchmark13(void); void benchmark1() { - int size=10000; - double ra1[size], ra2[size]; - int i; - for (i=0; i l2) { - ldiff12 -= l2; - if (ldiff12 > window) { - ldiff12 -= window; - } else { - ldiff12 = 0; - } - } else { - ldiff12 = 0; + idx_t imin_diff = window - 1; + if (l1 > l2) { + imin_diff += l1 - l2; } - idx_t ldiff21 = 
l2 + window; - if (ldiff21 > l1) { - ldiff21 -= l1; - } else { - ldiff21 = 0; + idx_t imax_diff = window; + if (l1 < l2) { + imax_diff += l2 - l1; } for (idx_t i=0; i ldiff12) { - imin = i - ldiff12; + if (i > imin_diff) { + imin = i - imin_diff; } else { imin = 0; } - imax = MAX(l2, ldiff21); + imax = i + imax_diff; + if (imax > l2) { + imax = l2; + } ui = 0; for (idx_t j=imin; j ui) { @@ -2613,12 +2607,12 @@ seq_t lb_keogh(seq_t *s1, idx_t l1, seq_t *s2, idx_t l2, DTWSettings *settings) } ci = s1[i]; if (ci > ui) { - t += ci - ui; + t += (ci - ui)*(ci - ui); } else if (ci < li) { - t += li - ci; + t += (li - ci)*(li - ci); } } - return t; + return sqrt(t); } diff --git a/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/dd_tests_dtw.c b/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/dd_tests_dtw.c index 4d5f7570..4364b6ef 100644 --- a/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/dd_tests_dtw.c +++ b/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/dd_tests_dtw.c @@ -795,3 +795,19 @@ Test(dba, test_a_ptrs) { free(s); } + +//---------------------------------------------------- +// MARK: BOUNDS + +Test(bounds, test_keogh_lb_1) { +#ifdef SKIPALL + cr_skip_test(); +#endif + int size=4; + double ra1[] = {1., 2, 1, 3}; + double ra2[] = {3., 4, 3, 0}; + DTWSettings settings = dtw_settings_default(); + settings.window=2; + double d = lb_keogh(ra1, size, ra2, size, &settings); + cr_assert_float_eq(d, 2.23606797749979, 0.001); +} diff --git a/dtaidistance/subsequence/dtw.py b/dtaidistance/subsequence/dtw.py index db484ece..61e3f19c 100644 --- a/dtaidistance/subsequence/dtw.py +++ b/dtaidistance/subsequence/dtw.py @@ -494,21 +494,23 @@ def best_path(self, row, col): return p -def subsequence_search(query, series, dists_options=None, use_lb=False, keep_all_distances=False, - max_dist=None, max_value=None): + +def subsequence_search(query, series, dists_options=None, use_lb=True, + max_dist=None, max_value=None, use_c=None): """See SubsequenceSearch. 
:param query: Time series to search for :param series: Iterator over time series to perform search on. This can be for example windows over a long time series. :param dists_options: Options passed on to `dtw.distance` + :param use_lb: Use lowerbounds to early abandon options :param max_dist: Ignore DTW distances larger than this value :param max_value: Ignore normalized DTW distances larger than this value + :param use_c: Use fast C implementation if available :return: SubsequenceSearch object """ ss = SubsequenceSearch(query, series, dists_options=dists_options, use_lb=use_lb, - keep_all_distances=keep_all_distances, - max_dist=max_dist, max_value=max_value) + max_dist=max_dist, max_value=max_value, use_c=use_c) return ss @@ -566,16 +568,16 @@ def __str__(self): return '[' + ', '.join(str(m) for m in self) + ']' - class SubsequenceSearch: - def __init__(self, query, s, dists_options=None, use_lb=False, keep_all_distances=False, - max_dist=None, max_value=None): + def __init__(self, query, s, dists_options=None, use_lb=True, keep_all_distances=False, + max_dist=None, max_value=None, use_c=None): """Search the best matching (subsequence) time series compared to a given time series. :param query: Time series to search for :param s: Iterator over time series to perform search on. This can be for example windows over a long time series. 
:param dists_options: Options passed on to `dtw.distance` + :param use_lb: Use lowerbounds to early abandon options :param max_dist: Ignore DTW distances larger than this value if max_dist is also given in dists_options, then the one in dists_options is ignored if both max_dist and max_value are given, the smallest is used @@ -595,20 +597,23 @@ def __init__(self, query, s, dists_options=None, use_lb=False, keep_all_distance if max_value is not None: self.max_dist = min(self.max_dist, max_value * len(self.query)) self.dists_options['max_dist'] = self.max_dist + if use_c is not None: + self.dists_options['use_c'] = use_c self.use_lb = use_lb + self.keep_all_distances = keep_all_distances - if self.use_lb and not self.keep_all_distances: - raise ValueError("If use_lb is true, then keep_all_distances should also be true.") + # if self.use_lb and not self.keep_all_distances: + # raise ValueError("If use_lb is true, then keep_all_distances should also be true.") def reset(self): self.distances = None self.kbest_distances = None self.lbs = None - def compute_lbs(self): - self.lbs = np.zeros((len(self.s),)) - for idx, series in enumerate(self.s): - self.lbs[idx] = dtw.lb_keogh(self.query, series, **self.dists_options) + # def compute_lbs(self): + # self.lbs = np.zeros((len(self.s),)) + # for idx, series in enumerate(self.s): + # self.lbs[idx] = dtw.lb_keogh(self.query, series, **self.dists_options) def align_fast(self, k=None): self.dists_options['use_c'] = True @@ -619,14 +624,16 @@ def align(self, k=None): return if k is None or self.keep_all_distances: self.distances = np.zeros((len(self.s),)) - if self.use_lb: - self.compute_lbs() + # if self.use_lb: + # self.compute_lbs() import heapq h = [(-np.inf, -1)] max_dist = self.max_dist for idx, series in enumerate(self.s): - if self.use_lb and self.lbs[idx] > max_dist: - continue + if self.use_lb: + lb = dtw.lb_keogh(self.query, series, **self.dists_options) + if lb > max_dist: + continue dist = dtw.distance(self.query, 
series, **self.dists_options) if k is not None: if len(h) < k: @@ -648,6 +655,7 @@ def align(self, k=None): self.kbest_distances = [(self.distances[i], i) for i in np.argsort(self.distances)] self.k = k + return self.kbest_distances def get_ith_value(self, i): """Return the i-th value from the k-best values. diff --git a/tests/test_subsequence.py b/tests/test_subsequence.py index 7d655622..d802149f 100644 --- a/tests/test_subsequence.py +++ b/tests/test_subsequence.py @@ -9,6 +9,7 @@ subsequence_search from dtaidistance import dtw_visualisation as dtwvis from dtaidistance.exceptions import MatplotlibException +from dtaidistance.dtw import lb_keogh directory = None numpyonly = pytest.mark.skipif("util_numpy.test_without_numpy()") @@ -211,17 +212,21 @@ def test_dtw_localconcurrences_short(): plt.close(fig) -def create_data_subseqsearch_eeg(np, dtype=None): +def create_data_subseqsearch_eeg(np, dtype=None, longer=False): + window_extra = 200 if longer else 0 data_fn = Path(__file__).parent / 'rsrc' / 'EEGRat_10_1000.txt' data = np.loadtxt(data_fn) - series = np.array(data[1500:1700], dtype=dtype) - query = np.array(data[1331:1352], dtype=dtype) + if longer: + series = np.array(data[:], dtype=dtype) + else: + series = np.array(data[1500:1700], dtype=dtype) + query = np.array(data[1331:1352+window_extra], dtype=dtype) # print(f'{len(series)=}') k = 3 s = [] s_idx = [] - w = 22 # window size + w = 22+window_extra # window size ws = int(np.floor(w / 2)) # shift size wn = int(np.floor((len(series) - (w - ws)) / ws)) si, ei = 0, w @@ -237,36 +242,30 @@ def create_data_subseqsearch_eeg(np, dtype=None): def test_dtw_subseqsearch_eeg2(): with util_numpy.test_uses_numpy() as np: query, s, k, series, s_idx = create_data_subseqsearch_eeg(np) - sa = subsequence_search(query, s, dists_options={'use_c': True}, - keep_all_distances=False) + sa = subsequence_search(query, s, dists_options={'use_c': True}) best = sa.kbest_matches_fast(k=k) assert str(best) == "[SSMatch(15), SSMatch(7), 
SSMatch(4)]", str(best) assert sa.distances is None - sa = subsequence_search(query, s, dists_options={'use_c': True}, - keep_all_distances=False) + sa = subsequence_search(query, s, dists_options={'use_c': True}) best = sa.kbest_matches_fast(k=1) assert str(best) == "[SSMatch(15)]", str(best) - sa = subsequence_search(query, s, dists_options={'use_c': True}, - keep_all_distances=False) + sa = subsequence_search(query, s, dists_options={'use_c': True}) best = sa.kbest_matches_fast(k=None) assert str(best) == "[SSMatch(15), SSMatch(7), SSMatch(4), SSMatch(11), SSMatch(6) ... SSMatch(14), SSMatch(10), SSMatch(9), SSMatch(1), SSMatch(3)]", str(best) assert best[0].value == pytest.approx(0.08045349583339727) - sa = subsequence_search(query, s, dists_options={'use_c': True, 'max_dist': 0.0805 * len(query)}, - keep_all_distances=False) + sa = subsequence_search(query, s, dists_options={'use_c': True, 'max_dist': 0.0805 * len(query)}) best = sa.kbest_matches_fast(k=k) assert str(best) == "[SSMatch(15)]", str(best) - sa = subsequence_search(query, s, max_value=0.0805, - keep_all_distances=False) + sa = subsequence_search(query, s, max_value=0.0805) best = sa.kbest_matches_fast(k=k) assert str(best) == "[SSMatch(15)]", str(best) - sa = subsequence_search(query, s, max_dist=0.0805 * len(query), - keep_all_distances=False) + sa = subsequence_search(query, s, max_dist=0.0805 * len(query)) best = sa.kbest_matches_fast(k=k) assert str(best) == "[SSMatch(15)]", str(best) @@ -334,12 +333,14 @@ def run(): @numpyonly @pytest.mark.benchmark(group="subseqsearch_eeg") -def test_dtw_subseqsearch_eeg_ub(benchmark): +@pytest.mark.parametrize("use_c,use_lb", [(False, False), (True, False), (False, True), (True, True)]) +def test_dtw_subseqsearch_eeg_lb(benchmark, use_c, use_lb): with util_numpy.test_uses_numpy() as np: - query, s, k, series, s_idx = create_data_subseqsearch_eeg(np) + query, s, k, series, s_idx = create_data_subseqsearch_eeg(np, longer=True) + k = 1 def run(): - sa = 
subsequence_search(query, s, dists_options={'use_c': True}, use_lb=True) + sa = subsequence_search(query, s, use_c=use_c, use_lb=use_lb) best = sa.kbest_matches_fast(k=k) return best if benchmark is None: @@ -353,6 +354,40 @@ def run(): # print(best) +@numpyonly +@pytest.mark.benchmark(group="test_eeg_lb") +@pytest.mark.parametrize("use_c", [False, True]) +def test_eeg_lb(benchmark, use_c): + with util_numpy.test_uses_numpy() as np: + query, s, k, series, s_idx = create_data_subseqsearch_eeg(np, longer=False) + k = 1 + + def run(): + lb = [] + for serie in s: + lb.append(lb_keogh(query, serie, use_c=use_c)) + return lb + if benchmark is None: + tic = time.perf_counter() + lb = run() + toc = time.perf_counter() + print("Lowerbound performed in {:0.4f} seconds: {}".format(toc - tic, lb)) + else: + best = benchmark(run) + # print(sa.distances) + # print(best) + + +@numpyonly +@pytest.mark.parametrize("use_c", [False, True]) +def test_lb1(use_c): + with util_numpy.test_uses_numpy() as np: + a = np.array([1., 2, 1, 3]) + b = np.array([3., 4, 3, 0]) + lb = lb_keogh(a, b, window=2, use_c=use_c) + assert lb == pytest.approx(2.23606797749979) + + if __name__ == "__main__": directory = Path(os.environ.get('TESTDIR', Path(__file__).parent)) print("Saving files to {}".format(directory)) @@ -363,7 +398,14 @@ def run(): # test_dtw_subseq_bug1() # test_dtw_subseq_ndim() # test_dtw_localconcurrences_eeg() - test_dtw_subseqsearch_eeg2() + # test_dtw_subseqsearch_eeg2() + # test_lc_pat1() + # test_lc_pat2() + # import cProfile + # cProfile.run('test_lc_pat1()') + # test_dtw_subseqsearch_eeg2() # test_dtw_subseqsearch_eeg(benchmark=None) - # test_dtw_subseqsearch_eeg_ub(benchmark=None) + test_dtw_subseqsearch_eeg_lb(benchmark=None, use_c=True, use_lb=False) + # test_eeg_lb(benchmark=None, use_c=False) # test_dtw_localconcurrences_short() + # test_lb1(use_c=True) From f21c57c19482d45899dd27260fabfb9758dcb8ea Mon Sep 17 00:00:00 2001 From: wannesm Date: Mon, 10 Apr 2023 22:52:45 +0200 
Subject: [PATCH 37/59] Docs similarity --- docs/index.rst | 1 + docs/usage/similarity.rst | 53 ++++++++++++++++++++++++++++++++++++++ dtaidistance/similarity.py | 4 +-- 3 files changed, 56 insertions(+), 2 deletions(-) create mode 100644 docs/usage/similarity.rst diff --git a/docs/index.rst b/docs/index.rst index 95cbb7c6..daed7de0 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -29,6 +29,7 @@ Source available on https://github.com/wannesm/dtaidistance. usage/clustering usage/subsequence usage/sequence + usage/similarity usage/changelist diff --git a/docs/usage/similarity.rst b/docs/usage/similarity.rst new file mode 100644 index 00000000..23062e19 --- /dev/null +++ b/docs/usage/similarity.rst @@ -0,0 +1,53 @@ +Similarity +---------- + +Instead of expressing a distance, thus how far two instances are apart, +one can also express a similarity, how close two instances are. +Whereas a distance is larger than zero and have no upperbound, +similarity is between 0 and 1. + +Some methods require as input a similarity instead of a distance +(e.g., spectral clustering). Therefore, it might be useful to translate +the computed distances to a similarity. There are different approaches +to achieve this that are supported by dtaidistance: exponential, +Gaussian, reciprocal, reverse. + +For example, given a set of series (the rows) for which we want to compute the +pairwise similarity based on dynamic time warping: + +.. code-block:: python + + from dtaidistance import dtw, similarity + s = np.array([[0., 0, 1, 2, 1, 0, 1, 0, 0], + [0., 1, 2, 0, 0, 0, 0, 0, 0], + [1., 2, 0, 0, 0, 0, 0, 1, 1], + [0., 0, 1, 2, 1, 0, 1, 0, 0], + [0., 1, 2, 0, 0, 0, 0, 0, 0], + [1., 2, 0, 0, 0, 0, 0, 1, 1]]) + sim = similarity.distance_to_similarity(dtw.distance_matrix(s)) + +The result is: + +.. 
code-block:: python + + [[1.00 0.53 0.37 1.00 0.53 0.37] + [0.53 1.00 0.46 0.53 1.00 0.46] + [0.37 0.46 1.00 0.37 0.46 1.00] + [1.00 0.53 0.37 1.00 0.53 0.37] + [0.53 1.00 0.46 0.53 1.00 0.46] + [0.37 0.46 1.00 0.37 0.46 1.00]] + +You can observe that the diagonal is all ones because each series +is similar to itself. And the series at index 0 and 3 are identical, +thus also resulting in a similarity of 1. + +If you want to use a different conversion than the default exponential, +use the method argument. + +.. code-block:: python + + distance_to_similarity(distances, method='exponential') + distance_to_similarity(distances, method='gaussian') + distance_to_similarity(distances, method='reciprocal') + distance_to_similarity(distances, method='reverse') + diff --git a/dtaidistance/similarity.py b/dtaidistance/similarity.py index 10639f55..bfba1b9f 100644 --- a/dtaidistance/similarity.py +++ b/dtaidistance/similarity.py @@ -7,7 +7,7 @@ def distance_to_similarity(D, r=None, method='exponential'): """Transform a distance matrix to a similarity matrix. - The avaiable methods are: + The available methods are: - Exponential: e^(-D / r) r is max(D) if not given - Gaussian: e^(-D^2 / r^2) @@ -25,7 +25,7 @@ def distance_to_similarity(D, r=None, method='exponential'): :param D: The distance matrix :param r: A scaling or smoothing parameter. 
- :param method: One of 'exponential', 'reciprocal', 'reverse' + :param method: One of 'exponential', 'gaussian', 'reciprocal', 'reverse' :return: Similarity matrix S """ method = method.lower() From 42e4516a9b29bbf28e1c96cc21777cd00ce0bf8a Mon Sep 17 00:00:00 2001 From: wannesm Date: Mon, 10 Apr 2023 23:18:44 +0200 Subject: [PATCH 38/59] test --- tests/test_subsequence.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/tests/test_subsequence.py b/tests/test_subsequence.py index d802149f..9be7a0eb 100644 --- a/tests/test_subsequence.py +++ b/tests/test_subsequence.py @@ -121,6 +121,17 @@ def test_dtw_subseq_ndim(): assert m.value == pytest.approx(0.07071067811865482) +@numpyonly +def test_dtw_subseq_ndim2(): + use_c = False + with util_numpy.test_uses_numpy() as np: + s = [np.array([[1., 1], [2, 2], [3, 3]]), + np.array([[2, 2], [3, 3], [1, 1]])] + query = np.array([[2.0, 2.1], [3.1, 3.0]]) + sa = subsequence_search(query, s) + print(sa.best_match()) + + @pytest.mark.skip @numpyonly def test_dtw_localconcurrences_eeg(): @@ -397,6 +408,7 @@ def test_lb1(use_c): # test_dtw_subseq_eeg() # test_dtw_subseq_bug1() # test_dtw_subseq_ndim() + test_dtw_subseq_ndim2() # test_dtw_localconcurrences_eeg() # test_dtw_subseqsearch_eeg2() # test_lc_pat1() @@ -405,7 +417,7 @@ def test_lb1(use_c): # cProfile.run('test_lc_pat1()') # test_dtw_subseqsearch_eeg2() # test_dtw_subseqsearch_eeg(benchmark=None) - test_dtw_subseqsearch_eeg_lb(benchmark=None, use_c=True, use_lb=False) + # test_dtw_subseqsearch_eeg_lb(benchmark=None, use_c=True, use_lb=False) # test_eeg_lb(benchmark=None, use_c=False) # test_dtw_localconcurrences_short() # test_lb1(use_c=True) From a70cd49dc6442041a47a5c94348bf40ce9f3e2c0 Mon Sep 17 00:00:00 2001 From: wannesm Date: Tue, 11 Apr 2023 12:47:17 +0200 Subject: [PATCH 39/59] similarity doc --- docs/usage/similarity.rst | 36 ++++++++++++++++++++++++++++++++++++ dtaidistance/similarity.py | 36 ++++++++++++++++++++++++++++-------- 2 
files changed, 64 insertions(+), 8 deletions(-) diff --git a/docs/usage/similarity.rst b/docs/usage/similarity.rst index 23062e19..419ca1ba 100644 --- a/docs/usage/similarity.rst +++ b/docs/usage/similarity.rst @@ -51,3 +51,39 @@ by using the method argument. distance_to_similarity(distances, method='reciprocal') distance_to_similarity(distances, method='reverse') +When reapplying the distance_to_similarity function over multiple matrices, it is advised +to set the r argument manually (or extract them using the return_params +option). Otherwise they are computed based on +the given distance matrix and will be different from call to call. + +Squashing +~~~~~~~~~ + +Similarity reverses high values to low and low to high. If you want to +maintain the direction but squash the distances between 0 and 1, you can +use the squash function (based on Vercruyssen et al., Semi-supervised anomaly detection with an application to +water analytics, ICDM, 2018). + +.. code-block:: python + + similarity.squash(dtw.distance_matrix(s)) + +Which results in: + +.. code-block:: python + + [[0.00 0.75 0.99 0.00 0.75 0.99] + [0.75 0.00 0.94 0.75 0.00 0.94] + [0.99 0.94 0.00 0.99 0.94 0.00] + [0.00 0.75 0.99 0.00 0.75 0.99] + [0.75 0.00 0.94 0.75 0.00 0.94] + [0.99 0.94 0.00 0.99 0.94 0.00]] + +You can observe the diagonal is all zeros again (when rounded, the values +are slightly larger than zero because logistic squashing is used). And +the most different series are close to 1. + +When reapplying the squash function over multiple matrices, it is advised +to set the x0 and r argument manually (or extract them using the return_params +option). Otherwise they are computed based on +the given distance matrix and will be different from call to call. 
\ No newline at end of file diff --git a/dtaidistance/similarity.py b/dtaidistance/similarity.py index bfba1b9f..5cf4b076 100644 --- a/dtaidistance/similarity.py +++ b/dtaidistance/similarity.py @@ -4,7 +4,7 @@ np = None -def distance_to_similarity(D, r=None, method='exponential'): +def distance_to_similarity(D, r=None, method='exponential', return_params=False): """Transform a distance matrix to a similarity matrix. The available methods are: @@ -47,10 +47,13 @@ def distance_to_similarity(D, r=None, method='exponential'): S = (r - D) / r else: raise ValueError("method={} is not supported".format(method)) - return S + if return_params: + return S, r + else: + return S -def squash(X, r=None, base=None, x0=0, method="logistic"): +def squash(X, r=None, base=None, x0=None, method="logistic", return_params=False): """Squash a function monotonically to a range between 0 and 1. The available methods are: @@ -67,17 +70,34 @@ def squash(X, r=None, base=None, x0=0, method="logistic"): Vercruyssen, V., Meert, W., Verbruggen, G., Maes, K., Baumer, R., & Davis, J. (2018). Semi-supervised anomaly detection with an application to water analytics. In 2018 IEEE international conference on data mining (ICDM) (Vol. 2018, pp. 
527-536) + + :param X: Distances values + :param r: The slope of the squashing (see the formula above) + :param x0: The midpoint of the squashing (see the formula above) + :param method: The choice of sqaush function: logistic or gaussian + :param return_params: Also return the used values for r and X0 """ + result = None if method == "gaussian": x0 = 0 # not supported for gaussian if r is None: r = 1 if base is None: - return 1 - np.exp(-np.power(X - x0, 2) / r**2) - return 1 - np.power(base, -np.power(X - x0, 2) / r**2) + result = 1 - np.exp(-np.power(X - x0, 2) / r**2) + else: + result = 1 - np.power(base, -np.power(X - x0, 2) / r**2) elif method == "logistic": + if x0 is None: + x0 = np.mean(X) if r is None: - r = 1 + r = x0 / 6 if base is None: - return 1 / (1 + np.exp(-(X - x0) / r)) - return 1 / (1 + np.power(base, -(X - x0) / r)) + result = 1 / (1 + np.exp(-(X - x0) / r)) + else: + result = 1 / (1 + np.power(base, -(X - x0) / r)) + else: + raise ValueError("Unknown value for method") + if return_params: + return result, r, x0 + else: + return result From 9525b4327af10511e599508d374936946519b831 Mon Sep 17 00:00:00 2001 From: wannesm Date: Wed, 12 Apr 2023 10:51:37 +0200 Subject: [PATCH 40/59] docs --- docs/usage/similarity.rst | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/docs/usage/similarity.rst b/docs/usage/similarity.rst index 419ca1ba..95683aa4 100644 --- a/docs/usage/similarity.rst +++ b/docs/usage/similarity.rst @@ -1,10 +1,24 @@ -Similarity ----------- +Similarity vs Distance +---------------------- + +Distances such as Euclidean distance or Dynamic Time Warping (DTW) +return a value that expresses *how far two instances are apart*. +Such a distance is equal to zero, when the instances are equal, or larger than +zero. In certain cases you might need to translate this distance to: + +- A *similarity measure* that inverts the meaning of the returned + values and expresses *how close to instances are*. 
Typically also + bounded between 0 and 1, where now 1 means that two instances are equal. + +- A *bounded distance* that limits the range of the distance between + 0 and 1, where 0 means that two instances are equal. This can be achieved + by squashing to distance between 0 and 1. -Instead of expressing a distance, thus how far two instances are apart, -one can also express a similarity, how close two instances are. -Whereas a distance is larger than zero and have no upperbound, -similarity is between 0 and 1. +The DTAIDistance toolbox provides a number of transformations to +translate a distance to a similarity measure or to a squashed distance. + +Similarity +~~~~~~~~~~ Some methods require as input a similarity instead of a distance (e.g., spectral clustering). Therefore, it might be useful to translate @@ -86,4 +100,4 @@ the most different series are close to 1. When reapplying the squash function over multiple matrices, it is advised to set the x0 and r argument manually (or extract them using the return_params option). Otherwise they are computed based on -the given distance matrix and will be different from call to call. \ No newline at end of file +the given distance matrix and will be different from call to call. From 35eadf72a0b9ae2893b8a42186336853f864b789 Mon Sep 17 00:00:00 2001 From: wannesm Date: Wed, 12 Apr 2023 11:46:01 +0200 Subject: [PATCH 41/59] docs --- docs/usage/dtw.rst | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/docs/usage/dtw.rst b/docs/usage/dtw.rst index 4d30f226..e2059ef2 100644 --- a/docs/usage/dtw.rst +++ b/docs/usage/dtw.rst @@ -269,19 +269,20 @@ documentation for a visual example). Multi-dimensionsal DTW ^^^^^^^^^^^^^^^^^^^^^^ -Compare two multi-dimensional sequences. +To compare two multivariate sequences, a multivariate time series with n_timesteps and +at each timestep a vector with n_values is stored in a two dimensional array of size +(n_timesteps,n_values). 
The first dimension of the data structure is the +sequence item index (i.e., time series index, time step) and the second dimension +is the index of the value in the vector. -Assumes the first dimension of the data structure to be the sequence item index -(or time series index). - -For example, two 2-dimensional series with five timesteps: +For example, two 2-dimensional multivariate series with five timesteps: :: from dtaidistance import dtw_ndim - series1 = np.array([[0, 0], # first 2-dim point at t=0 - [0, 1], # second 2-dim point at t=1 + series1 = np.array([[0, 0], # first point at t=0 + [0, 1], # second point at t=1 [2, 1], [0, 1], [0, 0]], dtype=np.double) @@ -302,3 +303,8 @@ n-dimensional sequences. If you want to compute the independent DTW dtw_i = 0 for dim in range(ndim): dtw_i += dtw.distance(s1[:,dim], s2[:,dim]) + +To compute a distance matrix between multivariate time series, the same +data structures are for univariate DTW are supported. The only difference +is that when all data is stored in a Numpy array, this is now a 3-dimensional +array with as size (n_series, n_timesteps, n_values). From 2de641f28bf3b97e09d4a241b4a8fd47284f991c Mon Sep 17 00:00:00 2001 From: wannesm Date: Thu, 4 May 2023 17:52:59 +0200 Subject: [PATCH 42/59] Support for multivariate subseq search --- dtaidistance/subsequence/dtw.py | 25 +++++++++++++++++++++---- dtaidistance/util.py | 9 +++++++++ tests/test_subsequence.py | 19 ++++++++++++------- 3 files changed, 42 insertions(+), 11 deletions(-) diff --git a/dtaidistance/subsequence/dtw.py b/dtaidistance/subsequence/dtw.py index 61e3f19c..6854c016 100644 --- a/dtaidistance/subsequence/dtw.py +++ b/dtaidistance/subsequence/dtw.py @@ -494,7 +494,6 @@ def best_path(self, row, col): return p - def subsequence_search(query, series, dists_options=None, use_lb=True, max_dist=None, max_value=None, use_c=None): """See SubsequenceSearch. 
@@ -561,6 +560,11 @@ def __iter__(self): for ki, (v, i) in enumerate(self.ss.kbest_distances): yield SSMatch(ki, self.ss) + def __len__(self): + if self.ss.kbest_distances is None: + return 0 + return len(self.ss.kbest_distances) + def __str__(self): if len(self.ss.kbest_distances) > 10: return '[' + ', '.join(str(m) for m in self[:5]) + ' ... ' +\ @@ -570,7 +574,7 @@ def __str__(self): class SubsequenceSearch: def __init__(self, query, s, dists_options=None, use_lb=True, keep_all_distances=False, - max_dist=None, max_value=None, use_c=None): + max_dist=None, max_value=None, use_c=None, use_ndim=None): """Search the best matching (subsequence) time series compared to a given time series. :param query: Time series to search for @@ -584,6 +588,10 @@ def __init__(self, query, s, dists_options=None, use_lb=True, keep_all_distances :param max_value: Ignore normalized DTW distances larger than this value """ self.query = query + if use_ndim is None: + self.use_ndim = (util.detect_ndim(query) > 1) + else: + self.use_ndim = use_ndim self.s = s self.distances = None self.kbest_distances = None @@ -620,6 +628,15 @@ def align_fast(self, k=None): return self.align(k=k) def align(self, k=None): + if self.use_ndim: + distance = dtw_ndim.distance + lb_keogh = None + if self.use_lb: + self.use_lb = False + logger.warning('The setting use_lb is ignored for multivariate series.') + else: + distance = dtw.distance + lb_keogh = dtw.lb_keogh if self.distances is not None and self.k >= k: return if k is None or self.keep_all_distances: @@ -631,10 +648,10 @@ def align(self, k=None): max_dist = self.max_dist for idx, series in enumerate(self.s): if self.use_lb: - lb = dtw.lb_keogh(self.query, series, **self.dists_options) + lb = lb_keogh(self.query, series, **self.dists_options) if lb > max_dist: continue - dist = dtw.distance(self.query, series, **self.dists_options) + dist = distance(self.query, series, **self.dists_options) if k is not None: if len(h) < k: if not np.isinf(dist) and 
dist <= max_dist: diff --git a/dtaidistance/util.py b/dtaidistance/util.py index fdc8dee9..9588fad7 100644 --- a/dtaidistance/util.py +++ b/dtaidistance/util.py @@ -173,6 +173,15 @@ def strip_comments(reader): return matrix +def detect_ndim(s): + if np is not None and isinstance(s, np.ndarray): + return s.ndim + if type(s) is list and len(s) > 0: + return detect_ndim(s[0]) + 1 + if type(s) in [int, float]: + return 0 + return None + class SeriesContainer: def __init__(self, series, support_ndim=True): """Container for a list of series. diff --git a/tests/test_subsequence.py b/tests/test_subsequence.py index 9be7a0eb..b5c27f08 100644 --- a/tests/test_subsequence.py +++ b/tests/test_subsequence.py @@ -4,12 +4,13 @@ import time from pathlib import Path -from dtaidistance import util_numpy +from dtaidistance import util_numpy, util from dtaidistance.subsequence.dtw import subsequence_alignment, local_concurrences,\ subsequence_search from dtaidistance import dtw_visualisation as dtwvis from dtaidistance.exceptions import MatplotlibException from dtaidistance.dtw import lb_keogh +from dtaidistance import dtw, dtw_ndim directory = None numpyonly = pytest.mark.skipif("util_numpy.test_without_numpy()") @@ -122,14 +123,18 @@ def test_dtw_subseq_ndim(): @numpyonly -def test_dtw_subseq_ndim2(): - use_c = False +@pytest.mark.parametrize("use_c", [False, True]) +def test_dtw_subseq_ndim2(use_c): with util_numpy.test_uses_numpy() as np: s = [np.array([[1., 1], [2, 2], [3, 3]]), - np.array([[2, 2], [3, 3], [1, 1]])] + np.array([[2., 2], [3, 3], [1, 1]])] query = np.array([[2.0, 2.1], [3.1, 3.0]]) - sa = subsequence_search(query, s) - print(sa.best_match()) + d1 = [dtw_ndim.distance(si, query, use_c=use_c) for si in s] + sa = subsequence_search(query, s, use_lb=False, use_c=use_c) + assert str(sa.best_match()) == 'SSMatch(0)' + d2 = [m.distance for m in sa.kbest_matches(k=2)] + for d1i, d2i in zip(d1, d2): + assert d1i == pytest.approx(d2i) @pytest.mark.skip @@ -408,7 +413,7 @@ 
def test_lb1(use_c): # test_dtw_subseq_eeg() # test_dtw_subseq_bug1() # test_dtw_subseq_ndim() - test_dtw_subseq_ndim2() + test_dtw_subseq_ndim2(use_c=True) # test_dtw_localconcurrences_eeg() # test_dtw_subseqsearch_eeg2() # test_lc_pat1() From 1aa2797e5bb496ee725ecce497861a99bbb33d8b Mon Sep 17 00:00:00 2001 From: wannesm Date: Thu, 18 May 2023 00:11:16 +0200 Subject: [PATCH 43/59] squared euclidean --- dtaidistance/dtw.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/dtaidistance/dtw.py b/dtaidistance/dtw.py index 98b6c176..36b78ce2 100644 --- a/dtaidistance/dtw.py +++ b/dtaidistance/dtw.py @@ -185,7 +185,8 @@ def ub_euclidean(s1, s2): def distance(s1, s2, window=None, max_dist=None, max_step=None, max_length_diff=None, penalty=None, psi=None, - use_c=False, use_pruning=False, only_ub=False): + use_c=False, use_pruning=False, only_ub=False, + inner_distance="squared euclidean"): """ Dynamic Time Warping. @@ -216,10 +217,14 @@ def distance(s1, s2, :param use_pruning: Prune values based on Euclidean distance. This is the same as passing ub_euclidean() to max_dist :param only_ub: Only compute the upper bound (Euclidean). + :param inner_distance: Distance between two points in the time series. 
+ One of 'squared euclidean', 'euclidean' Returns: DTW distance """ if use_c: + if inner_distance != "squared euclidean": + raise AttributeError('The use_c=True argument requires inner_distance=squared euclidean') if dtw_cc is None: logger.warning("C-library not available, using the Python version") else: @@ -252,6 +257,13 @@ def distance(s1, s2, penalty = 0 else: penalty *= penalty + idist_fn = None + if inner_distance == "squared euclidean": + idist_fn = lambda a, b: (a - b) ** 2 + elif inner_distance == "euclidean": + idist_fn = lambda a, b: abs(a - b) + else: + raise AttributeError("Unknown value for argument inner_distance") psi_1b, psi_1e, psi_2b, psi_2e = _process_psi_arg(psi) length = min(c + 1, abs(r - c) + 2 * (window - 1) + 1 + 1 + 1) # print("length (py) = {}".format(length)) @@ -286,7 +298,8 @@ def distance(s1, s2, if psi_1b != 0 and j_start == 0 and i < psi_1b: dtw[i1 * length] = 0 for j in range(j_start, j_end): - d = (s1[i] - s2[j])**2 + # d = (s1[i] - s2[j])**2 + d = idist_fn(s1[i], s2[j]) if d > max_step: continue assert j + 1 - skip >= 0 @@ -322,7 +335,8 @@ def distance(s1, s2, d = min(dtw[i1 * length + min(c, c + window - 1) - skip], psi_shortest) if max_dist and d > max_dist: d = inf - d = math.sqrt(d) + if inner_distance == "squared euclidean": + d = math.sqrt(d) return d From e22600fcdd1d36701714606ac1cbefbc6d4cf176 Mon Sep 17 00:00:00 2001 From: wannesm Date: Tue, 6 Jun 2023 23:18:32 +0200 Subject: [PATCH 44/59] inner dist --- dtaidistance/dtaidistancec_dtw.pxd | 1 + dtaidistance/dtaidistancec_ed.pxd | 2 + dtaidistance/dtw.py | 69 +- dtaidistance/dtw_cc.pyx | 71 +- dtaidistance/ed.py | 15 +- dtaidistance/ed_cc.pyx | 18 +- dtaidistance/innerdistance.py | 123 + dtaidistance/jinja/dtw_cc.jinja.pyx | 47 +- .../jinja/dtw_cc_warpingpath.jinja.pyx | 9 +- dtaidistance/jinja/ed_cc.jinja.pyx | 19 +- .../DTAIDistanceC/dd_benchmark.c | 15 +- .../lib/DTAIDistanceC/DTAIDistanceC/dd_dtw.c | 2075 +++++++++++++---- 
.../lib/DTAIDistanceC/DTAIDistanceC/dd_dtw.h | 16 + .../lib/DTAIDistanceC/DTAIDistanceC/dd_ed.c | 140 +- .../lib/DTAIDistanceC/DTAIDistanceC/dd_ed.h | 4 +- .../DTAIDistanceC/dd_tests_dtw.c | 21 + .../DTAIDistanceC/jinja/Makefile | 17 +- .../DTAIDistanceC/jinja/dd_dtw.jinja.c | 134 +- .../DTAIDistanceC/jinja/dtw_distance.jinja.c | 47 +- .../DTAIDistanceC/jinja/dtw_expandwps.jinja.c | 8 + .../jinja/dtw_warpingpaths.jinja.c | 46 +- .../DTAIDistanceC/jinja/generate.py | 18 +- tests/test_dtw.py | 28 +- tests/test_subsequence.py | 4 +- tests/test_warping.py | 4 +- 25 files changed, 2357 insertions(+), 594 deletions(-) create mode 100644 dtaidistance/innerdistance.py diff --git a/dtaidistance/dtaidistancec_dtw.pxd b/dtaidistance/dtaidistancec_dtw.pxd index b0d929bd..60d9ca64 100644 --- a/dtaidistance/dtaidistancec_dtw.pxd +++ b/dtaidistance/dtaidistancec_dtw.pxd @@ -15,6 +15,7 @@ cdef extern from "dd_dtw.h": Py_ssize_t psi_2e bint use_pruning bint only_ub + int inner_dist ctypedef struct DTWBlock: Py_ssize_t rb diff --git a/dtaidistance/dtaidistancec_ed.pxd b/dtaidistance/dtaidistancec_ed.pxd index 95ae8590..33de0310 100644 --- a/dtaidistance/dtaidistancec_ed.pxd +++ b/dtaidistance/dtaidistancec_ed.pxd @@ -4,4 +4,6 @@ from dtaidistancec_globals cimport seq_t cdef extern from "dd_ed.h": seq_t euclidean_distance(seq_t *s1, Py_ssize_t l1, seq_t *s2, Py_ssize_t l2) + seq_t euclidean_distance_euclidean(seq_t *s1, Py_ssize_t l1, seq_t *s2, Py_ssize_t l2) seq_t euclidean_distance_ndim(seq_t *s1, Py_ssize_t l1, seq_t *s2, Py_ssize_t l2, int ndim) + seq_t euclidean_distance_ndim_euclidean(seq_t *s1, Py_ssize_t l1, seq_t *s2, Py_ssize_t l2, int ndim) diff --git a/dtaidistance/dtw.py b/dtaidistance/dtw.py index 36b78ce2..fb6e61d5 100644 --- a/dtaidistance/dtw.py +++ b/dtaidistance/dtw.py @@ -17,6 +17,7 @@ from . import ed from . import util from . import util_numpy +from . 
import innerdistance from .util import SeriesContainer from .exceptions import NumpyException @@ -108,7 +109,7 @@ def _check_library(include_omp=False, raise_exception=True): class DTWSettings: def __init__(self, window=None, use_pruning=False, max_dist=None, max_step=None, - max_length_diff=None, penalty=None, psi=None): + max_length_diff=None, penalty=None, psi=None, inner_dist=innerdistance.default): self.window = window self.use_pruning = use_pruning self.max_dist = max_dist @@ -116,6 +117,7 @@ def __init__(self, window=None, use_pruning=False, max_dist=None, max_step=None, self.max_length_diff = max_length_diff self.penalty = penalty self.psi = psi + self.inner_dist = inner_dist @staticmethod def for_dtw(s1, s2, **kwargs): @@ -134,13 +136,15 @@ def c_kwargs(self): max_length_diff = 0 if self.max_length_diff is None else self.max_length_diff penalty = 0 if self.penalty is None else self.penalty psi = 0 if self.psi is None else self.psi + inner_dist = innerdistance.to_c(self.inner_dist) return { 'window': window, 'max_dist': max_dist, 'max_step': max_step, 'max_length_diff': max_length_diff, 'penalty': penalty, - 'psi': psi + 'psi': psi, + 'inner_dist': inner_dist } def __str__(self): @@ -152,12 +156,13 @@ def __str__(self): def lb_keogh(s1, s2, window=None, max_dist=None, - max_step=None, max_length_diff=None, use_c=False): + max_step=None, max_length_diff=None, use_c=False, inner_dist=innerdistance.default): """Lowerbound LB_KEOGH""" if use_c: - return dtw_cc.lb_keogh(s1, s2, window=window, max_dist=max_dist, max_step=max_step) + return dtw_cc.lb_keogh(s1, s2, window=window, max_dist=max_dist, max_step=max_step, inner_dist=inner_dist) if window is None: window = max(len(s1), len(s2)) + idist_fn, result_fn = innerdistance.inner_dist_fns(inner_dist, use_ndim=False) t = 0 imin_diff = max(0, len(s1) - len(s2)) + window - 1 @@ -169,24 +174,24 @@ def lb_keogh(s1, s2, window=None, max_dist=None, li = array_min(s2[imin:imax]) ci = s1[i] if ci > ui: - t += (ci - ui)**2 
+ t += idist_fn(ci, ui) # (ci - ui)**2 elif ci < li: - t += (ci - li)**2 + t += idist_fn(ci, li) # (ci - li)**2 else: pass - return math.sqrt(t) + return result_fn(t) -def ub_euclidean(s1, s2): +def ub_euclidean(s1, s2, inner_dist=innerdistance.default): """ See ed.euclidean_distance""" - return ed.distance(s1, s2) + return ed.distance(s1, s2, inner_dist=inner_dist) def distance(s1, s2, window=None, max_dist=None, max_step=None, max_length_diff=None, penalty=None, psi=None, use_c=False, use_pruning=False, only_ub=False, - inner_distance="squared euclidean"): + inner_dist=innerdistance.default): """ Dynamic Time Warping. @@ -217,14 +222,12 @@ def distance(s1, s2, :param use_pruning: Prune values based on Euclidean distance. This is the same as passing ub_euclidean() to max_dist :param only_ub: Only compute the upper bound (Euclidean). - :param inner_distance: Distance between two points in the time series. - One of 'squared euclidean', 'euclidean' + :param inner_dist: Distance between two points in the time series. 
+ One of 'squared euclidean' (default), 'euclidean' Returns: DTW distance """ if use_c: - if inner_distance != "squared euclidean": - raise AttributeError('The use_c=True argument requires inner_distance=squared euclidean') if dtw_cc is None: logger.warning("C-library not available, using the Python version") else: @@ -235,7 +238,8 @@ def distance(s1, s2, penalty=penalty, psi=psi, use_pruning=use_pruning, - only_ub=only_ub) + only_ub=only_ub, + inner_dist=inner_dist) r, c = len(s1), len(s2) if max_length_diff is not None and abs(r - c) > max_length_diff: return inf @@ -257,13 +261,7 @@ def distance(s1, s2, penalty = 0 else: penalty *= penalty - idist_fn = None - if inner_distance == "squared euclidean": - idist_fn = lambda a, b: (a - b) ** 2 - elif inner_distance == "euclidean": - idist_fn = lambda a, b: abs(a - b) - else: - raise AttributeError("Unknown value for argument inner_distance") + idist_fn, result_fn = innerdistance.inner_dist_fns(inner_dist, use_ndim=False) psi_1b, psi_1e, psi_2b, psi_2e = _process_psi_arg(psi) length = min(c + 1, abs(r - c) + 2 * (window - 1) + 1 + 1 + 1) # print("length (py) = {}".format(length)) @@ -335,13 +333,13 @@ def distance(s1, s2, d = min(dtw[i1 * length + min(c, c + window - 1) - skip], psi_shortest) if max_dist and d > max_dist: d = inf - if inner_distance == "squared euclidean": - d = math.sqrt(d) + d = result_fn(d) return d def distance_fast(s1, s2, window=None, max_dist=None, - max_step=None, max_length_diff=None, penalty=None, psi=None, use_pruning=False, only_ub=False): + max_step=None, max_length_diff=None, penalty=None, psi=None, use_pruning=False, only_ub=False, + inner_dist=innerdistance.default): """Same as :meth:`distance` but with different defaults to chose the fast C-based version of the implementation (use_c = True). 
@@ -362,7 +360,8 @@ def distance_fast(s1, s2, window=None, max_dist=None, penalty=penalty, psi=psi, use_pruning=use_pruning, - only_ub=only_ub) + only_ub=only_ub, + inner_dist=inner_dist) return d @@ -391,7 +390,7 @@ def _process_psi_arg(psi): def warping_paths(s1, s2, window=None, max_dist=None, use_pruning=False, max_step=None, max_length_diff=None, penalty=None, psi=None, psi_neg=True, - use_c=False, use_ndim=False): + use_c=False, use_ndim=False, inner_dist=innerdistance.default): """ Dynamic Time Warping. @@ -411,19 +410,19 @@ def warping_paths(s1, s2, window=None, max_dist=None, use_pruning=False, :param use_c: Use the C implementation instead of Python :param use_ndim: The input series is >1 dimensions. Use cost = EuclideanDistance(s1[i], s2[j]) + :param inner_dist: Distance between two points in the time series. + One of 'squared euclidean' (default), 'euclidean' :returns: (DTW distance, DTW matrix) """ if use_c: return warping_paths_fast(s1, s2, window=window, max_dist=max_dist, use_pruning=use_pruning, max_step=max_step, max_length_diff=max_length_diff, penalty=penalty, psi=psi, psi_neg=psi_neg, compact=False, - use_ndim=use_ndim) + use_ndim=use_ndim, inner_dist=inner_dist) if np is None: raise NumpyException("Numpy is required for the warping_paths method") - if use_ndim: - cost = lambda x, y: np.sum((x - y) ** 2) - else: - cost = lambda x, y: (x - y) ** 2 + # Always use ndim to use np functions + cost, result_fn = innerdistance.inner_dist_fns(inner_dist, use_ndim=True) r, c = len(s1), len(s2) if max_length_diff is not None and abs(r - c) > max_length_diff: return inf @@ -493,7 +492,7 @@ def warping_paths(s1, s2, window=None, max_dist=None, use_pruning=False, ec_next = j + 1 ec = ec_next # Decide which d to return - dtw = np.sqrt(dtw) + dtw = result_fn(dtw) if psi_1e == 0 and psi_2e == 0: d = dtw[i1, min(c, c + window - 1)] else: @@ -528,7 +527,7 @@ def warping_paths(s1, s2, window=None, max_dist=None, use_pruning=False, def warping_paths_fast(s1, s2, 
window=None, max_dist=None, use_pruning=False, max_step=None, max_length_diff=None, penalty=None, psi=None, psi_neg=True, compact=False, - use_ndim=False): + use_ndim=False, inner_dist=innerdistance.default): """Fast C version of :meth:`warping_paths`. Additional parameters: @@ -542,7 +541,7 @@ def warping_paths_fast(s1, s2, window=None, max_dist=None, use_pruning=False, c = len(s2) _check_library(raise_exception=True) settings = DTWSettings.for_dtw(s1, s2, window=window, max_dist=max_dist, use_pruning=use_pruning, max_step=max_step, - max_length_diff=max_length_diff, penalty=penalty, psi=psi) + max_length_diff=max_length_diff, penalty=penalty, psi=psi, inner_dist=inner_dist) if compact: wps_width = dtw_cc.wps_width(r, c, **settings.c_kwargs()) wps_compact = np.full((len(s1)+1, wps_width), inf) diff --git a/dtaidistance/dtw_cc.pyx b/dtaidistance/dtw_cc.pyx index 78800ddf..73573bfc 100644 --- a/dtaidistance/dtw_cc.pyx +++ b/dtaidistance/dtw_cc.pyx @@ -78,18 +78,6 @@ cdef class DTWWps: def __init__(self, l1, l2, DTWSettings settings): self._wps = dtaidistancec_dtw.dtw_wps_parts(l1, l2, &settings._settings) - @property - def ri1(self): - return self._wps.ri1 - - @property - def ri2(self): - return self._wps.ri2 - - @property - def ri3(self): - return self._wps.ri3 - cdef class DTWSettings: def __cinit__(self): @@ -155,6 +143,13 @@ cdef class DTWSettings: self._settings.only_ub = False else: self._settings.only_ub = kwargs["only_ub"] + if "inner_dist" in kwargs: + if kwargs["inner_dist"] == "squared euclidean" or kwargs["inner_dist"] == 0: + self._settings.inner_dist = 0 + elif kwargs["inner_dist"] == "euclidean" or kwargs["inner_dist"] == 1: + self._settings.inner_dist = 1 + else: + raise AttributeError("Unknown inner_dist: {}".format(kwargs["inner_dist"])) @property def window(self): @@ -193,6 +188,15 @@ cdef class DTWSettings: def only_ub(self): return self._settings.only_ub + @property + def inner_dist(self): + if self._settings.inner_dist == 0: + return "squared 
euclidean" + elif self._settings.inner_dist == 1: + return "euclidean" + else: + return "unknown inner distance" + def __str__(self): return ( "DTWSettings {\n" @@ -204,6 +208,7 @@ cdef class DTWSettings: f" psi = {self.psi}\n" f" use_pruning = {self.use_pruning}\n" f" only_ub = {self.only_ub}\n" + f" inner_dist = {self.inner_dist}\n" "}") @@ -488,6 +493,7 @@ def warping_paths_compact_affinity(seq_t[:, :] dtw, seq_t[:] s1, seq_t[:] s2, def warping_path(seq_t[:] s1, seq_t[:] s2, include_distance=False, **kwargs): # Assumes C contiguous cdef Py_ssize_t path_length; + cdef seq_t dist; settings = DTWSettings(**kwargs) cdef Py_ssize_t *i1 = PyMem_Malloc((len(s1) + len(s2)) * sizeof(Py_ssize_t)) if not i1: @@ -495,9 +501,8 @@ def warping_path(seq_t[:] s1, seq_t[:] s2, include_distance=False, **kwargs): cdef Py_ssize_t *i2 = PyMem_Malloc((len(s1) + len(s2)) * sizeof(Py_ssize_t)) if not i2: raise MemoryError() - d = None try: - d = dtaidistancec_dtw.dtw_warping_path(&s1[0], len(s1), &s2[0], len(s2), i1, i2, &path_length, &settings._settings) + dist = dtaidistancec_dtw.dtw_warping_path(&s1[0], len(s1), &s2[0], len(s2), i1, i2, &path_length, &settings._settings) path = [] for i in range(path_length): path.append((i1[i], i2[i])) @@ -505,14 +510,15 @@ def warping_path(seq_t[:] s1, seq_t[:] s2, include_distance=False, **kwargs): finally: PyMem_Free(i1) PyMem_Free(i2) - if include_distance is True: - return path, d + if include_distance: + return path, dist return path -def warping_path_ndim(seq_t[:, :] s1, seq_t[:, :] s2, include_distance=False, int ndim=1, **kwargs): +def warping_path_ndim(seq_t[:, :] s1, seq_t[:, :] s2, int ndim=1, include_distance=False, **kwargs): # Assumes C contiguous cdef Py_ssize_t path_length; + cdef seq_t dist; settings = DTWSettings(**kwargs) cdef Py_ssize_t *i1 = PyMem_Malloc((len(s1) + len(s2)) * sizeof(Py_ssize_t)) if not i1: @@ -520,10 +526,8 @@ def warping_path_ndim(seq_t[:, :] s1, seq_t[:, :] s2, include_distance=False, in cdef Py_ssize_t *i2 = 
PyMem_Malloc((len(s1) + len(s2)) * sizeof(Py_ssize_t)) if not i2: raise MemoryError() - d = None try: - d = dtaidistancec_dtw.dtw_warping_path_ndim(&s1[0, 0], len(s1), &s2[0, 0], len(s2), - i1, i2, &path_length, ndim, &settings._settings) + dist = dtaidistancec_dtw.dtw_warping_path_ndim(&s1[0, 0], len(s1), &s2[0, 0], len(s2), i1, i2, &path_length, ndim, &settings._settings) path = [] for i in range(path_length): path.append((i1[i], i2[i])) @@ -531,8 +535,8 @@ def warping_path_ndim(seq_t[:, :] s1, seq_t[:, :] s2, include_distance=False, in finally: PyMem_Free(i1) PyMem_Free(i2) - if include_distance is True: - return path, d + if include_distance: + return path, dist return path @@ -555,17 +559,13 @@ def wps_expand_slice(seq_t[:, :] wps, seq_t[:, :] slice, Py_ssize_t l1, Py_ssize l1, l2, rb, re, cb, ce, &settings._settings) -def wps_print(seq_t[:, :] wps, Py_ssize_t l1, Py_ssize_t l2, **kwargs): - settings = DTWSettings(**kwargs) - dtaidistancec_dtw.dtw_print_wps(&wps[0,0], l1, l2, &settings._settings) - -def wps_print_compact(seq_t[:, :] wps, Py_ssize_t l1, Py_ssize_t l2, **kwargs): +def wps_print(seq_t[:, :] wps, **kwargs): settings = DTWSettings(**kwargs) - dtaidistancec_dtw.dtw_print_wps_compact(&wps[0,0], l1, l2, &settings._settings) + dtaidistancec_dtw.dtw_print_wps(&wps[0,0], wps.shape[0]-1, wps.shape[1]-1, &settings._settings) -def wps_parts(Py_ssize_t l1, Py_ssize_t l2, **kwargs): +def wps_print_compact(seq_t[:, :] wps, **kwargs): settings = DTWSettings(**kwargs) - return DTWWps(l1, l2, settings) + dtaidistancec_dtw.dtw_print_wps_compact(&wps[0,0], wps.shape[0]-1, wps.shape[1]-1, &settings._settings) def best_path_compact(seq_t[:, :] wps, Py_ssize_t l1, Py_ssize_t l2, **kwargs): cdef Py_ssize_t path_length; @@ -577,7 +577,7 @@ def best_path_compact(seq_t[:, :] wps, Py_ssize_t l1, Py_ssize_t l2, **kwargs): if not i2: raise MemoryError() try: - path_length = dtaidistancec_dtw.dtw_best_path(&wps[0, 0], i1, i2, l1, l2, + path_length = 
dtaidistancec_dtw.dtw_best_path(&wps[0,0], i1, i2, l1, l2, &settings._settings) path = [] for i in range(path_length): @@ -618,6 +618,7 @@ def srand(unsigned int seed): def warping_path_prob(seq_t[:] s1, seq_t[:] s2, seq_t avg, include_distance=False, **kwargs): # Assumes C contiguous cdef Py_ssize_t path_length; + cdef seq_t dist; settings = DTWSettings(**kwargs) cdef Py_ssize_t *i1 = PyMem_Malloc((len(s1) + len(s2)) * sizeof(Py_ssize_t)) if not i1: @@ -625,11 +626,9 @@ def warping_path_prob(seq_t[:] s1, seq_t[:] s2, seq_t avg, include_distance=Fals cdef Py_ssize_t *i2 = PyMem_Malloc((len(s1) + len(s2)) * sizeof(Py_ssize_t)) if not i2: raise MemoryError() - d = None try: - d = dtaidistancec_dtw.dtw_warping_path_prob_ndim(&s1[0], len(s1), &s2[0], len(s2), - i1, i2, &path_length, - avg, 1, &settings._settings) + dist = dtaidistancec_dtw.dtw_warping_path_prob_ndim(&s1[0], len(s1), &s2[0], len(s2), i1, i2, &path_length, + avg, 1, &settings._settings) path = [] for i in range(path_length): path.append((i1[i], i2[i])) @@ -638,7 +637,7 @@ def warping_path_prob(seq_t[:] s1, seq_t[:] s2, seq_t avg, include_distance=Fals PyMem_Free(i1) PyMem_Free(i2) if include_distance: - return path, d + return path, dist return path diff --git a/dtaidistance/ed.py b/dtaidistance/ed.py index 43c88bbb..9bb9ee89 100644 --- a/dtaidistance/ed.py +++ b/dtaidistance/ed.py @@ -14,6 +14,7 @@ import math from . import util_numpy +from . import innerdistance logger = logging.getLogger("be.kuleuven.dtai.distance") @@ -44,7 +45,7 @@ def _check_library(raise_exception=True): raise Exception(msg) -def distance(s1, s2): +def distance(s1, s2, inner_dist=innerdistance.default): """ Euclidean distance between two sequences. Supports different lengths. 
If the two series differ in length, compare the last element of the shortest series @@ -53,8 +54,10 @@ def distance(s1, s2): :param s1: Sequence of numbers :param s2: Sequence of numbers + :param inner_dist: Inner distance function between two values :return: Euclidean distance """ + idist_fn, result_fn = innerdistance.inner_dist_fns(inner_dist=inner_dist, use_ndim=False) n = min(len(s1), len(s2)) ub = 0 for v1, v2 in zip(s1, s2): @@ -64,19 +67,19 @@ def distance(s1, s2): if len(s1) > len(s2): v2 = s2[n - 1] for v1 in s1[n:]: - ub += (v1 - v2)**2 + ub += idist_fn(v1, v2) # (v1 - v2)**2 elif len(s1) < len(s2): v1 = s1[n-1] for v2 in s2[n:]: - ub += (v1 - v2)**2 - return math.sqrt(ub) + ub += idist_fn(v1, v2) # (v1 - v2)**2 + return result_fn(ub) # math.sqrt(ub) -def distance_fast(s1, s2): +def distance_fast(s1, s2, inner_dist=innerdistance.default): _check_library(raise_exception=True) # Check that Numpy arrays for C contiguous s1 = util_numpy.verify_np_array(s1) s2 = util_numpy.verify_np_array(s2) # Move data to C library - d = ed_cc.distance(s1, s2) + d = ed_cc.distance(s1, s2, innerdistance.to_c(inner_dist)) return d diff --git a/dtaidistance/ed_cc.pyx b/dtaidistance/ed_cc.pyx index e9f70e3c..1320106a 100644 --- a/dtaidistance/ed_cc.pyx +++ b/dtaidistance/ed_cc.pyx @@ -17,7 +17,7 @@ from dtaidistancec_dtw cimport seq_t logger = logging.getLogger("be.kuleuven.dtai.distance") -def distance(seq_t[:] s1, seq_t[:] s2): +def distance(seq_t[:] s1, seq_t[:] s2, int inner_dist=0): """ Euclidean distance between two sequences. Supports different lengths. 
If the two series differ in length, compare the last element of the shortest series @@ -27,10 +27,15 @@ def distance(seq_t[:] s1, seq_t[:] s2): :param s2: Sequence of numbers :return: Euclidean distance """ - return dtaidistancec_ed.euclidean_distance(&s1[0], len(s1), &s2[0], len(s2)) + if inner_dist == 0: + return dtaidistancec_ed.euclidean_distance(&s1[0], len(s1), &s2[0], len(s2)) + elif inner_dist == 1: + return dtaidistancec_ed.euclidean_distance_euclidean(&s1[0], len(s1), &s2[0], len(s2)) + else: + raise AttributeError("Unknown inner distance") -def distance_ndim(seq_t[:, :] s1, seq_t[:, :] s2): +def distance_ndim(seq_t[:, :] s1, seq_t[:, :] s2, int inner_dist=0): """ Euclidean distance between two sequences. Supports different lengths. If the two series differ in length, compare the last element of the shortest series @@ -44,4 +49,9 @@ def distance_ndim(seq_t[:, :] s1, seq_t[:, :] s2): if s1.shape[1] != s2.shape[1]: raise Exception("Dimension of sequence entries needs to be the same: {} != {}".format(s1.shape[1], s2.shape[1])) ndim = s1.shape[1] - return dtaidistancec_ed.euclidean_distance_ndim(&s1[0,0], len(s1), &s2[0,0], len(s2), ndim) + if inner_dist == 0: + return dtaidistancec_ed.euclidean_distance_ndim(&s1[0,0], len(s1), &s2[0,0], len(s2), ndim) + elif inner_dist == 1: + return dtaidistancec_ed.euclidean_distance_ndim_euclidean(&s1[0, 0], len(s1), &s2[0, 0], len(s2), ndim) + else: + raise AttributeError("Unknown inner distance") \ No newline at end of file diff --git a/dtaidistance/innerdistance.py b/dtaidistance/innerdistance.py new file mode 100644 index 00000000..c728fea8 --- /dev/null +++ b/dtaidistance/innerdistance.py @@ -0,0 +1,123 @@ +# -*- coding: UTF-8 -*- +""" +dtaidistance.innerdistance +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Inner distances for DTW and ED + +:author: Wannes Meert +:copyright: Copyright 2023 KU Leuven, DTAI Research Group. +:license: Apache License, Version 2.0, see LICENSE for details. + +""" +import math +import logging + +from . 
import util +from . import util_numpy + +try: + if util_numpy.test_without_numpy(): + raise ImportError() + import numpy as np + DTYPE = np.double + argmin = np.argmin + argmax = np.argmax + array_min = np.min + array_max = np.max +except ImportError: + np = None + argmin = util.argmin + argmax = util.argmax + array_min = min + array_max = max + + +logger = logging.getLogger("be.kuleuven.dtai.distance") +default = 'squared euclidean' + + +class SquaredEuclidean: + + @staticmethod + def inner_dist(x, y): + return (x - y) ** 2 + + @staticmethod + def result(x): + return math.sqrt(x) + + +class SquaredEuclideanNdim: + + @staticmethod + def inner_dist(x, y): + return np.sum((x - y) ** 2) + + @staticmethod + def result(x): + return np.sqrt(x) + + +class Euclidean: + + @staticmethod + def inner_dist(x, y): + return abs(x - y) + + @staticmethod + def result(x): + return x + + +class EuclideanNdim: + + @staticmethod + def inner_dist(x, y): + return np.sqrt(np.sum(np.power(x - y, 2))) + + @staticmethod + def result(x): + return x + + +class CustomInnerDist: + + @staticmethod + def inner_dist(x, y): + raise Exception("Function not defined") + + @staticmethod + def result(x): + raise Exception("Function not defined") + + +def inner_dist_fns(inner_dist="squared euclidean", use_ndim=False): + use_cls = None + if inner_dist == "squared euclidean": + if use_ndim: + use_cls = SquaredEuclideanNdim + else: + use_cls = SquaredEuclidean + elif inner_dist == "euclidean": + if use_ndim: + use_cls = EuclideanNdim + else: + use_cls = Euclidean + elif hasattr(inner_dist, 'inner_dist') and hasattr(inner_dist, 'result'): + use_cls = inner_dist + else: + raise AttributeError("Unknown value for argument inner_dist") + return use_cls.inner_dist, use_cls.result + + +def to_c(inner_dist): + if inner_dist == 'squared euclidean': + return 0 + elif inner_dist == 'euclidean': + return 1 + elif hasattr(inner_dist, 'inner_dist') and hasattr(inner_dist, 'result'): + raise AttributeError('Custom inner 
distance functions are not supported for the fast C implementation') + else: + raise AttributeError('Unknown inner_dist: {}'.format(inner_dist)) + diff --git a/dtaidistance/jinja/dtw_cc.jinja.pyx b/dtaidistance/jinja/dtw_cc.jinja.pyx index 8b2ee7b2..aebdb8ca 100644 --- a/dtaidistance/jinja/dtw_cc.jinja.pyx +++ b/dtaidistance/jinja/dtw_cc.jinja.pyx @@ -143,6 +143,13 @@ cdef class DTWSettings: self._settings.only_ub = False else: self._settings.only_ub = kwargs["only_ub"] + if "inner_dist" in kwargs: + if kwargs["inner_dist"] == "squared euclidean" or kwargs["inner_dist"] == 0: + self._settings.inner_dist = 0 + elif kwargs["inner_dist"] == "euclidean" or kwargs["inner_dist"] == 1: + self._settings.inner_dist = 1 + else: + raise AttributeError("Unknown inner_dist: {}".format(kwargs["inner_dist"])) @property def window(self): @@ -181,6 +188,15 @@ cdef class DTWSettings: def only_ub(self): return self._settings.only_ub + @property + def inner_dist(self): + if self._settings.inner_dist == 0: + return "squared euclidean" + elif self._settings.inner_dist == 1: + return "euclidean" + else: + return "unknown inner distance" + def __str__(self): return ( "DTWSettings {\n" @@ -192,6 +208,7 @@ cdef class DTWSettings: f" psi = {self.psi}\n" f" use_pruning = {self.use_pruning}\n" f" only_ub = {self.only_ub}\n" + f" inner_dist = {self.inner_dist}\n" "}") @@ -389,6 +406,27 @@ def wps_print_compact(seq_t[:, :] wps, **kwargs): settings = DTWSettings(**kwargs) dtaidistancec_dtw.dtw_print_wps_compact(&wps[0,0], wps.shape[0]-1, wps.shape[1]-1, &settings._settings) +def best_path_compact(seq_t[:, :] wps, Py_ssize_t l1, Py_ssize_t l2, **kwargs): + cdef Py_ssize_t path_length; + settings = DTWSettings(**kwargs) + cdef Py_ssize_t *i1 = PyMem_Malloc((l1 + l2) * sizeof(Py_ssize_t)) + if not i1: + raise MemoryError() + cdef Py_ssize_t *i2 = PyMem_Malloc((l1 + l2) * sizeof(Py_ssize_t)) + if not i2: + raise MemoryError() + try: + path_length = dtaidistancec_dtw.dtw_best_path(&wps[0,0], i1, i2, 
l1, l2, + &settings._settings) + path = [] + for i in range(path_length): + path.append((i1[i], i2[i])) + path.reverse() + finally: + PyMem_Free(i1) + PyMem_Free(i2) + return path + def best_path_compact_affinity(seq_t[:, :] wps, Py_ssize_t rs, Py_ssize_t cs, **kwargs): cdef Py_ssize_t path_length; settings = DTWSettings(**kwargs) @@ -416,9 +454,10 @@ def best_path_compact_affinity(seq_t[:, :] wps, Py_ssize_t rs, Py_ssize_t cs, ** def srand(unsigned int seed): dtaidistancec_dtw.dtw_srand(seed) -def warping_path_prob(seq_t[:] s1, seq_t[:] s2, seq_t avg, **kwargs): +def warping_path_prob(seq_t[:] s1, seq_t[:] s2, seq_t avg, include_distance=False, **kwargs): # Assumes C contiguous cdef Py_ssize_t path_length; + cdef seq_t dist; settings = DTWSettings(**kwargs) cdef Py_ssize_t *i1 = PyMem_Malloc((len(s1) + len(s2)) * sizeof(Py_ssize_t)) if not i1: @@ -427,8 +466,8 @@ def warping_path_prob(seq_t[:] s1, seq_t[:] s2, seq_t avg, **kwargs): if not i2: raise MemoryError() try: - path_length = dtaidistancec_dtw.warping_path_prob_ndim(&s1[0], len(s1), &s2[0], len(s2), i1, i2, - avg, 1, &settings._settings) + dist = dtaidistancec_dtw.dtw_warping_path_prob_ndim(&s1[0], len(s1), &s2[0], len(s2), i1, i2, &path_length, + avg, 1, &settings._settings) path = [] for i in range(path_length): path.append((i1[i], i2[i])) @@ -436,6 +475,8 @@ def warping_path_prob(seq_t[:] s1, seq_t[:] s2, seq_t avg, **kwargs): finally: PyMem_Free(i1) PyMem_Free(i2) + if include_distance: + return path, dist return path {% set suffix = '' %} diff --git a/dtaidistance/jinja/dtw_cc_warpingpath.jinja.pyx b/dtaidistance/jinja/dtw_cc_warpingpath.jinja.pyx index d5401f31..00456af3 100644 --- a/dtaidistance/jinja/dtw_cc_warpingpath.jinja.pyx +++ b/dtaidistance/jinja/dtw_cc_warpingpath.jinja.pyx @@ -5,9 +5,10 @@ def warping_path{{suffix}}( {%- else -%} seq_t[:] s1, seq_t[:] s2,{{s}} {%- endif -%} - **kwargs): + include_distance=False, **kwargs): # Assumes C contiguous cdef Py_ssize_t path_length; + cdef seq_t 
dist; settings = DTWSettings(**kwargs) cdef Py_ssize_t *i1 = PyMem_Malloc((len(s1) + len(s2)) * sizeof(Py_ssize_t)) if not i1: @@ -17,9 +18,9 @@ def warping_path{{suffix}}( raise MemoryError() try: {%- if "ndim" in suffix %} - path_length = dtaidistancec_dtw.warping_path_ndim(&s1[0, 0], len(s1), &s2[0, 0], len(s2), i1, i2, ndim, &settings._settings) + dist = dtaidistancec_dtw.dtw_warping_path_ndim(&s1[0, 0], len(s1), &s2[0, 0], len(s2), i1, i2, &path_length, ndim, &settings._settings) {%- else %} - path_length = dtaidistancec_dtw.warping_path(&s1[0], len(s1), &s2[0], len(s2), i1, i2, &settings._settings) + dist = dtaidistancec_dtw.dtw_warping_path(&s1[0], len(s1), &s2[0], len(s2), i1, i2, &path_length, &settings._settings) {%- endif %} path = [] for i in range(path_length): @@ -28,4 +29,6 @@ def warping_path{{suffix}}( finally: PyMem_Free(i1) PyMem_Free(i2) + if include_distance: + return path, dist return path diff --git a/dtaidistance/jinja/ed_cc.jinja.pyx b/dtaidistance/jinja/ed_cc.jinja.pyx index 75ca257c..04ffee6c 100644 --- a/dtaidistance/jinja/ed_cc.jinja.pyx +++ b/dtaidistance/jinja/ed_cc.jinja.pyx @@ -17,7 +17,7 @@ from dtaidistancec_dtw cimport seq_t logger = logging.getLogger("be.kuleuven.dtai.distance") -def distance(seq_t[:] s1, seq_t[:] s2): +def distance(seq_t[:] s1, seq_t[:] s2, int inner_dist=0): """ Euclidean distance between two sequences. Supports different lengths. 
If the two series differ in length, compare the last element of the shortest series @@ -27,10 +27,15 @@ def distance(seq_t[:] s1, seq_t[:] s2): :param s2: Sequence of numbers :return: Euclidean distance """ - return dtaidistancec_ed.euclidean_distance(&s1[0], len(s1), &s2[0], len(s2)) + if inner_dist == 0: + return dtaidistancec_ed.euclidean_distance(&s1[0], len(s1), &s2[0], len(s2)) + elif inner_dist == 1: + return dtaidistancec_ed.euclidean_distance_euclidean(&s1[0], len(s1), &s2[0], len(s2)) + else: + raise AttributeError("Unknown inner distance") -def distance_ndim(seq_t[:, :] s1, seq_t[:, :] s2): +def distance_ndim(seq_t[:, :] s1, seq_t[:, :] s2, int inner_dist=0): """ Euclidean distance between two sequences. Supports different lengths. If the two series differ in length, compare the last element of the shortest series @@ -44,5 +49,9 @@ def distance_ndim(seq_t[:, :] s1, seq_t[:, :] s2): if s1.shape[1] != s2.shape[1]: raise Exception("Dimension of sequence entries needs to be the same: {} != {}".format(s1.shape[1], s2.shape[1])) ndim = s1.shape[1] - return dtaidistancec_ed.euclidean_distance_ndim(&s1[0,0], len(s1), &s2[0,0], len(s2), ndim) - + if inner_dist == 0: + return dtaidistancec_ed.euclidean_distance_ndim(&s1[0,0], len(s1), &s2[0,0], len(s2), ndim) + elif inner_dist == 1: + return dtaidistancec_ed.euclidean_distance_ndim_euclidean(&s1[0, 0], len(s1), &s2[0, 0], len(s2), ndim) + else: + raise AttributeError("Unknown inner distance") diff --git a/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/dd_benchmark.c b/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/dd_benchmark.c index 736febe6..d65bdd23 100644 --- a/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/dd_benchmark.c +++ b/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/dd_benchmark.c @@ -107,11 +107,18 @@ void benchmark3() { } void benchmark4() { - double s1[] = {0, 0, 1, 2, 1, 0, 1, 0, 0}; - double s2[] = {0, 1, 2, 0, 0, 0, 0, 0, 0}; +// double s1[] = {0, 0, 1, 2, 1, 0, 1, 0, 0}; int l1 = 9; +// double s2[] = 
{0, 1, 2, 0, 0, 0, 0, 0, 0}; int l2 = 9; + double s1[] = {0., 0., 1., 2., 1., 0., 1., 0., 0., 2., 1., 0., 0.}; int l1 = 13; + double s2[] = {0., 1., 2., 3., 1., 0., 0., 0., 2., 1., 0., 0., 0.}; int l2 = 13; DTWSettings settings = dtw_settings_default(); settings.use_pruning = true; - double d = dtw_distance(s1, 9, s2, 9, &settings); + settings.inner_dist = 0; + dtw_settings_set_psi(2, &settings); +// double d = dtw_distance(s1, 9, s2, 9, &settings); + idx_t wps_length = dtw_settings_wps_length(l1, l2, &settings); + seq_t wps[wps_length]; + double d = dtw_warping_paths(wps, s1, l1, s2, l2, true, true, false, &settings); printf("d=%f\n", d); } @@ -593,7 +600,7 @@ int main(int argc, const char * argv[]) { // benchmark5(); // benchmark6(); // benchmark7(); - //benchmark8(); +// benchmark8(); // benchmark9(); // benchmark10(); // benchmark11(); diff --git a/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/dd_dtw.c b/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/dd_dtw.c index 7f4577ef..6f236b83 100644 --- a/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/dd_dtw.c +++ b/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/dd_dtw.c @@ -26,7 +26,8 @@ DTWSettings dtw_settings_default(void) { .psi_2b = 0, .psi_2e = 0, .use_pruning = false, - .only_ub = false + .only_ub = false, + .inner_dist = 0 }; return s; } @@ -59,13 +60,16 @@ void dtw_settings_print(DTWSettings *settings) { settings->psi_2b, settings->psi_2e); printf(" use_pruning = %d\n", settings->use_pruning); printf(" only_ub = %d\n", settings->only_ub); + printf(" inner_dist = %d\n", settings->inner_dist); printf("}\n"); } // MARK: DTW + /** Compute the DTW between two series. +Use the Squared Euclidean inner distance. @param s1 First sequence @param l1 Length of first sequence. @@ -76,6 +80,9 @@ Compute the DTW between two series. 
seq_t dtw_distance(seq_t *s1, idx_t l1, seq_t *s2, idx_t l2, DTWSettings *settings) { + if (settings->inner_dist == 1) { + return dtw_distance_euclidean(s1, l1, s2, l2, settings); + } assert(settings->psi_1b <= l1 && settings->psi_1e <= l1 && settings->psi_2b <= l2 && settings->psi_2e <= l2); idx_t ldiff; @@ -96,7 +103,8 @@ seq_t dtw_distance(seq_t *s1, idx_t l1, printf("r=%zu, c=%zu\n", l1, l2); #endif if (settings->use_pruning || settings->only_ub) { - max_dist = pow(ub_euclidean(s1, l1, s2, l2), 2); + max_dist = ub_euclidean(s1, l1, s2, l2); + max_dist = pow(max_dist, 2); if (settings->only_ub) { return max_dist; } @@ -209,7 +217,7 @@ seq_t dtw_distance(seq_t *s1, idx_t l1, #ifdef DTWDEBUG printf("ri=%zu,ci=%zu, s1[i] = s1[%zu] = %f , s2[j] = s2[%zu] = %f\n", i, j, i, s1[i], j, s2[j]); #endif - d = EDIST(s1[i], s2[j]); + d = SEDIST(s1[i], s2[j]); if (d > max_step) { // Let the value be INFINITY as initialized continue; @@ -291,8 +299,10 @@ seq_t dtw_distance(seq_t *s1, idx_t l1, } + /** Compute the DTW between two n-dimensional series. +Use the Squared Euclidean inner distance. @param s1 First sequence @param l1 Length of first sequence. In tuples, real length should be length*ndim. @@ -304,6 +314,9 @@ Compute the DTW between two n-dimensional series. 
seq_t dtw_distance_ndim(seq_t *s1, idx_t l1, seq_t *s2, idx_t l2, int ndim, DTWSettings *settings) { + if (settings->inner_dist == 1) { + return dtw_distance_ndim_euclidean(s1, l1, s2, l2, ndim, settings); + } assert(settings->psi_1b <= l1 && settings->psi_1e <= l1 && settings->psi_2b <= l2 && settings->psi_2e <= l2); idx_t ldiff; @@ -324,7 +337,8 @@ seq_t dtw_distance_ndim(seq_t *s1, idx_t l1, printf("r=%zu, c=%zu\n", l1, l2); #endif if (settings->use_pruning || settings->only_ub) { - max_dist = pow(ub_euclidean_ndim(s1, l1, s2, l2, ndim), 2); + max_dist = ub_euclidean_ndim(s1, l1, s2, l2, ndim); + max_dist = pow(max_dist, 2); if (settings->only_ub) { return max_dist; } @@ -443,7 +457,7 @@ seq_t dtw_distance_ndim(seq_t *s1, idx_t l1, #endif d = 0; for (int d_i=0; d_i max_step) { // Let the value be INFINITY as initialized @@ -526,191 +540,1549 @@ seq_t dtw_distance_ndim(seq_t *s1, idx_t l1, } -// MARK: WPS -/*! -Compute all warping paths between two series. - -@param wps Empty array of length `(l1+1)*min(l2+1, abs(l1-l2) + 2*window-1)` in which the warping paths will be stored. - It represents the full matrix of warping paths between the two series. +/** +Compute the DTW between two series. +Use the Euclidean inner distance. + @param s1 First sequence -@param l1 Length of first sequence +@param l1 Length of first sequence. @param s2 Second sequence -@param l2 Length of second sequence -@param return_dtw If only the matrix is required, finding the dtw value can be skipped - to save operations. -@param do_sqrt Apply the sqrt operations on all items in the wps array. If not required, - this can be skipped to save operations. -@param psi_neg For psi-relaxation, replace non-optimal values with -1 +@param l2 Length of second sequence. @param settings A DTWSettings struct with options for the DTW algorithm. - -@return The dtw value if return_dtw is true; Otherwise -1. 
*/ -seq_t dtw_warping_paths(seq_t *wps, - seq_t *s1, idx_t l1, - seq_t *s2, idx_t l2, - bool return_dtw, bool do_sqrt, bool psi_neg, - DTWSettings *settings) { - return dtw_warping_paths_ndim(wps, s1, l1, s2, l2, - return_dtw, do_sqrt, psi_neg, 1, - settings); -} - -seq_t dtw_warping_paths_ndim(seq_t *wps, - seq_t *s1, idx_t l1, - seq_t *s2, idx_t l2, - bool return_dtw, bool do_sqrt, bool psi_neg, - int ndim, - DTWSettings *settings) { +seq_t dtw_distance_euclidean(seq_t *s1, idx_t l1, + seq_t *s2, idx_t l2, + DTWSettings *settings) { + assert(settings->psi_1b <= l1 && settings->psi_1e <= l1 && + settings->psi_2b <= l2 && settings->psi_2e <= l2); + idx_t ldiff; + idx_t dl; // DTWPruned idx_t sc = 0; idx_t ec = 0; - idx_t ec_next; bool smaller_found; + idx_t ec_next; + // signal(SIGINT, dtw_int_handler); // not compatible with OMP - DTWWps p = dtw_wps_parts(l1, l2, settings); + idx_t window = settings->window; + seq_t max_step = settings->max_step; + seq_t max_dist = settings->max_dist; + seq_t penalty = settings->penalty; + + #ifdef DTWDEBUG + printf("r=%zu, c=%zu\n", l1, l2); + #endif if (settings->use_pruning || settings->only_ub) { - if (ndim == 1) { - p.max_dist = pow(ub_euclidean(s1, l1, s2, l2), 2); - } else { - p.max_dist = pow(ub_euclidean_ndim(s1, l1, s2, l2, ndim), 2); - } + max_dist = ub_euclidean_euclidean(s1, l1, s2, l2); if (settings->only_ub) { - if (do_sqrt) { - return sqrt(p.max_dist); - } else { - return p.max_dist; - } + return max_dist; } + } else if (max_dist == 0) { + max_dist = INFINITY; + } else { + max_dist = pow(max_dist, 2); } - - idx_t ri, ci, min_ci, max_ci, wpsi, wpsi_start; - - // Top row: ri = -1 - for (wpsi=0; wpsipsi_2b+1; wpsi++) { - // ci = wpsi - 1 - wps[wpsi] = 0; + if (l1 > l2) { + ldiff = l1 - l2; + dl = ldiff; + } else { + ldiff = l2 - l1; + dl = 0; } - for (wpsi=settings->psi_2b+1; wpsimax_length_diff != 0 && ldiff > settings->max_length_diff) { + return INFINITY; } - // First column: - wpsi = p.width; - for (ri=0; 
ripsi_1b; ri++) { - wps[wpsi] = 0; - wpsi += p.width; + if (window == 0) { + window = MAX(l1, l2); } - for (; ri 0); + seq_t * dtw = (seq_t *)malloc(sizeof(seq_t) * length * 2); + if (!dtw) { + printf("Error: dtw_distance - Cannot allocate memory (size=%zu)\n", length*2); + return 0; + } + idx_t i; + idx_t j; + for (j=0; jpsi_2b + 1; i++) { + dtw[i] = 0; + } + idx_t skip = 0; + idx_t skipp = 0; + int i0 = 1; + int i1 = 0; + idx_t minj; + idx_t maxj; + idx_t curidx = 0; + idx_t dl_window = dl + window - 1; + idx_t ldiff_window = window; + if (l2 > l1) { + ldiff_window += ldiff; + } + seq_t minv; seq_t d; - idx_t ri_idx, ci_idx; - - // This function splits the loop in four parts that result in different branches - // that would otherwise be in the loop (and are deterministic). - - // A. Rows: 0 <= ri < min(overlap_left_ri, overlap_right_ri) - // [0 0 x x x] - // [0 0 0 x x] - min_ci = 0; - max_ci = p.window + p.ldiffc; // ri < overlap_right_i - for (ri=0; ri p.max_step) { wps[ri_width + wpsi] = INFINITY; wpsi++; continue;} - wps[ri_width + wpsi] = d + MIN3(wps[ri_width + wpsi - 1] + p.penalty, - wps[ri_widthp + wpsi - 1], // diagonal - wps[ri_widthp + wpsi] + p.penalty); - // PrunedDTW - if (wps[ri_width + wpsi] <= p.max_dist) { - smaller_found = true; - ec_next = ci + 1; - } else { - if (!smaller_found) - sc = ci + 1; - if (ci >= ec) - break; - } - wpsi++; + seq_t tempv; + seq_t psi_shortest = INFINITY; + // keepRunning = 1; + for (i=0; i dl_window) { + // maxj -= dl_window; + // } else { + // maxj = 0; + // } + maxj = (i - dl_window) * (i > dl_window); + // No risk for overflow/modulo because we also need to store dtw of size + // MIN(l2+1, ldiff + 2*window + 1) ? 
+ minj = i + ldiff_window; + if (minj > l2) { + minj = l2; } - ec = ec_next; - for (idx_t i=ri_width + wpsi; i= overlap_right_i - for (ri=p.ri1; ri maxj) { + #ifdef DTWDEBUG + printf("correct maxj to sc: %zu -> %zu (saved %zu computations)\n", maxj, sc, sc-maxj); + #endif + maxj = sc; } smaller_found = false; - ec_next = ri; - for (; ci p.max_step) { wps[ri_width + wpsi] = INFINITY; wpsi++; continue;} - // B-region assumes wps has the same column indices in the previous row - wps[ri_width + wpsi] = d + MIN3(wps[ri_width + wpsi - 1] + p.penalty, - wps[ri_widthp + wpsi - 1], // Diagonal - wps[ri_widthp + wpsi] + p.penalty); + ec_next = i; + // Deal with psi-relaxation in first column + if (settings->psi_1b != 0 && maxj == 0 && i < settings->psi_1b) { + dtw[i1*length + 0] = 0; + } + #ifdef DTWDEBUG + printf("i=%zu, maxj=%zu, minj=%zu\n", i, maxj, minj); + #endif + for (j=maxj; j max_step) { + // Let the value be INFINITY as initialized + continue; + } + curidx = i0 * length + j - skipp; + minv = dtw[curidx]; + curidx += 1; + tempv = dtw[curidx] + penalty; + if (tempv < minv) { + minv = tempv; + } + curidx = i1 * length + j - skip; + tempv = dtw[curidx] + penalty; + if (tempv < minv) { + minv = tempv; + } + #ifdef DTWDEBUG + printf("d = %f, minv = %f\n", d, minv); + #endif + curidx += 1; + dtw[curidx] = d + minv; + #ifdef DTWDEBUG + printf("%zu, %zu, %zu\n",i0*length + j - skipp,i0*length + j + 1 - skipp,i1*length + j - skip); + printf("%f, %f, %f\n",dtw[i0*length + j - skipp],dtw[i0*length + j + 1 - skipp],dtw[i1*length + j - skip]); + printf("i=%zu, j=%zu, d=%f, skip=%zu, skipp=%zu\n",i,j,d,skip,skipp); + #endif + // PrunedDTW + if (dtw[curidx] > max_dist) { + #ifdef DTWDEBUG + printf("dtw[%zu] = %f > %f\n", curidx, dtw[curidx], max_dist); + #endif + if (!smaller_found) { + sc = j + 1; + } + if (j >= ec) { + #ifdef DTWDEBUG + printf("Break because of pruning with j=%zu, ec=%zu (saved %zu computations)\n", j, ec, minj-j); + #endif + break; + } + } else { + 
smaller_found = true; + ec_next = j + 1; + } + } + ec = ec_next; + // Deal with Psi-relaxation in last column + if (settings->psi_1e != 0 && minj == l2 && l1 - 1 - i <= settings->psi_1e) { + assert(!(settings->window == 0 || settings->window == l2) || (i1 + 1)*length - 1 == curidx); + if (dtw[curidx] < psi_shortest) { + // curidx is the last value + psi_shortest = dtw[curidx]; + } + } + #ifdef DTWDEBUG + dtw_print_twoline(dtw, l1, l2, length, i0, i1, skip, skipp, maxj, minj); + #endif + } + if (window - 1 < 0) { + l2 += window - 1; + } + seq_t result = dtw[length * i1 + l2 - skip]; + // Deal with psi-relaxation in the last row + if (settings->psi_2e != 0) { + for (i=l2 - skip - settings->psi_2e; imax_dist !=0 && result > settings->max_dist) { + // DTWPruned keeps the last value larger than max_dist. Correct for this. + result = INFINITY; + } + return result; +} + + + +/** +Compute the DTW between two n-dimensional series. +Use the Euclidean inner distance. + +@param s1 First sequence +@param l1 Length of first sequence. In tuples, real length should be length*ndim. +@param s2 Second sequence +@param l2 Length of second sequence. In tuples, real length should be length*ndim. +@param ndim Number of dimensions +@param settings A DTWSettings struct with options for the DTW algorithm. 
+*/ +seq_t dtw_distance_ndim_euclidean(seq_t *s1, idx_t l1, + seq_t *s2, idx_t l2, int ndim, + DTWSettings *settings) { + assert(settings->psi_1b <= l1 && settings->psi_1e <= l1 && + settings->psi_2b <= l2 && settings->psi_2e <= l2); + idx_t ldiff; + idx_t dl; + // DTWPruned + idx_t sc = 0; + idx_t ec = 0; + bool smaller_found; + idx_t ec_next; + // signal(SIGINT, dtw_int_handler); // not compatible with OMP + + idx_t window = settings->window; + seq_t max_step = settings->max_step; + seq_t max_dist = settings->max_dist; + seq_t penalty = settings->penalty; + + #ifdef DTWDEBUG + printf("r=%zu, c=%zu\n", l1, l2); + #endif + if (settings->use_pruning || settings->only_ub) { + max_dist = ub_euclidean_ndim_euclidean(s1, l1, s2, l2, ndim); + if (settings->only_ub) { + return max_dist; + } + } else if (max_dist == 0) { + max_dist = INFINITY; + } else { + max_dist = pow(max_dist, 2); + } + if (l1 > l2) { + ldiff = l1 - l2; + dl = ldiff; + } else { + ldiff = l2 - l1; + dl = 0; + } + if (settings->max_length_diff != 0 && ldiff > settings->max_length_diff) { + return INFINITY; + } + if (window == 0) { + window = MAX(l1, l2); + } + if (max_step == 0) { + max_step = INFINITY; + } else { + max_step = pow(max_step, 2); + } + penalty = pow(penalty, 2); + // rows is for series 1, columns is for series 2 + idx_t length = MIN(l2+1, ldiff + 2*window + 1); + assert(length > 0); + seq_t * dtw = (seq_t *)malloc(sizeof(seq_t) * length * 2); + if (!dtw) { + printf("Error: dtw_distance_ndim - Cannot allocate memory (size=%zu)\n", length*2); + return 0; + } + idx_t i; + idx_t j; + idx_t i_idx; + idx_t j_idx; + for (j=0; jpsi_2b + 1; i++) { + dtw[i] = 0; + } + idx_t skip = 0; + idx_t skipp = 0; + int i0 = 1; + int i1 = 0; + idx_t minj; + idx_t maxj; + idx_t curidx = 0; + idx_t dl_window = dl + window - 1; + idx_t ldiff_window = window; + if (l2 > l1) { + ldiff_window += ldiff; + } + seq_t minv; + seq_t d; + seq_t tempv; + seq_t psi_shortest = INFINITY; + // keepRunning = 1; + for (i=0; i 
dl_window) { + // maxj -= dl_window; + // } else { + // maxj = 0; + // } + maxj = (i - dl_window) * (i > dl_window); + // No risk for overflow/modulo because we also need to store dtw of size + // MIN(l2+1, ldiff + 2*window + 1) ? + minj = i + ldiff_window; + if (minj > l2) { + minj = l2; + } + skipp = skip; + skip = maxj; + i0 = 1 - i0; + i1 = 1 - i1; + // Reset new line i1 + for (j=0; j maxj) { + #ifdef DTWDEBUG + printf("correct maxj to sc: %zu -> %zu (saved %zu computations)\n", maxj, sc, sc-maxj); + #endif + maxj = sc; + } + smaller_found = false; + ec_next = i; + // Deal with psi-relaxation in first column + if (settings->psi_1b != 0 && maxj == 0 && i < settings->psi_1b) { + dtw[i1*length + 0] = 0; + } + #ifdef DTWDEBUG + printf("i=%zu, maxj=%zu, minj=%zu\n", i, maxj, minj); + #endif + for (j=maxj; j max_step) { + // Let the value be INFINITY as initialized + continue; + } + curidx = i0 * length + j - skipp; + minv = dtw[curidx]; + curidx += 1; + tempv = dtw[curidx] + penalty; + if (tempv < minv) { + minv = tempv; + } + curidx = i1 * length + j - skip; + tempv = dtw[curidx] + penalty; + if (tempv < minv) { + minv = tempv; + } + #ifdef DTWDEBUG + printf("d = %f, minv = %f\n", d, minv); + #endif + curidx += 1; + dtw[curidx] = d + minv; + #ifdef DTWDEBUG + printf("%zu, %zu, %zu\n",i0*length + j - skipp,i0*length + j + 1 - skipp,i1*length + j - skip); + printf("%f, %f, %f\n",dtw[i0*length + j - skipp],dtw[i0*length + j + 1 - skipp],dtw[i1*length + j - skip]); + printf("i=%zu, j=%zu, d=%f, skip=%zu, skipp=%zu\n",i,j,d,skip,skipp); + #endif + // PrunedDTW + if (dtw[curidx] > max_dist) { + #ifdef DTWDEBUG + printf("dtw[%zu] = %f > %f\n", curidx, dtw[curidx], max_dist); + #endif + if (!smaller_found) { + sc = j + 1; + } + if (j >= ec) { + #ifdef DTWDEBUG + printf("Break because of pruning with j=%zu, ec=%zu (saved %zu computations)\n", j, ec, minj-j); + #endif + break; + } + } else { + smaller_found = true; + ec_next = j + 1; + } + } + ec = ec_next; + // Deal with 
Psi-relaxation in last column + if (settings->psi_1e != 0 && minj == l2 && l1 - 1 - i <= settings->psi_1e) { + assert(!(settings->window == 0 || settings->window == l2) || (i1 + 1)*length - 1 == curidx); + if (dtw[curidx] < psi_shortest) { + // curidx is the last value + psi_shortest = dtw[curidx]; + } + } + #ifdef DTWDEBUG + dtw_print_twoline(dtw, l1, l2, length, i0, i1, skip, skipp, maxj, minj); + #endif + } + if (window - 1 < 0) { + l2 += window - 1; + } + seq_t result = dtw[length * i1 + l2 - skip]; + // Deal with psi-relaxation in the last row + if (settings->psi_2e != 0) { + for (i=l2 - skip - settings->psi_2e; imax_dist !=0 && result > settings->max_dist) { + // DTWPruned keeps the last value larger than max_dist. Correct for this. + result = INFINITY; + } + return result; +} + + +// MARK: WPS + +/*! +Compute all warping paths between two series. + +@param wps Empty array of length `(l1+1)*min(l2+1, abs(l1-l2) + 2*window-1)` in which the warping paths will be stored. + It represents the full matrix of warping paths between the two series. +@param s1 First sequence +@param l1 Length of first sequence +@param s2 Second sequence +@param l2 Length of second sequence +@param return_dtw If only the matrix is required, finding the dtw value can be skipped + to save operations. +@param do_sqrt Apply the sqrt operations on all items in the wps array. If not required, + this can be skipped to save operations. +@param psi_neg For psi-relaxation, replace non-optimal values with -1 +@param settings A DTWSettings struct with options for the DTW algorithm. + +@return The dtw value if return_dtw is true; Otherwise -1. 
+*/ +seq_t dtw_warping_paths(seq_t *wps, + seq_t *s1, idx_t l1, + seq_t *s2, idx_t l2, + bool return_dtw, bool do_sqrt, bool psi_neg, + DTWSettings *settings) { + return dtw_warping_paths_ndim(wps, s1, l1, s2, l2, + return_dtw, do_sqrt, psi_neg, 1, + settings); +} + + + +seq_t dtw_warping_paths_ndim(seq_t *wps, + seq_t *s1, idx_t l1, + seq_t *s2, idx_t l2, + bool return_dtw, bool do_sqrt, bool psi_neg, + int ndim, + DTWSettings *settings) { + if (settings->inner_dist == 1) { + return dtw_warping_paths_ndim_euclidean(wps, s1, l1, s2, l2, + return_dtw, do_sqrt, psi_neg, ndim, settings); + } + // DTWPruned + idx_t sc = 0; + idx_t ec = 0; + idx_t ec_next; + bool smaller_found; + + DTWWps p = dtw_wps_parts(l1, l2, settings); + if (settings->use_pruning || settings->only_ub) { + if (ndim == 1) { + p.max_dist = ub_euclidean(s1, l1, s2, l2); + } else { + p.max_dist = ub_euclidean_ndim(s1, l1, s2, l2, ndim); + } + p.max_dist = pow(p.max_dist, 2); + if (settings->only_ub) { + if (do_sqrt) { + return sqrt(p.max_dist); + } else { + return p.max_dist; + } + } + } + + idx_t ri, ci, min_ci, max_ci, wpsi, wpsi_start; + + // Top row: ri = -1 + for (wpsi=0; wpsipsi_2b+1; wpsi++) { + // ci = wpsi - 1 + wps[wpsi] = 0; + } + for (wpsi=settings->psi_2b+1; wpsipsi_1b; ri++) { + wps[wpsi] = 0; + wpsi += p.width; + } + for (; ri p.max_step) { wps[ri_width + wpsi] = INFINITY; wpsi++; continue;} + wps[ri_width + wpsi] = d + MIN3(wps[ri_width + wpsi - 1] + p.penalty, + wps[ri_widthp + wpsi - 1], // diagonal + wps[ri_widthp + wpsi] + p.penalty); + // PrunedDTW + if (wps[ri_width + wpsi] <= p.max_dist) { + smaller_found = true; + ec_next = ci + 1; + } else { + if (!smaller_found) + sc = ci + 1; + if (ci >= ec) + break; + } + wpsi++; + } + ec = ec_next; + for (idx_t i=ri_width + wpsi; i= overlap_right_i + for (ri=p.ri1; ri p.max_step) { wps[ri_width + wpsi] = INFINITY; wpsi++; continue;} + // B-region assumes wps has the same column indices in the previous row + wps[ri_width + wpsi] = d + 
MIN3(wps[ri_width + wpsi - 1] + p.penalty, + wps[ri_widthp + wpsi - 1], // Diagonal + wps[ri_widthp + wpsi] + p.penalty); + // PrunedDTW + if (wps[ri_width + wpsi] <= p.max_dist) { + smaller_found = true; + ec_next = ci + 1; + } else { + if (!smaller_found) + sc = ci + 1; + if (ci >= ec) + break; + } + wpsi++; + } + ec = ec_next; + for (idx_t i=ri_width + wpsi; i p.max_step) { wps[ri_width + wpsi] = INFINITY; wpsi++; continue;} + // C-region assumes wps has the column indices in the previous row shifted by one + wps[ri_width + wpsi] = d + MIN3(wps[ri_width + wpsi - 1] + p.penalty, + wps[ri_widthp + wpsi], // Diagonal + wps[ri_widthp + wpsi + 1] + p.penalty); + // PrunedDTW + if (wps[ri_width + wpsi] <= p.max_dist) { + smaller_found = true; + ec_next = ci + 1; + } else { + if (!smaller_found) + sc = ci + 1; + if (ci >= ec) + break; + } + wpsi++; + } + ec = ec_next; + for (idx_t i=ri_width + wpsi; i p.max_step) { wps[ri_width + wpsi] = INFINITY; wpsi++; continue;} + // D-region assumes wps has the same column indices in the previous row + wps[ri_width + wpsi] = d + MIN3(wps[ri_width + wpsi - 1] + p.penalty, + wps[ri_widthp + wpsi - 1], // Diagonal + wps[ri_widthp + wpsi] + p.penalty); + // PrunedDTW + if (wps[ri_width + wpsi] <= p.max_dist) { + smaller_found = true; + ec_next = ci + 1; + } else { + if (!smaller_found) + sc = ci + 1; + if (ci >= ec) + break; + } + wpsi++; + } + ec = ec_next; + for (idx_t i=ri_width + wpsi; ipsi_1e == 0 && settings->psi_2e == 0) { + rvalue = wps[final_wpsi]; + } else if (return_dtw) { + seq_t mir_value = INFINITY; + idx_t mir_rel = 0; + seq_t mic_value = INFINITY; + idx_t mic = 0; + // Find smallest value in last column + if (settings->psi_1e != 0) { + wpsi = final_wpsi; + for (ri=l1-1; ri>l1-settings->psi_1e-2; ri--) { + if (wps[wpsi] < mir_value) { + mir_value = wps[wpsi]; + mir_rel = ri + 1; + } else { + // pass + } + wpsi -= p.width; + } + } + // Find smallest value in last row + if (settings->psi_2e != 0) { + wpsi = final_wpsi; + 
for (ci=l2-1; ci>l2-settings->psi_2e-2; ci--) { + if (wps[wpsi] < mic_value) { + mic_value = wps[wpsi]; + mic = ci + 1; + } else { + // pass + } + wpsi -= 1; + } + } + // Set values with higher indices than the smallest value to -1 + // and return smallest value as DTW + if (mir_value < mic_value) { + // last column has smallest value + if (psi_neg) { + for (idx_t ri=mir_rel + 1; rimax_dist > 0 && rvalue > settings->max_dist) { + // DTWPruned keeps the last value larger than max_dist. Correct for this. + rvalue = INFINITY; + } + if (do_sqrt) { + for (idx_t i=0; i 0) { + wps[i] = sqrt(wps[i]); + } + } + if (return_dtw) { + if (rvalue > 0) { + rvalue = sqrt(rvalue); + } + } + } + + return rvalue; +} + +seq_t dtw_warping_paths_euclidean( + seq_t *wps, + seq_t *s1, idx_t l1, + seq_t *s2, idx_t l2, + bool return_dtw, bool do_sqrt, bool psi_neg, + DTWSettings *settings) { + return dtw_warping_paths_ndim_euclidean( + wps, s1, l1, s2, l2, + return_dtw, do_sqrt, psi_neg, 1, + settings); +} + + + +seq_t dtw_warping_paths_ndim_euclidean(seq_t *wps, + seq_t *s1, idx_t l1, + seq_t *s2, idx_t l2, + bool return_dtw, bool do_sqrt, bool psi_neg, + int ndim, + DTWSettings *settings) { + // DTWPruned + idx_t sc = 0; + idx_t ec = 0; + idx_t ec_next; + bool smaller_found; + + DTWWps p = dtw_wps_parts(l1, l2, settings); + if (settings->use_pruning || settings->only_ub) { + if (ndim == 1) { + p.max_dist = ub_euclidean(s1, l1, s2, l2); + } else { + p.max_dist = ub_euclidean_ndim(s1, l1, s2, l2, ndim); + } + if (settings->only_ub) { + if (do_sqrt) { + return sqrt(p.max_dist); + } else { + return p.max_dist; + } + } + } + + idx_t ri, ci, min_ci, max_ci, wpsi, wpsi_start; + + // Top row: ri = -1 + for (wpsi=0; wpsipsi_2b+1; wpsi++) { + // ci = wpsi - 1 + wps[wpsi] = 0; + } + for (wpsi=settings->psi_2b+1; wpsipsi_1b; ri++) { + wps[wpsi] = 0; + wpsi += p.width; + } + for (; ri p.max_step) { wps[ri_width + wpsi] = INFINITY; wpsi++; continue;} + wps[ri_width + wpsi] = d + MIN3(wps[ri_width + 
wpsi - 1] + p.penalty, + wps[ri_widthp + wpsi - 1], // diagonal + wps[ri_widthp + wpsi] + p.penalty); + // PrunedDTW + if (wps[ri_width + wpsi] <= p.max_dist) { + smaller_found = true; + ec_next = ci + 1; + } else { + if (!smaller_found) + sc = ci + 1; + if (ci >= ec) + break; + } + wpsi++; + } + ec = ec_next; + for (idx_t i=ri_width + wpsi; i= overlap_right_i + for (ri=p.ri1; ri p.max_step) { wps[ri_width + wpsi] = INFINITY; wpsi++; continue;} + // B-region assumes wps has the same column indices in the previous row + wps[ri_width + wpsi] = d + MIN3(wps[ri_width + wpsi - 1] + p.penalty, + wps[ri_widthp + wpsi - 1], // Diagonal + wps[ri_widthp + wpsi] + p.penalty); + // PrunedDTW + if (wps[ri_width + wpsi] <= p.max_dist) { + smaller_found = true; + ec_next = ci + 1; + } else { + if (!smaller_found) + sc = ci + 1; + if (ci >= ec) + break; + } + wpsi++; + } + ec = ec_next; + for (idx_t i=ri_width + wpsi; i p.max_step) { wps[ri_width + wpsi] = INFINITY; wpsi++; continue;} + // C-region assumes wps has the column indices in the previous row shifted by one + wps[ri_width + wpsi] = d + MIN3(wps[ri_width + wpsi - 1] + p.penalty, + wps[ri_widthp + wpsi], // Diagonal + wps[ri_widthp + wpsi + 1] + p.penalty); + // PrunedDTW + if (wps[ri_width + wpsi] <= p.max_dist) { + smaller_found = true; + ec_next = ci + 1; + } else { + if (!smaller_found) + sc = ci + 1; + if (ci >= ec) + break; + } + wpsi++; + } + ec = ec_next; + for (idx_t i=ri_width + wpsi; i p.max_step) { wps[ri_width + wpsi] = INFINITY; wpsi++; continue;} + // D-region assumes wps has the same column indices in the previous row + wps[ri_width + wpsi] = d + MIN3(wps[ri_width + wpsi - 1] + p.penalty, + wps[ri_widthp + wpsi - 1], // Diagonal + wps[ri_widthp + wpsi] + p.penalty); // PrunedDTW if (wps[ri_width + wpsi] <= p.max_dist) { smaller_found = true; ec_next = ci + 1; } else { - if (!smaller_found) - sc = ci + 1; - if (ci >= ec) - break; + if (!smaller_found) + sc = ci + 1; + if (ci >= ec) + break; + } + wpsi++; + } 
+ ec = ec_next; + for (idx_t i=ri_width + wpsi; ipsi_1e == 0 && settings->psi_2e == 0) { + rvalue = wps[final_wpsi]; + } else if (return_dtw) { + seq_t mir_value = INFINITY; + idx_t mir_rel = 0; + seq_t mic_value = INFINITY; + idx_t mic = 0; + // Find smallest value in last column + if (settings->psi_1e != 0) { + wpsi = final_wpsi; + for (ri=l1-1; ri>l1-settings->psi_1e-2; ri--) { + if (wps[wpsi] < mir_value) { + mir_value = wps[wpsi]; + mir_rel = ri + 1; + } else { + // pass + } + wpsi -= p.width; + } + } + // Find smallest value in last row + if (settings->psi_2e != 0) { + wpsi = final_wpsi; + for (ci=l2-1; ci>l2-settings->psi_2e-2; ci--) { + if (wps[wpsi] < mic_value) { + mic_value = wps[wpsi]; + mic = ci + 1; + } else { + // pass + } + wpsi -= 1; + } + } + // Set values with higher indices than the smallest value to -1 + // and return smallest value as DTW + if (mir_value < mic_value) { + // last column has smallest value + if (psi_neg) { + for (idx_t ri=mir_rel + 1; rimax_dist > 0 && rvalue > settings->max_dist) { + // DTWPruned keeps the last value larger than max_dist. Correct for this. + rvalue = INFINITY; + } + + return rvalue; +} + + +/*! + Expand the compact wps datastructure to a full `(l1+1)*(l2+1)` sized matrix. + */ +void dtw_expand_wps(seq_t *wps, seq_t *full, + idx_t l1, idx_t l2, DTWSettings *settings) { + dtw_expand_wps_slice(wps, full, l1, l2, 0, l1+1, 0, l2+1, settings); +} + +/*! + Expand the compact wps datastructure to a full `(re-rb)*(ce-cb)` sized matrix that + represents the slice `[rb:re,cb:ce]` of the full matrix. + + @param wps Compact warping paths matrix + @param full Sequence of length `(re-rb)*(ce-cb)` + Will be filled with values. 
+ @param l1 Length of series 1 + @param l2 Length of series 2 + @param rb Start of slice row (0 <= rb <= l1+1) + @param re End of slice row (0 <= rb <= l1+1) + @param cb Start of slice column (0 <= rc <= l2+1) + @param ce End of slice column (0 <= rc <= l2+1) + @param settings DTWSetting object + */ +void dtw_expand_wps_slice(seq_t *wps, seq_t *full, + idx_t l1, idx_t l2, + idx_t rb, idx_t re, idx_t cb, idx_t ce, + DTWSettings *settings) { + DTWWps p = dtw_wps_parts(l1, l2, settings); + + idx_t ri, ci, min_ci, max_ci, wpsi, wpsi_start; + idx_t rbs = 0; + if (rb > 0) { rbs = rb - 1; } + idx_t res = 0; + if (re > 0) { res = re - 1; } + idx_t cbs = 0; + if (cb > 0) { cbs = cb - 1; } + idx_t ces = 0; + if (ce > 0) { ces = ce - 1; } + idx_t fwidth = ce - cb; + + for (idx_t i=0; i<(re-rb)*(ce-cb); i++) { + full[i] = INFINITY; + } + + // Top row: ri = -1 + if (rb == 0 && cb == 0) { + full[0] = wps[0]; + } + if (rb == 0) { + wpsi = 1 + cbs; + for (ci=cbs; ci= overlap_right_i + if (rbs < p.ri2) { + for (ri=MAX(rbs, p.ri1); ri p.ri2) { + // min_ci += rbs - p.ri2; + // max_ci += rbs - p.ri2; + // } + for (ri=MAX(rbs, p.ri2); ri p.ri3) { + // min_ci += rbs - p.ri3; + // wpsi_start += rbs - p.ri3; + // } + for (ri=MAX(rbs, p.ri3); riinner_dist == 1) { + return dtw_warping_paths_affinity_ndim_euclidean(wps, s1, l1, s2, l2, + return_dtw, do_sqrt, psi_neg, only_triu,ndim, gamma, tau, delta, delta_factor, settings); + } + seq_t dtw_prev; + + DTWWps p = dtw_wps_parts(l1, l2, settings); + + idx_t ri, ci, min_ci, max_ci, wpsi, wpsi_start; + + // Top row: ri = -1 + for (wpsi=0; wpsipsi_2b+1; wpsi++) { + // ci = wpsi - 1 + wps[wpsi] = 0; + } + for (wpsi=settings->psi_2b+1; wpsipsi_1b; ri++) { + wps[wpsi] = 0; + wpsi += p.width; + } + for (; ri= overlap_right_i + for (ri=p.ri1; ri p.max_step) { wps[ri_width + wpsi] = INFINITY; wpsi++; continue;} - // C-region assumes wps has the column indices in the previous row shifted by one - wps[ri_width + wpsi] = d + MIN3(wps[ri_width + wpsi - 1] + 
p.penalty, - wps[ri_widthp + wpsi], // Diagonal - wps[ri_widthp + wpsi + 1] + p.penalty); - // PrunedDTW - if (wps[ri_width + wpsi] <= p.max_dist) { - smaller_found = true; - ec_next = ci + 1; + d = exp(-gamma * d); + dtw_prev = MAX3(wps[ri_width + wpsi -1] - p.penalty, + wps[ri_widthp + wpsi ], // diagonal + wps[ri_widthp + wpsi +1] - p.penalty); + if (d < tau) { + dtw_prev = delta + delta_factor * dtw_prev; } else { - if (!smaller_found) - sc = ci + 1; - if (ci >= ec) - break; + dtw_prev = d + dtw_prev; + } + if (dtw_prev < 0) { + dtw_prev = 0; } + wps[ri_width + wpsi] = dtw_prev; wpsi++; } - ec = ec_next; for (idx_t i=ri_width + wpsi; i p.max_step) { wps[ri_width + wpsi] = INFINITY; wpsi++; continue;} - // D-region assumes wps has the same column indices in the previous row - wps[ri_width + wpsi] = d + MIN3(wps[ri_width + wpsi - 1] + p.penalty, - wps[ri_widthp + wpsi - 1], // Diagonal - wps[ri_widthp + wpsi] + p.penalty); - // PrunedDTW - if (wps[ri_width + wpsi] <= p.max_dist) { - smaller_found = true; - ec_next = ci + 1; + d = exp(-gamma * d); + dtw_prev = MAX3(wps[ri_width + wpsi -1] - p.penalty, + wps[ri_widthp + wpsi -1], // diagonal + wps[ri_widthp + wpsi ] - p.penalty); + if (d < tau) { + dtw_prev = delta + delta_factor * dtw_prev; } else { - if (!smaller_found) - sc = ci + 1; - if (ci >= ec) - break; + dtw_prev = d + dtw_prev; + } + if (dtw_prev < 0) { + dtw_prev = 0; } + wps[ri_width + wpsi] = dtw_prev; wpsi++; } - ec = ec_next; for (idx_t i=ri_width + wpsi; ipsi_1e == 0 && settings->psi_2e == 0) { rvalue = wps[final_wpsi]; } else if (return_dtw) { - seq_t mir_value = INFINITY; + seq_t mir_value = -INFINITY; idx_t mir_rel = 0; - seq_t mic_value = INFINITY; + seq_t mic_value = -INFINITY; idx_t mic = 0; // Find smallest value in last column if (settings->psi_1e != 0) { @@ -902,9 +2266,8 @@ seq_t dtw_warping_paths_ndim(seq_t *wps, if (settings->max_dist > 0 && rvalue > settings->max_dist) { // DTWPruned keeps the last value larger than max_dist. 
Correct for this. - rvalue = INFINITY; + rvalue = -INFINITY; } - if (do_sqrt) { for (idx_t i=0; i 0) { @@ -922,165 +2285,8 @@ seq_t dtw_warping_paths_ndim(seq_t *wps, } -/*! - Expand the compact wps datastructure to a full `(l1+1)*(l2+1)` sized matrix. - */ -void dtw_expand_wps(seq_t *wps, seq_t *full, - idx_t l1, idx_t l2, DTWSettings *settings) { - dtw_expand_wps_slice(wps, full, l1, l2, 0, l1+1, 0, l2+1, settings); -} - -/*! - Expand the compact wps datastructure to a full `(re-rb)*(ce-cb)` sized matrix that - represents the slice `[rb:re,cb:ce]` of the full matrix. - - @param wps Compact warping paths matrix - @param full Sequence of length `(re-rb)*(ce-cb)` - Will be filled with values. - @param l1 Length of series 1 - @param l2 Length of series 2 - @param rb Start of slice row (0 <= rb <= l1+1) - @param re End of slice row (0 <= rb <= l1+1) - @param cb Start of slice column (0 <= rc <= l2+1) - @param ce End of slice column (0 <= rc <= l2+1) - @param settings DTWSetting object - */ -void dtw_expand_wps_slice(seq_t *wps, seq_t *full, - idx_t l1, idx_t l2, - idx_t rb, idx_t re, idx_t cb, idx_t ce, - DTWSettings *settings) { - DTWWps p = dtw_wps_parts(l1, l2, settings); - - idx_t ri, ci, min_ci, max_ci, wpsi, wpsi_start; - idx_t rbs = 0; - if (rb > 0) { rbs = rb - 1; } - idx_t res = 0; - if (re > 0) { res = re - 1; } - idx_t cbs = 0; - if (cb > 0) { cbs = cb - 1; } - idx_t ces = 0; - if (ce > 0) { ces = ce - 1; } - idx_t fwidth = ce - cb; - - for (idx_t i=0; i<(re-rb)*(ce-cb); i++) { - full[i] = INFINITY; - } - - // Top row: ri = -1 - if (rb == 0 && cb == 0) { - full[0] = wps[0]; - } - if (rb == 0) { - wpsi = 1 + cbs; - for (ci=cbs; ci= overlap_right_i - if (rbs < p.ri2) { - for (ri=MAX(rbs, p.ri1); ri 0) { - wps[i] = sqrt(wps[i]); - } - } - if (return_dtw) { - if (rvalue > 0) { - rvalue = sqrt(rvalue); - } - } - } - return rvalue; } @@ -1517,6 +2714,10 @@ void dtw_expand_wps_slice_affinity(seq_t *wps, seq_t *full, min_ci = 1; max_ci = 1 + 2 * p.window - 1 + 
p.ldiff; if (rbs < p.ri3) { + // if (rbs > p.ri2) { + // min_ci += rbs - p.ri2; + // max_ci += rbs - p.ri2; + // } for (ri=MAX(rbs, p.ri2); ri p.ri3) { + // min_ci += rbs - p.ri3; + // wpsi_start += rbs - p.ri3; + // } for (ri=MAX(rbs, p.ri3); riinner_dist == 1) { + d = dtw_warping_paths_ndim_euclidean(wps, from_s, from_l, to_s, to_l, true, false, true, ndim, settings); + } else { + d = dtw_warping_paths_ndim(wps, from_s, from_l, to_s, to_l, true, false, true, ndim, settings); + d = sqrt(d); + } *length_i = dtw_best_path(wps, from_i, to_i, from_l, to_l, settings); free(wps); return d; @@ -2560,11 +3778,34 @@ seq_t ub_euclidean_ndim(seq_t *s1, idx_t l1, seq_t *s2, idx_t l2, int ndim) { return euclidean_distance_ndim(s1, l1, s2, l2, ndim); } +/*! + Euclidean upper bound for DTW. + + @see ed.euclidean_distance. + */ +seq_t ub_euclidean_euclidean(seq_t *s1, idx_t l1, seq_t *s2, idx_t l2) { + return euclidean_distance_euclidean(s1, l1, s2, l2); +} + + +/*! + Euclidean upper bound for DTW. + + @see ed.euclidean_distance_ndim. +*/ +seq_t ub_euclidean_ndim_euclidean(seq_t *s1, idx_t l1, seq_t *s2, idx_t l2, int ndim) { + return euclidean_distance_ndim_euclidean(s1, l1, s2, l2, ndim); +} + + /*! Keogh lower bound for DTW. */ seq_t lb_keogh(seq_t *s1, idx_t l1, seq_t *s2, idx_t l2, DTWSettings *settings) { + if (settings->inner_dist == 1) { + return lb_keogh_euclidean(s1, l1, s2, l2, settings); + } idx_t window = settings->window; if (window == 0) { window = MAX(l1, l2); @@ -2579,10 +3820,9 @@ seq_t lb_keogh(seq_t *s1, idx_t l1, seq_t *s2, idx_t l2, DTWSettings *settings) imin_diff += l1 - l2; } idx_t imax_diff = window; - if (l1 < l2) { + if (l2 > l1) { imax_diff += l2 - l1; } - for (idx_t i=0; i imin_diff) { imin = i - imin_diff; @@ -2612,7 +3852,62 @@ seq_t lb_keogh(seq_t *s1, idx_t l1, seq_t *s2, idx_t l2, DTWSettings *settings) t += (li - ci)*(li - ci); } } - return sqrt(t); + t = sqrt(t); + return t; +} + + +/*! + Keogh lower bound for DTW. 
+ */ +seq_t lb_keogh_euclidean(seq_t *s1, idx_t l1, seq_t *s2, idx_t l2, DTWSettings *settings) { + idx_t window = settings->window; + if (window == 0) { + window = MAX(l1, l2); + } + idx_t imin, imax; + seq_t t = 0; + seq_t ui; + seq_t li; + seq_t ci; + idx_t imin_diff = window - 1; + if (l1 > l2) { + imin_diff += l1 - l2; + } + idx_t imax_diff = window; + if (l2 > l1) { + imax_diff += l2 - l1; + } + for (idx_t i=0; i imin_diff) { + imin = i - imin_diff; + } else { + imin = 0; + } + imax = i + imax_diff; + if (imax > l2) { + imax = l2; + } + ui = 0; + for (idx_t j=imin; j ui) { + ui = s2[j]; + } + } + li = INFINITY; + for (idx_t j=imin; j ui) { + t += fabs(ci - ui); + } else if (ci < li) { + t += li - ci; + } + } + return t; } diff --git a/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/dd_dtw.h b/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/dd_dtw.h index 8af23a9d..f7c77f3d 100644 --- a/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/dd_dtw.h +++ b/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/dd_dtw.h @@ -40,6 +40,11 @@ static char printFormat[5]; #pragma GCC diagnostic pop +// Inner distance options +//const int kSquaredEuclidean = 0; +//const int kEuclidean = 1; + + /** Settings for DTW operations: @@ -70,6 +75,7 @@ struct DTWSettings_s { idx_t psi_2e; // series 2, end psi bool use_pruning; bool only_ub; + int inner_dist; // 0=squared euclidean, 1=euclidean }; typedef struct DTWSettings_s DTWSettings; @@ -122,12 +128,19 @@ typedef seq_t (*DTWFnPtr)(seq_t *s1, idx_t l1, seq_t *s2, idx_t l2, DTWSettings seq_t dtw_distance(seq_t *s1, idx_t l1, seq_t *s2, idx_t l2, DTWSettings *settings); seq_t dtw_distance_ndim(seq_t *s1, idx_t l1, seq_t *s2, idx_t l2, int ndim, DTWSettings *settings); +seq_t dtw_distance_euclidean(seq_t *s1, idx_t l1, seq_t *s2, idx_t l2, DTWSettings *settings); +seq_t dtw_distance_ndim_euclidean(seq_t *s1, idx_t l1, seq_t *s2, idx_t l2, int ndim, DTWSettings *settings); // WPS seq_t dtw_warping_paths(seq_t *wps, seq_t *s1, idx_t l1, seq_t *s2, idx_t 
l2, bool return_dtw, bool do_sqrt, bool psi_neg, DTWSettings *settings); seq_t dtw_warping_paths_ndim(seq_t *wps, seq_t *s1, idx_t l1, seq_t *s2, idx_t l2, bool return_dtw, bool do_sqrt, bool psi_neg, int ndim, DTWSettings *settings); +seq_t dtw_warping_paths_euclidean(seq_t *wps, seq_t *s1, idx_t l1, seq_t *s2, idx_t l2, bool return_dtw, bool do_sqrt, bool psi_neg, DTWSettings *settings); +seq_t dtw_warping_paths_ndim_euclidean(seq_t *wps, seq_t *s1, idx_t l1, seq_t *s2, idx_t l2, bool return_dtw, bool do_sqrt, bool psi_neg, int ndim, DTWSettings *settings); seq_t dtw_warping_paths_affinity(seq_t *wps, seq_t *s1, idx_t l1, seq_t *s2, idx_t l2, bool return_dtw, bool do_sqrt, bool psi_neg, bool only_triu, seq_t gamma, seq_t tau, seq_t delta, seq_t delta_factor, DTWSettings *settings); seq_t dtw_warping_paths_affinity_ndim(seq_t *wps, seq_t *s1, idx_t l1, seq_t *s2, idx_t l2, bool return_dtw, bool do_sqrt, bool psi_neg, bool only_triu, int ndim, seq_t gamma, seq_t tau, seq_t delta, seq_t delta_factor, DTWSettings *settings); +seq_t dtw_warping_paths_affinity_ndim_euclidean(seq_t *wps, seq_t *s1, idx_t l1, seq_t *s2, idx_t l2, + bool return_dtw, bool do_sqrt, bool psi_neg, bool only_triu, int ndim, + seq_t gamma, seq_t tau, seq_t delta, seq_t delta_factor, DTWSettings *settings); void dtw_expand_wps(seq_t *wps, seq_t *full, idx_t l1, idx_t l2, DTWSettings *settings); void dtw_expand_wps_slice(seq_t *wps, seq_t *full, idx_t l1, idx_t l2, idx_t rb, idx_t re, idx_t cb, idx_t ce, DTWSettings *settings); void dtw_expand_wps_affinity(seq_t *wps, seq_t *full, idx_t l1, idx_t l2, DTWSettings *settings); @@ -151,7 +164,10 @@ DTWWps dtw_wps_parts(idx_t l1, idx_t l2, DTWSettings * settings); // Bound seq_t ub_euclidean(seq_t *s1, idx_t l1, seq_t *s2, idx_t l2); seq_t ub_euclidean_ndim(seq_t *s1, idx_t l1, seq_t *s2, idx_t l2, int ndim); +seq_t ub_euclidean_euclidean(seq_t *s1, idx_t l1, seq_t *s2, idx_t l2); +seq_t ub_euclidean_ndim_euclidean(seq_t *s1, idx_t l1, seq_t *s2, 
idx_t l2, int ndim); seq_t lb_keogh(seq_t *s1, idx_t l1, seq_t *s2, idx_t l2, DTWSettings *settings); +seq_t lb_keogh_euclidean(seq_t *s1, idx_t l1, seq_t *s2, idx_t l2, DTWSettings *settings); // Block DTWBlock dtw_block_empty(void); diff --git a/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/dd_ed.c b/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/dd_ed.c index 9e4c695a..cff7a5c4 100644 --- a/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/dd_ed.c +++ b/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/dd_ed.c @@ -8,45 +8,82 @@ #include "dd_ed.h" + + + /*! - Euclidean distance between two sequences of values, can differ in length. - - If the two series differ in length, compare the last element of the shortest series - to the remaining elements in the longer series. This is compatible with Euclidean - distance being used as an upper bound for DTW. - - @param s1 : Sequence of numbers - @param s2 : Sequence of numbers - @return Euclidean distance - - */ +Euclidean distance between two sequences of values, can differ in length. + +If the two series differ in length, compare the last element of the shortest series +to the remaining elements in the longer series. This is compatible with Euclidean +distance being used as an upper bound for DTW. + +@param s1 : Sequence of numbers. +@param s2 : Sequence of numbers. 
+@return Euclidean distance +*/ seq_t euclidean_distance(seq_t *s1, idx_t l1, seq_t *s2, idx_t l2) { idx_t n = MIN(l1, l2); seq_t ub = 0; for (idx_t i=0; i l2) { for (idx_t i=n; i l2) { + for (idx_t i=n; i l2) { for (idx_t i=n; i l2) { + for (idx_t i=n; ifn)(s1, 9, s2, 9, ¶m->settings); +// printf("d=%f\n", d); + cr_assert_float_eq(d, 2, 0.001); +} + ParameterizedTestParameters(dtw, test_series2) { static struct dtw_test_params params[] = { diff --git a/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/jinja/Makefile b/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/jinja/Makefile index 2d97deb4..e222272d 100644 --- a/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/jinja/Makefile +++ b/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/jinja/Makefile @@ -1,12 +1,18 @@ DEPS_dd_dtw = $(shell python3 generate.py -dq dd_dtw.c) DEPS_dd_dtw_openmp := $(shell python3 generate.py -dq dd_dtw_openmp.c) +DEPS_dd_ed := $(shell python3 generate.py -dq dd_ed.c) +TARGETS = ${shell python3 generate.py -ta} .PHONY: generate generate: jinja replace +.PHONY: clean +clean: + rm -f $(TARGETS) + .PHONY: jinja -jinja: dd_dtw.c dd_dtw_openmp.c +jinja: dd_dtw.c dd_dtw_openmp.c dd_ed.c dd_dtw.c: $(DEPS_dd_dtw) @echo "Changed:" $? @@ -22,8 +28,15 @@ dd_dtw_openmp.c: $(DEPS_dd_dtw_openmp) ../dd_dtw_openmp.c: dd_dtw_openmp.c cp dd_dtw_openmp.c ../ +dd_ed.c: $(DEPS_dd_ed) + @echo "Changed:" $? 
+ python3 generate.py $@ + +../dd_ed.c: dd_ed.c + cp dd_ed.c ../ + .PHONY: replace -replace: ../dd_dtw.c ../dd_dtw_openmp.c +replace: ../dd_dtw.c ../dd_dtw_openmp.c ../dd_ed.c .PHONY: debug debug: diff --git a/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/jinja/dd_dtw.jinja.c b/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/jinja/dd_dtw.jinja.c index 1e226b5e..5058fe3a 100644 --- a/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/jinja/dd_dtw.jinja.c +++ b/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/jinja/dd_dtw.jinja.c @@ -26,7 +26,8 @@ DTWSettings dtw_settings_default(void) { .psi_2b = 0, .psi_2e = 0, .use_pruning = false, - .only_ub = false + .only_ub = false, + .inner_dist = 0 }; return s; } @@ -59,15 +60,26 @@ void dtw_settings_print(DTWSettings *settings) { settings->psi_2b, settings->psi_2e); printf(" use_pruning = %d\n", settings->use_pruning); printf(" only_ub = %d\n", settings->only_ub); + printf(" inner_dist = %d\n", settings->inner_dist); printf("}\n"); } // MARK: DTW {% set suffix = '' %} +{% set inner_dist = 'squaredeuclidean' %} {%- include 'dtw_distance.jinja.c' %} {% set suffix = '_ndim' %} +{% set inner_dist = 'squaredeuclidean' %} +{%- include 'dtw_distance.jinja.c' %} + +{% set suffix = '' %} +{% set inner_dist = 'euclidean' %} +{%- include 'dtw_distance.jinja.c' %} + +{% set suffix = '_ndim' %} +{% set inner_dist = 'euclidean' %} {%- include 'dtw_distance.jinja.c' %} // MARK: WPS @@ -101,6 +113,23 @@ seq_t dtw_warping_paths(seq_t *wps, } {% set suffix = '_ndim' %} +{% set inner_dist = 'squaredeuclidean' %} +{%- include 'dtw_warpingpaths.jinja.c' %} + +seq_t dtw_warping_paths_euclidean( + seq_t *wps, + seq_t *s1, idx_t l1, + seq_t *s2, idx_t l2, + bool return_dtw, bool do_sqrt, bool psi_neg, + DTWSettings *settings) { + return dtw_warping_paths_ndim_euclidean( + wps, s1, l1, s2, l2, + return_dtw, do_sqrt, psi_neg, 1, + settings); +} + +{% set suffix = '_ndim' %} +{% set inner_dist = 'euclidean' %} {%- include 'dtw_warpingpaths.jinja.c' %} @@ -122,6 
+151,11 @@ seq_t dtw_warping_paths_affinity(seq_t *wps, {% set suffix = '_affinity_ndim' %} +{% set inner_dist = 'squaredeuclidean' %} +{%- include 'dtw_warpingpaths.jinja.c' %} + +{% set suffix = '_affinity_ndim' %} +{% set inner_dist = 'euclidean' %} {%- include 'dtw_warpingpaths.jinja.c' %} @@ -767,7 +801,15 @@ idx_t dtw_best_path_prob(seq_t *wps, idx_t *i1, idx_t *i2, idx_t l1, idx_t l2, s /*! Compute warping path between two sequences. - @return length of path + @param from_s First sequence + @param from_l Length of first sequence + @param to_s Second sequence + @param to_l Length of second sequence + @param from_i Stores warping path indices for the first sequence + @param to_i Stores warping path indices for the second sequence + @param length_i Stores resulting path length for from_i and to_i + @param settings Settings object + @return distance */ seq_t dtw_warping_path(seq_t *from_s, idx_t from_l, seq_t* to_s, idx_t to_l, idx_t *from_i, idx_t *to_i, idx_t * length_i, DTWSettings * settings) { return dtw_warping_path_ndim(from_s, from_l, to_s, to_l, from_i, to_i, length_i, 1, settings); @@ -776,8 +818,13 @@ seq_t dtw_warping_path(seq_t *from_s, idx_t from_l, seq_t* to_s, idx_t to_l, idx seq_t dtw_warping_path_ndim(seq_t *from_s, idx_t from_l, seq_t* to_s, idx_t to_l, idx_t *from_i, idx_t *to_i, idx_t * length_i, int ndim, DTWSettings * settings) { idx_t wps_length = dtw_settings_wps_length(from_l, to_l, settings); seq_t *wps = (seq_t *)malloc(wps_length * sizeof(seq_t)); - seq_t d = dtw_warping_paths_ndim(wps, from_s, from_l, to_s, to_l, true, false, true, ndim, settings); - d = sqrt(d); + seq_t d; + if (settings->inner_dist == 1) { + d = dtw_warping_paths_ndim_euclidean(wps, from_s, from_l, to_s, to_l, true, false, true, ndim, settings); + } else { + d = dtw_warping_paths_ndim(wps, from_s, from_l, to_s, to_l, true, false, true, ndim, settings); + d = sqrt(d); + } *length_i = dtw_best_path(wps, from_i, to_i, from_l, to_l, settings); free(wps); return d; @@ 
-894,68 +941,33 @@ seq_t ub_euclidean_ndim(seq_t *s1, idx_t l1, seq_t *s2, idx_t l2, int ndim) { return euclidean_distance_ndim(s1, l1, s2, l2, ndim); } - /*! - Keogh lower bound for DTW. + Euclidean upper bound for DTW. + + @see ed.euclidean_distance. */ -seq_t lb_keogh(seq_t *s1, idx_t l1, seq_t *s2, idx_t l2, DTWSettings *settings) { - idx_t window = settings->window; - if (window == 0) { - window = MAX(l1, l2); - } - idx_t imin, imax; - idx_t t = 0; - seq_t ui; - seq_t li; - seq_t ci; - idx_t ldiff12 = l1 + 1; - if (ldiff12 > l2) { - ldiff12 -= l2; - if (ldiff12 > window) { - ldiff12 -= window; - } else { - ldiff12 = 0; - } - } else { - ldiff12 = 0; - } - idx_t ldiff21 = l2 + window; - if (ldiff21 > l1) { - ldiff21 -= l1; - } else { - ldiff21 = 0; - } - - for (idx_t i=0; i ldiff12) { - imin = i - ldiff12; - } else { - imin = 0; - } - imax = MAX(l2, ldiff21); - ui = 0; - for (idx_t j=imin; j ui) { - ui = s2[j]; - } - } - li = INFINITY; - for (idx_t j=imin; j ui) { - t += ci - ui; - } else if (ci < li) { - t += li - ci; - } - } - return t; +seq_t ub_euclidean_euclidean(seq_t *s1, idx_t l1, seq_t *s2, idx_t l2) { + return euclidean_distance_euclidean(s1, l1, s2, l2); +} + + +/*! + Euclidean upper bound for DTW. + + @see ed.euclidean_distance_ndim. +*/ +seq_t ub_euclidean_ndim_euclidean(seq_t *s1, idx_t l1, seq_t *s2, idx_t l2, int ndim) { + return euclidean_distance_ndim_euclidean(s1, l1, s2, l2, ndim); } +{% set inner_dist = 'squaredeuclidean' %} +{%- include 'lb_keogh.jinja.c' %} + +{% set inner_dist = 'euclidean' %} +{%- include 'lb_keogh.jinja.c' %} + + // MARK: Block /* Create settings struct with default values (all extras deactivated). 
*/ diff --git a/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/jinja/dtw_distance.jinja.c b/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/jinja/dtw_distance.jinja.c index 38d166dc..bf81a31b 100644 --- a/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/jinja/dtw_distance.jinja.c +++ b/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/jinja/dtw_distance.jinja.c @@ -5,6 +5,12 @@ Compute the DTW between two n-dimensional series. Compute the DTW between two series. {%- endif %} +{%- if "euclidean" == inner_dist %} +Use the Euclidean inner distance. +{%- else %}{# inner_dist == "squaredeuclidean" #} +Use the Squared Euclidean inner distance. +{%- endif %} + @param s1 First sequence @param l1 Length of first sequence. {% if "ndim" in suffix %}In tuples, real length should be length*ndim.{% endif %} @param s2 Second sequence @@ -21,9 +27,19 @@ Compute the DTW between two series. {%- set i="i_idx" %} {%- set j="j_idx" %} {%- endif %} -seq_t dtw_distance{{ suffix }}(seq_t *s1, idx_t l1, +{%- if "euclidean" == inner_dist %} +{%- set suffix2="_euclidean" %} +{%- else %} +{%- set suffix2="" %} +{%- endif %} +seq_t dtw_distance{{ suffix }}{{ suffix2 }}(seq_t *s1, idx_t l1, seq_t *s2, idx_t l2, {% if "ndim" in suffix %}int ndim,{% endif %} DTWSettings *settings) { + {%- if inner_dist != "euclidean" %} + if (settings->inner_dist == 1) { + return dtw_distance{{ suffix }}_euclidean(s1, l1, s2, l2, {% if "ndim" in suffix %}ndim, {% endif %} settings); + } + {%- endif %} assert(settings->psi_1b <= l1 && settings->psi_1e <= l1 && settings->psi_2b <= l2 && settings->psi_2e <= l2); idx_t ldiff; @@ -45,9 +61,13 @@ seq_t dtw_distance{{ suffix }}(seq_t *s1, idx_t l1, #endif if (settings->use_pruning || settings->only_ub) { {%- if "ndim" in suffix %} - max_dist = pow(ub_euclidean_ndim(s1, l1, s2, l2, ndim), 2); + max_dist = ub_euclidean_ndim{{ suffix2 }}(s1, l1, s2, l2, ndim); + {%- else %} + max_dist = ub_euclidean{{ suffix2 }}(s1, l1, s2, l2); + {%- endif %} + {%- if "euclidean" == inner_dist %} {%- 
else %} - max_dist = pow(ub_euclidean(s1, l1, s2, l2), 2); + max_dist = pow(max_dist, 2); {%- endif %} if (settings->only_ub) { return max_dist; @@ -174,10 +194,17 @@ seq_t dtw_distance{{ suffix }}(seq_t *s1, idx_t l1, {%- if "ndim" in suffix %} d = 0; for (int d_i=0; d_i max_step) { // Let the value be INFINITY as initialized @@ -240,7 +267,11 @@ seq_t dtw_distance{{ suffix }}(seq_t *s1, idx_t l1, if (window - 1 < 0) { l2 += window - 1; } + {%- if "euclidean" == inner_dist %} + seq_t result = dtw[length * i1 + l2 - skip]; + {%- else %} seq_t result = sqrt(dtw[length * i1 + l2 - skip]); + {%- endif %} // Deal with psi-relaxation in the last row if (settings->psi_2e != 0) { for (i=l2 - skip - settings->psi_2e; i p.ri2) { + // min_ci += rbs - p.ri2; + // max_ci += rbs - p.ri2; + // } for (ri=MAX(rbs, p.ri2); ri p.ri3) { + // min_ci += rbs - p.ri3; + // wpsi_start += rbs - p.ri3; + // } for (ri=MAX(rbs, p.ri3); riinner_dist == 1) { + return dtw_warping_paths{{ suffix }}_euclidean(wps, s1, l1, s2, l2, + return_dtw, do_sqrt, psi_neg, {% if "affinity" in suffix %}only_triu,{% endif %}{% if "ndim" in suffix %}ndim, {% endif %}{% if "affinity" in suffix -%}gamma, tau, delta, delta_factor, {% endif %}settings); + } + {%- endif %} {%- if "affinity" in suffix %} seq_t dtw_prev; {%- else %} @@ -49,10 +61,14 @@ seq_t dtw_warping_paths{{ suffix }}(seq_t *wps, {%- if "affinity" not in suffix %} if (settings->use_pruning || settings->only_ub) { if (ndim == 1) { - p.max_dist = pow(ub_euclidean(s1, l1, s2, l2), 2); + p.max_dist = ub_euclidean(s1, l1, s2, l2); } else { - p.max_dist = pow(ub_euclidean_ndim(s1, l1, s2, l2, ndim), 2); + p.max_dist = ub_euclidean_ndim(s1, l1, s2, l2, ndim); } + {%- if "euclidean" == inner_dist %} + {%- else %} + p.max_dist = pow(p.max_dist, 2); + {%- endif %} if (settings->only_ub) { if (do_sqrt) { return sqrt(p.max_dist); @@ -130,8 +146,11 @@ seq_t dtw_warping_paths{{ suffix }}(seq_t *wps, ci_idx = ci * ndim; d = 0; for (int d_i=0; d_i 0) { @@ -464,6 
+495,7 @@ seq_t dtw_warping_paths{{ suffix }}(seq_t *wps, } } } + {%- endif %} return rvalue; } diff --git a/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/jinja/generate.py b/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/jinja/generate.py index 72c043db..bc5f7820 100755 --- a/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/jinja/generate.py +++ b/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/jinja/generate.py @@ -32,13 +32,18 @@ {}, ["dtw_distance.jinja.c", "dtw_distances.jinja.c", "dtw_warpingpaths.jinja.c", "dtw_dba.jinja.c", - "dtw_expandwps.jinja.c", "dtw_bestpath.jinja.c"]], + "dtw_expandwps.jinja.c", "dtw_bestpath.jinja.c", + "lb_keogh.jinja.c"]], "dd_dtw_openmp.c": ["dd_dtw_openmp.jinja.c", {}, ["dtw_distances_parallel.jinja.c"]], + "dd_ed.c": + ["dd_ed.jinja.c", + {}, + ["ed_distance.jinja.c"]], } -essential_targets = ['dd_dtw.c', 'dd_dtw_openmp.c'] +essential_targets = ['dd_dtw.c', 'dd_dtw_openmp.c', 'dd_ed.c'] def generate(target): @@ -62,6 +67,8 @@ def main(argv=None): parser.add_argument('--verbose', '-v', action='count', default=0, help='Verbose output') parser.add_argument('--quiet', '-q', action='count', default=0, help='Quiet output') parser.add_argument('--deps', '-d', action='store_true', help='Print dependencies') + parser.add_argument('--targets', '-t', action='store_true', help='Print targets') + parser.add_argument('--all', '-a', action='store_true', help='Use all targets') # parser.add_argument('--output', '-o', required=True, help='Output file') # parser.add_argument('--version', action='version', version='%(prog)s 1.0') parser.add_argument('input', nargs='*', help='List of target files to generate') @@ -84,6 +91,13 @@ def main(argv=None): print(' '.join(deps)) return 0 + if args.targets: + if args.all: + print(' '.join(targets.keys())) + else: + print(' '.join(inputs)) + return 0 + for target in inputs: generate(target) diff --git a/tests/test_dtw.py b/tests/test_dtw.py index defbc32c..b6f17108 100644 --- a/tests/test_dtw.py +++ 
b/tests/test_dtw.py @@ -66,6 +66,15 @@ def test_distance1_b(): assert d2 == pytest.approx(math.sqrt(2)) +@numpyonly +def test_distance1_b_e(): + with util_numpy.test_uses_numpy() as np: + s1 = [0, 0, 1, 2, 1, 0, 1, 0, 0] + s2 = [0, 1, 2, 0, 0, 0, 0, 0, 0] + d2, wps = dtw.warping_paths(s1, s2, inner_dist='euclidean') + assert d2 == pytest.approx(2) + + @numpyonly def test_distance1_d(): with util_numpy.test_uses_numpy() as np: @@ -75,6 +84,14 @@ def test_distance1_d(): assert(d) == pytest.approx(math.sqrt(2)) +@numpyonly +def test_distance1_d_e(): + with util_numpy.test_uses_numpy() as np: + s1 = np.array([0., 0, 1, 2, 1, 0, 1, 0, 0]) + s2 = np.array([0., 1, 2, 0, 0, 0, 0, 0, 0]) + d = dtw.distance_fast(s1, s2, inner_dist='euclidean') + assert(d) == pytest.approx(2) + @numpyonly def test_distance1_c(): with util_numpy.test_uses_numpy() as np: @@ -181,11 +198,12 @@ def test_distance_matrix_block(): np.set_printoptions(precision=3, linewidth=120) # test_distance1_a() # test_distance1_b() - try: - test_distance_matrix2_e() - except Exception as exc: - print(exc) - print(dtw.try_import_c()) + test_distance1_d_e() + # try: + # test_distance_matrix2_e() + # except Exception as exc: + # print(exc) + # print(dtw.try_import_c()) # run_distance_matrix_block(parallel=False, use_c=True, compact=True) # test_expected_length1() # test_condensed_index1() diff --git a/tests/test_subsequence.py b/tests/test_subsequence.py index b5c27f08..6a1aef96 100644 --- a/tests/test_subsequence.py +++ b/tests/test_subsequence.py @@ -413,7 +413,7 @@ def test_lb1(use_c): # test_dtw_subseq_eeg() # test_dtw_subseq_bug1() # test_dtw_subseq_ndim() - test_dtw_subseq_ndim2(use_c=True) + # test_dtw_subseq_ndim2(use_c=True) # test_dtw_localconcurrences_eeg() # test_dtw_subseqsearch_eeg2() # test_lc_pat1() @@ -425,4 +425,4 @@ def test_lb1(use_c): # test_dtw_subseqsearch_eeg_lb(benchmark=None, use_c=True, use_lb=False) # test_eeg_lb(benchmark=None, use_c=False) # test_dtw_localconcurrences_short() - # 
test_lb1(use_c=True) + test_lb1(use_c=False) diff --git a/tests/test_warping.py b/tests/test_warping.py index c54fcc95..e3fd969e 100644 --- a/tests/test_warping.py +++ b/tests/test_warping.py @@ -298,7 +298,7 @@ def test_subsequence(): directory = Path(os.environ.get('TESTDIR', Path(__file__).parent)) print(f"Saving files to {directory}") # test_normalize() - # test_normalize2() + test_normalize2() # test_normalize2_prob() # test_warping_path1() # test_warping_path2() @@ -308,5 +308,5 @@ def test_subsequence(): # test_psi_dtw_1d() # test_psi_dtw_2a() # test_psi_dtw_2b() - test_twoleadecg_1() + # test_twoleadecg_1() # test_subsequence() From 1ece7ac85d737c2617a5154d6cb0c57b35ded434 Mon Sep 17 00:00:00 2001 From: wannesm Date: Mon, 12 Jun 2023 13:52:56 +0200 Subject: [PATCH 45/59] bump version --- dtaidistance/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dtaidistance/__init__.py b/dtaidistance/__init__.py index 1d8b8761..4e1ed832 100644 --- a/dtaidistance/__init__.py +++ b/dtaidistance/__init__.py @@ -32,7 +32,7 @@ # "then run `cd {};python3 setup.py build_ext --inplace`.".format(dtaidistance_dir)) dtw_cc = None -__version__ = "2.3.10" +__version__ = "2.3.11" __author__ = "Wannes Meert" __copyright__ = "Copyright 2017-2022 KU Leuven, DTAI Research Group" __license__ = "Apache License, Version 2.0" From 922027a3e31399efff6c51f3048f5adef3404ae5 Mon Sep 17 00:00:00 2001 From: wannesm Date: Wed, 14 Jun 2023 10:56:52 +0200 Subject: [PATCH 46/59] docs --- dtaidistance/dtw.py | 7 +++++- dtaidistance/dtw_visualisation.py | 15 +++++++------ dtaidistance/innerdistance.py | 10 +++++++++ tests/test_bugs.py | 36 ++++++++++++++++++++++++++++++- 4 files changed, 59 insertions(+), 9 deletions(-) diff --git a/dtaidistance/dtw.py b/dtaidistance/dtw.py index fb6e61d5..50aed5d4 100644 --- a/dtaidistance/dtw.py +++ b/dtaidistance/dtw.py @@ -223,7 +223,12 @@ def distance(s1, s2, This is the same as passing ub_euclidean() to max_dist :param only_ub: Only 
compute the upper bound (Euclidean). :param inner_dist: Distance between two points in the time series. - One of 'squared euclidean' (default), 'euclidean' + One of 'squared euclidean' (default), 'euclidean'. + When using the pure Python implementation (thus use_c=False) then the argument can also + be an object that has as callable arguments 'inner_dist' and 'result'. The 'inner_dist' + function computes the distance between two points (e.g., squared euclidean) and 'result' + is the function to apply to the final distance (e.g., sqrt when using squared euclidean). + You can also inherit from the 'innerdistance.CustomInnerDist' class. Returns: DTW distance """ diff --git a/dtaidistance/dtw_visualisation.py b/dtaidistance/dtw_visualisation.py index c582f0e3..5521da4b 100644 --- a/dtaidistance/dtw_visualisation.py +++ b/dtaidistance/dtw_visualisation.py @@ -357,18 +357,19 @@ def plot_matrix(distances, shownumbers=False, filename=None, fig=None, ax=None): ax.yaxis.set_ticks_position('both') im = ax.imshow(distances) - idxs = [str(i) for i in range(len(distances))] + idxs_y = [str(i) for i in range(distances.shape[0])] + idxs_x = [str(i) for i in range(distances.shape[1])] # Show all ticks - ax.set_xticks(np.arange(len(idxs))) - ax.set_xticklabels(idxs) - ax.set_yticks(np.arange(len(idxs))) - ax.set_yticklabels(idxs) + ax.set_xticks(np.arange(len(idxs_x))) + ax.set_xticklabels(idxs_x) + ax.set_yticks(np.arange(len(idxs_y))) + ax.set_yticklabels(idxs_y) ax.set_title("Distances between series", pad=30) if shownumbers: - for i in range(len(idxs)): - for j in range(len(idxs)): + for i in range(len(idxs_y)): + for j in range(len(idxs_x)): if not np.isinf(distances[i, j]): l = "{:.2f}".format(distances[i, j]) ax.text(j, i, l, ha="center", va="center", color="w") diff --git a/dtaidistance/innerdistance.py b/dtaidistance/innerdistance.py index c728fea8..d666b76a 100644 --- a/dtaidistance/innerdistance.py +++ b/dtaidistance/innerdistance.py @@ -85,10 +85,20 @@ class 
CustomInnerDist: @staticmethod def inner_dist(x, y): + """The distance between two points in the series. + + For example, for default DTW this would be the Squared Euclidean + distance: (a-b)**2. + """ raise Exception("Function not defined") @staticmethod def result(x): + """The transformation applied to the sum of all inner distances. + + For example, for default DTW, which uses Squared Euclidean, this + would be: sqrt(d). Because d = (a_0-b_0)**2 + (a_1-b_1)**2 ... + """ raise Exception("Function not defined") diff --git a/tests/test_bugs.py b/tests/test_bugs.py index 8c833aab..6621841c 100644 --- a/tests/test_bugs.py +++ b/tests/test_bugs.py @@ -315,6 +315,40 @@ def test_bug_size(): assert d1 == pytest.approx(d2) +@numpyonly +def test_bug5_path(): + """ + without psi: [(0, 0), (0, 1), (1, 2), (1, 3), (2, 4)] + with psi: [(0, 1), (1, 2), (1, 3), (2, 4)] + + Why is this not (with psi): [(2,4), (1,3), (0,2)] ? + Answer: + Numerical inaccuracies. When choosing the best path from (1,3) the + three options are [1.0, 1.9999999999999996, 0.9999999999999996]. + Thus moving left (last one) is chosen instead of the expected diagonal. + + In theory: + Path 1: (0,2), (1,3), (2,4) = sqrt(1**2 + 0 + 0) = 1 + Path 2: (0,1), (1,2), (1,3), (2,4) = sqrt(0 + 1**2 + 0 + 0) = 1 + And path 1 should be chosen because the diagonal move has priority. 
+ + In practice, floating point inaccuracies: + Path 1: (2.1-3.1) = 1.0 + Path 2: (4.1-3.1) = 0.9999999999999996 + + """ + with util_numpy.test_uses_numpy() as np: + s1 = np.array([2.1, 4.1, 5.1]) + s2 = np.array([1.1, 2.1, 3.1, 4.1, 5.1]) + d1, wps = dtw.warping_paths(s1, s2, psi=[0, 0, len(s2), len(s2)]) + best_path = dtw.best_path(wps) + print(best_path) + + # if directory and not dtwvis.test_without_visualization(): + # dtwvis.plot_warpingpaths(s1, s2, wps, best_path, filename=directory / 'bug5_warpingpaths.png') + # dtwvis.plot_matrix(wps, shownumbers=True, filename=directory / 'bug5_matrix.png') + + if __name__ == "__main__": directory = Path(os.environ.get('TESTDIR', Path(__file__).parent)) # with util_numpy.test_uses_numpy() as np: @@ -334,5 +368,5 @@ def test_bug_size(): # test_bug1_psi() # test_bug2() # test_bug3() - test_bug4() + test_bug5_path() # test_bug_size() From 15cac70b9a0e244cb39d7591212f8e5b3e3ea883 Mon Sep 17 00:00:00 2001 From: wannesm Date: Wed, 14 Jun 2023 18:50:35 +0200 Subject: [PATCH 47/59] add best_path_isclose to c source --- .../DTAIDistanceC/dd_benchmark.c | 24 +++- .../lib/DTAIDistanceC/DTAIDistanceC/dd_dtw.c | 130 +++++++++++++++++- .../lib/DTAIDistanceC/DTAIDistanceC/dd_dtw.h | 10 +- .../DTAIDistanceC/jinja/dd_dtw.jinja.c | 9 +- .../DTAIDistanceC/jinja/dtw_bestpath.jinja.c | 44 ++++-- 5 files changed, 195 insertions(+), 22 deletions(-) diff --git a/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/dd_benchmark.c b/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/dd_benchmark.c index d65bdd23..e2e9aa56 100644 --- a/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/dd_benchmark.c +++ b/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/dd_benchmark.c @@ -109,17 +109,31 @@ void benchmark3() { void benchmark4() { // double s1[] = {0, 0, 1, 2, 1, 0, 1, 0, 0}; int l1 = 9; // double s2[] = {0, 1, 2, 0, 0, 0, 0, 0, 0}; int l2 = 9; - double s1[] = {0., 0., 1., 2., 1., 0., 1., 0., 0., 2., 1., 0., 0.}; int l1 = 13; - double s2[] = {0., 1., 2., 3., 1., 0., 0., 
0., 2., 1., 0., 0., 0.}; int l2 = 13; +// double s1[] = {0., 0., 1., 2., 1., 0., 1., 0., 0., 2., 1., 0., 0.}; int l1 = 13; +// double s2[] = {0., 1., 2., 3., 1., 0., 0., 0., 2., 1., 0., 0., 0.}; int l2 = 13; + double s1[] = {2.1, 4.1, 5.1}; int l1 = 3; + double s2[] = {1.1, 2.1, 3.1, 4.1, 5.1}; int l2 = 5; DTWSettings settings = dtw_settings_default(); settings.use_pruning = true; settings.inner_dist = 0; - dtw_settings_set_psi(2, &settings); + settings.psi_2b = l2; + settings.psi_2e = l2; +// dtw_settings_set_psi(2, &settings); // double d = dtw_distance(s1, 9, s2, 9, &settings); idx_t wps_length = dtw_settings_wps_length(l1, l2, &settings); seq_t wps[wps_length]; double d = dtw_warping_paths(wps, s1, l1, s2, l2, true, true, false, &settings); printf("d=%f\n", d); + + idx_t i1[l1+l2]; + idx_t i2[l1+l2]; + for (idx_t i=0; i<(l1+l2); i++) {i1[i]=0; i2[i]=0;} + dtw_best_path_isclose(wps, i1, i2, l1, l2, 1e-05, 1e-08, &settings); + printf("["); + for (idx_t i=0; i<(l1+l2); i++) { + printf("(%zu,%zu)", i1[i], i2[i]); + } + printf("]\n"); } void benchmark5() { @@ -593,10 +607,10 @@ int main(int argc, const char * argv[]) { time(&start_t); clock_gettime(CLOCK_REALTIME, &start); - benchmark1(); +// benchmark1(); // benchmark2(); // benchmark3(); -// benchmark4(); + benchmark4(); // benchmark5(); // benchmark6(); // benchmark7(); diff --git a/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/dd_dtw.c b/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/dd_dtw.c index 6f236b83..ece65e10 100644 --- a/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/dd_dtw.c +++ b/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/dd_dtw.c @@ -27,7 +27,8 @@ DTWSettings dtw_settings_default(void) { .psi_2e = 0, .use_pruning = false, .only_ub = false, - .inner_dist = 0 + .inner_dist = 0, + .window_type = 0 }; return s; } @@ -61,6 +62,7 @@ void dtw_settings_print(DTWSettings *settings) { printf(" use_pruning = %d\n", settings->use_pruning); printf(" only_ub = %d\n", settings->only_ub); printf(" inner_dist = %d\n", 
settings->inner_dist); + printf(" window_type = %d\n", settings->window_type); printf("}\n"); } @@ -3206,6 +3208,7 @@ idx_t dtw_wps_max(DTWWps* p, seq_t *wps, idx_t *r, idx_t *c, idx_t l1, idx_t l2) } + /*! Compute best path between two series. @@ -3219,6 +3222,7 @@ Compute best path between two series. @param settings for Dynamic Time Warping. @return length of path */ + idx_t dtw_best_path(seq_t *wps, idx_t *i1, idx_t *i2, idx_t l1, idx_t l2, DTWSettings *settings) { @@ -3326,6 +3330,129 @@ idx_t dtw_best_path(seq_t *wps, idx_t *i1, idx_t *i2, idx_t l1, idx_t l2, } +/*! +Compute best path between two series. + + @param wps Array of length `(l1+1)*min(l2+1, abs(l1-l2) + 2*window-1)` with the warping paths. + @param i1 Array of length l1+l2 to store the indices for the first sequence. + Reverse ordered, last one is if i1 or i2 is zero. + @param i2 Array of length l1+l2 to store the indices for the second sequence. + Reverse ordered, last one is if i1 or i2 is zero. + @param l1 Length of first array. + @param l2 Length of second array. + @param rtol Relative tolerance for isclose, typical value is 1e-05 + @param atol Absolute tolerance for isclose, typical value is 1e-08 + @param settings for Dynamic Time Warping. + @return length of path + */ + +idx_t dtw_best_path_isclose(seq_t *wps, idx_t *i1, idx_t *i2, idx_t l1, idx_t l2, + seq_t rtol, seq_t atol, + DTWSettings *settings) { + DTWWps p = dtw_wps_parts(l1, l2, settings); + + idx_t i = 0; + idx_t rip = l1; + idx_t cip = l2; + idx_t min_ci; + idx_t wpsi_start, wpsi; + idx_t ri_widthp = p.width * (rip - 1); + idx_t ri_width = p.width * rip; + + // D. 
ri3 <= ri < l1 + min_ci = p.ri3 + 1 - p.window - p.ldiff; + wpsi_start = 2; + if (p.ri2 == p.ri3) { + wpsi_start = min_ci + 1; + } else { + min_ci = 1 + p.ri3 - p.ri2; + } + wpsi = wpsi_start + (l2 - min_ci) - 1; + while (rip > p.ri3 && cip > 0) { + if (wps[ri_width + wpsi] != -1) { + i1[i] = rip - 1; + i2[i] = cip - 1; + i++; + } + if ((wps[ri_widthp + wpsi - 1] <= wps[ri_width + wpsi - 1] || fabs(wps[ri_widthp + wpsi - 1] - wps[ri_width + wpsi - 1]) <= (atol + rtol * fabs(wps[ri_width + wpsi - 1]))) && + (wps[ri_widthp + wpsi - 1] <= wps[ri_widthp + wpsi] || fabs(wps[ri_widthp + wpsi - 1] - wps[ri_widthp + wpsi]) <= (atol + rtol * fabs(wps[ri_widthp + wpsi])))) { + // Go diagonal + cip--; + rip--; + wpsi = wpsi - 1; + ri_width = ri_widthp; + ri_widthp -= p.width; + } else if ((wps[ri_width + wpsi - 1] <= wps[ri_widthp + wpsi] || fabs(wps[ri_width + wpsi - 1] - wps[ri_widthp + wpsi]) <= (atol + rtol * fabs(wps[ri_widthp + wpsi])))) { + // Go left + cip--; + wpsi--; + } else { + // Go up + rip--; + ri_width = ri_widthp; + ri_widthp -= p.width; + } + } + + // C. ri2 <= ri < ri3 + while (rip > p.ri2 && cip > 0) { + if (wps[ri_width + wpsi] != -1) { + i1[i] = rip - 1; + i2[i] = cip - 1; + i++; + } + if ((wps[ri_widthp + wpsi] <= wps[ri_width + wpsi - 1] || fabs(wps[ri_widthp + wpsi] - wps[ri_width + wpsi - 1]) <= (atol + rtol * fabs(wps[ri_width + wpsi - 1]))) && + (wps[ri_widthp + wpsi] <= wps[ri_widthp + wpsi + 1] || fabs(wps[ri_widthp + wpsi] - wps[ri_widthp + wpsi + 1]) <= (atol + rtol * fabs(wps[ri_widthp + wpsi + 1])))) { + // Go diagonal + cip--; + rip--; + ri_width = ri_widthp; + ri_widthp -= p.width; + } else if ((wps[ri_width + wpsi - 1] <= wps[ri_widthp + wpsi + 1] || fabs(wps[ri_width + wpsi - 1] - wps[ri_widthp + wpsi + 1]) <= (atol + rtol * fabs(wps[ri_widthp + wpsi + 1])))) { + // Go left + cip--; + wpsi--; + } else { + // Go up + rip--; + wpsi++; + ri_width = ri_widthp; + ri_widthp -= p.width; + } + } + + // A-B. 
0 <= ri < ri2 + while (rip > 0 && cip > 0) { + if (wps[ri_width + wpsi] != -1) { + i1[i] = rip - 1; + i2[i] = cip - 1; + i++; + } + if ((wps[ri_widthp + wpsi - 1] <= wps[ri_width + wpsi - 1] || fabs(wps[ri_widthp + wpsi - 1] - wps[ri_width + wpsi - 1]) <= (atol + rtol * fabs(wps[ri_width + wpsi - 1]))) && + (wps[ri_widthp + wpsi - 1] <= wps[ri_widthp + wpsi] || fabs(wps[ri_widthp + wpsi - 1] - wps[ri_widthp + wpsi]) <= (atol + rtol * fabs(wps[ri_widthp + wpsi])))) { + // Go diagonal + cip--; + rip--; + wpsi--; + ri_width = ri_widthp; + ri_widthp -= p.width; + } else { + if ((wps[ri_width + wpsi - 1] <= wps[ri_widthp + wpsi] || fabs(wps[ri_width + wpsi - 1] - wps[ri_widthp + wpsi]) <= (atol + rtol * fabs(wps[ri_widthp + wpsi])))) { + // Go left + cip--; + wpsi--; + } else { + // Go up + rip--; + ri_width = ri_widthp; + ri_widthp -= p.width; + } + } + } + return i; +} + + /*! Compute best path in affinity matrix for two series. @@ -3341,6 +3468,7 @@ Compute best path in affinity matrix for two series. @param settings for Dynamic Time Warping. 
@return length of path */ + idx_t dtw_best_path_affinity(seq_t *wps, idx_t *i1, idx_t *i2, idx_t l1, idx_t l2, idx_t rs, idx_t cs, DTWSettings *settings) { diff --git a/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/dd_dtw.h b/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/dd_dtw.h index f7c77f3d..6c467402 100644 --- a/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/dd_dtw.h +++ b/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/dd_dtw.h @@ -41,8 +41,12 @@ static char printFormat[5]; // Inner distance options -//const int kSquaredEuclidean = 0; -//const int kEuclidean = 1; +//const int kSquaredEuclideanInnerDist = 0; +//const int kEuclideanInnerDist = 1; + +// Band type +//const int kSakoeChibaBand = 0; +//const int kSlantedBand = 1; /** @@ -76,6 +80,7 @@ struct DTWSettings_s { bool use_pruning; bool only_ub; int inner_dist; // 0=squared euclidean, 1=euclidean + int window_type; // 0=band around two diagonals, 1=band around slanted diagonal }; typedef struct DTWSettings_s DTWSettings; @@ -153,6 +158,7 @@ idx_t dtw_wps_loc(DTWWps* p, idx_t r, idx_t c, idx_t l1, idx_t l2); idx_t dtw_wps_loc_columns(DTWWps* p, idx_t r, idx_t *cb, idx_t *ce, idx_t l1, idx_t l2); idx_t dtw_wps_max(DTWWps* p, seq_t *wps, idx_t *r, idx_t *c, idx_t l1, idx_t l2); idx_t dtw_best_path(seq_t *wps, idx_t *i1, idx_t *i2, idx_t l1, idx_t l2, DTWSettings *settings); +idx_t dtw_best_path_isclose(seq_t *wps, idx_t *i1, idx_t *i2, idx_t l1, idx_t l2, seq_t rtol, seq_t atol, DTWSettings *settings); idx_t dtw_best_path_affinity(seq_t *wps, idx_t *i1, idx_t *i2, idx_t l1, idx_t l2, idx_t s1, idx_t s2, DTWSettings *settings); idx_t dtw_best_path_prob(seq_t *wps, idx_t *i1, idx_t *i2, idx_t l1, idx_t l2, seq_t avg, DTWSettings *settings); seq_t dtw_warping_path(seq_t *from_s, idx_t from_l, seq_t* to_s, idx_t to_l, idx_t *from_i, idx_t *to_i, idx_t * length_i, DTWSettings * settings); diff --git a/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/jinja/dd_dtw.jinja.c 
b/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/jinja/dd_dtw.jinja.c index 5058fe3a..c5f9ea25 100644 --- a/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/jinja/dd_dtw.jinja.c +++ b/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/jinja/dd_dtw.jinja.c @@ -27,7 +27,8 @@ DTWSettings dtw_settings_default(void) { .psi_2e = 0, .use_pruning = false, .only_ub = false, - .inner_dist = 0 + .inner_dist = 0, + .window_type = 0 }; return s; } @@ -61,6 +62,7 @@ void dtw_settings_print(DTWSettings *settings) { printf(" use_pruning = %d\n", settings->use_pruning); printf(" only_ub = %d\n", settings->only_ub); printf(" inner_dist = %d\n", settings->inner_dist); + printf(" window_type = %d\n", settings->window_type); printf("}\n"); } @@ -605,10 +607,15 @@ idx_t dtw_wps_max(DTWWps* p, seq_t *wps, idx_t *r, idx_t *c, idx_t l1, idx_t l2) {% set suffix = '' %} +{% set use_isclose = 0 %} {%- include 'dtw_bestpath.jinja.c' %} +{% set suffix = '' %} +{% set use_isclose = 1 %} +{%- include 'dtw_bestpath.jinja.c' %} {% set suffix = '_affinity' %} +{% set use_isclose = 0 %} {%- include 'dtw_bestpath.jinja.c' %} diff --git a/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/jinja/dtw_bestpath.jinja.c b/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/jinja/dtw_bestpath.jinja.c index 1390eda3..b230275c 100644 --- a/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/jinja/dtw_bestpath.jinja.c +++ b/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/jinja/dtw_bestpath.jinja.c @@ -16,17 +16,35 @@ Compute best path between two series. @param rs Start position row. @param cs Start position column. {%- endif %} + {%- if use_isclose == 1 %} + @param rtol Relative tolerance for isclose, typical value is 1e-05 + @param atol Absolute tolerance for isclose, typical value is 1e-08 + {%- endif %} @param settings for Dynamic Time Warping. 
@return length of path */ -{%- if "affinity" in suffix %} -{%- set cmp=">=" %} +{% macro cmpfn(a, b) -%} +{%- if use_isclose == 1 -%} +({{a}} <= {{b}} || fabs({{a}} - {{b}}) <= (atol + rtol * fabs({{b}}))) +{%- else %} +{%- if "affinity" in suffix -%} +{{a}} >= {{b}} +{%- else -%} +{{a}} <= {{b}} +{%- endif %} +{%- endif %} +{%- endmacro -%} +{%- if use_isclose == 1 %} +{%- set suffix2 = "_isclose" %} {%- else %} -{%- set cmp="<=" %} +{%- set suffix2 = "" %} {%- endif %} -idx_t dtw_best_path{{suffix}}(seq_t *wps, idx_t *i1, idx_t *i2, idx_t l1, idx_t l2, +idx_t dtw_best_path{{suffix}}{{suffix2}}(seq_t *wps, idx_t *i1, idx_t *i2, idx_t l1, idx_t l2, {% if "affinity" in suffix -%} idx_t rs, idx_t cs, + {%- endif -%} + {%- if use_isclose == 1 -%} + seq_t rtol, seq_t atol, {%- endif %} DTWSettings *settings) { DTWWps p = dtw_wps_parts(l1, l2, settings); @@ -75,15 +93,15 @@ idx_t dtw_best_path{{suffix}}(seq_t *wps, idx_t *i1, idx_t *i2, idx_t l1, idx_t i++; } {%- endif %} - if (wps[ri_widthp + wpsi - 1] {{cmp}} wps[ri_width + wpsi - 1] && - wps[ri_widthp + wpsi - 1] {{cmp}} wps[ri_widthp + wpsi]) { + if ({{cmpfn("wps[ri_widthp + wpsi - 1]", "wps[ri_width + wpsi - 1]")}} && + {{cmpfn("wps[ri_widthp + wpsi - 1]", "wps[ri_widthp + wpsi]")}}) { // Go diagonal cip--; rip--; wpsi = wpsi - 1; ri_width = ri_widthp; ri_widthp -= p.width; - } else if (wps[ri_width + wpsi - 1] {{cmp}} wps[ri_widthp + wpsi]) { + } else if ({{cmpfn("wps[ri_width + wpsi - 1]","wps[ri_widthp + wpsi]")}}) { // Go left cip--; wpsi--; @@ -113,14 +131,14 @@ idx_t dtw_best_path{{suffix}}(seq_t *wps, idx_t *i1, idx_t *i2, idx_t l1, idx_t i++; } {%- endif %} - if (wps[ri_widthp + wpsi] {{cmp}} wps[ri_width + wpsi - 1] && - wps[ri_widthp + wpsi] {{cmp}} wps[ri_widthp + wpsi + 1]) { + if ({{cmpfn("wps[ri_widthp + wpsi]","wps[ri_width + wpsi - 1]")}} && + {{cmpfn("wps[ri_widthp + wpsi]","wps[ri_widthp + wpsi + 1]")}}) { // Go diagonal cip--; rip--; ri_width = ri_widthp; ri_widthp -= p.width; - } else if 
(wps[ri_width + wpsi - 1] {{cmp}} wps[ri_widthp + wpsi + 1]) { + } else if ({{cmpfn("wps[ri_width + wpsi - 1]","wps[ri_widthp + wpsi + 1]")}}) { // Go left cip--; wpsi--; @@ -151,8 +169,8 @@ idx_t dtw_best_path{{suffix}}(seq_t *wps, idx_t *i1, idx_t *i2, idx_t l1, idx_t i++; } {%- endif %} - if (wps[ri_widthp + wpsi - 1] {{cmp}} wps[ri_width + wpsi - 1] && - wps[ri_widthp + wpsi - 1] {{cmp}} wps[ri_widthp + wpsi]) { + if ({{cmpfn("wps[ri_widthp + wpsi - 1]","wps[ri_width + wpsi - 1]")}} && + {{cmpfn("wps[ri_widthp + wpsi - 1]","wps[ri_widthp + wpsi]")}}) { // Go diagonal cip--; rip--; @@ -160,7 +178,7 @@ idx_t dtw_best_path{{suffix}}(seq_t *wps, idx_t *i1, idx_t *i2, idx_t l1, idx_t ri_width = ri_widthp; ri_widthp -= p.width; } else { - if (wps[ri_width + wpsi - 1] {{cmp}} wps[ri_widthp + wpsi]) { + if ({{cmpfn("wps[ri_width + wpsi - 1]","wps[ri_widthp + wpsi]")}}) { // Go left cip--; wpsi--; From 0343fe0fee08b4079df491ff5c8dc46d72f43465 Mon Sep 17 00:00:00 2001 From: wannesm Date: Wed, 5 Jul 2023 15:19:20 +0200 Subject: [PATCH 48/59] k-means clustering, monitor distances --- dtaidistance/clustering/kmeans.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/dtaidistance/clustering/kmeans.py b/dtaidistance/clustering/kmeans.py index 286a9a82..f06d62c6 100644 --- a/dtaidistance/clustering/kmeans.py +++ b/dtaidistance/clustering/kmeans.py @@ -238,12 +238,19 @@ def kmeansplusplus_centers(self, series, use_c=False): def fit_fast(self, series): return self.fit(series, use_c=True, use_parallel=True) - def fit(self, series, use_c=False, use_parallel=True): + def fit(self, series, use_c=False, use_parallel=True, monitor_distances=None): """Perform K-means clustering. 
:param series: Container with series :param use_c: Use the C-library (only available if package is compiled) :param use_parallel: Use multipool for parallelization + :param monitor_distances: This function is called with two arguments: + (1) a list of (cluster, distance) for each instance; + (2) a boolean indicating whether the clustering has been stopped or not. + From this one can compute inertia or other metrics + to monitor the clustering. If the boolean argument is true, this is the + final assignment. If this function returns True, the clustering + continues, if False is returned the clustering is stopped. :return: cluster indices, number of iterations If the number of iterations is equal to max_it, the clustering did not converge. @@ -299,6 +306,10 @@ def fit(self, series, use_c=False, use_parallel=True): else: clusters_distances = list(map(fn, [(self.series[idx], self.means, self.dists_options) for idx in range(len(self.series))])) + if monitor_distances is not None: + cont = monitor_distances(clusters_distances, False) + if cont is False: + break clusters, distances = zip(*clusters_distances) distances = list(distances) @@ -393,6 +404,8 @@ def fit(self, series, use_c=False, use_parallel=True): else: clusters_distances = list(map(fn, [(self.series[idx], self.means, self.dists_options) for idx in range(len(self.series))])) + if monitor_distances is not None: + monitor_distances(clusters_distances, True) clusters, distances = zip(*clusters_distances) # self.cluster_idx = {medoid: {inst for inst in instances} From 0d1b41782aaabc4c05a1722dd8379f35939a6d55 Mon Sep 17 00:00:00 2001 From: wannesm Date: Wed, 5 Jul 2023 15:21:12 +0200 Subject: [PATCH 49/59] k-means clustering, monitor distances --- dtaidistance/clustering/kmeans.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dtaidistance/clustering/kmeans.py b/dtaidistance/clustering/kmeans.py index f06d62c6..6af1c252 100644 --- a/dtaidistance/clustering/kmeans.py +++ 
b/dtaidistance/clustering/kmeans.py @@ -235,8 +235,8 @@ def kmeansplusplus_centers(self, series, use_c=False): logger.debug('... Done') return means - def fit_fast(self, series): - return self.fit(series, use_c=True, use_parallel=True) + def fit_fast(self, series, monitor_distances=None): + return self.fit(series, use_c=True, use_parallel=True, monitor_distances=monitor_distances) def fit(self, series, use_c=False, use_parallel=True, monitor_distances=None): """Perform K-means clustering. From ebf6679319e50b667c228f163b611436a3c8a46b Mon Sep 17 00:00:00 2001 From: wannesm Date: Thu, 27 Jul 2023 15:38:20 +0200 Subject: [PATCH 50/59] Update deploy.yml --- .github/workflows/deploy.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index a9e35043..c4ba0b3f 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -10,8 +10,8 @@ name: Python package on: push: branches: [ deploy ] - pull_request: - banches: [ deploy ] + # pull_request: + # branches: [ deploy ] jobs: # Explore-GitHub-Actions: From a4b4bbf4be5bac7b95210b993ee24213bc2a0a36 Mon Sep 17 00:00:00 2001 From: Pieter Robberechts Date: Tue, 25 Jul 2023 16:34:07 +0200 Subject: [PATCH 51/59] Fix build for Cython v3.0.0 Fixes #195 --- setup.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index de52e266..a5458591 100755 --- a/setup.py +++ b/setup.py @@ -403,7 +403,7 @@ def check_openmp(cc_bin, noxpreprocessor, printfn=print): extensions.append( Extension( "dtaidistance.dtw_cc", - ["dtaidistance/dtw_cc.pyx", "dtaidistance/dtw_cc.pxd", + ["dtaidistance/dtw_cc.pyx", "dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/dd_dtw.c", "dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/dd_ed.c" ], @@ -447,8 +447,8 @@ def check_openmp(cc_bin, noxpreprocessor, printfn=print): else: print("WARNING: Numpy was not found, preparing a version without Numpy support.") - ext_modules = 
cythonize(extensions) - # compiler_directives={'language_level': "3"}) + ext_modules = cythonize(extensions, language_level=2) + else: print("WARNING: Cython was not found, preparing a pure Python version.") ext_modules = [] From 5d9aaafecbade603e2150bc7fd18a7f57591daa2 Mon Sep 17 00:00:00 2001 From: Pieter Robberechts Date: Tue, 25 Jul 2023 16:52:05 +0200 Subject: [PATCH 52/59] Fix broken test case --- tests/test_bugsvis.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_bugsvis.py b/tests/test_bugsvis.py index 09f08396..485106b0 100644 --- a/tests/test_bugsvis.py +++ b/tests/test_bugsvis.py @@ -73,7 +73,7 @@ def test_bug3(): np.array([1., 2, 0, 0, 0, 0, 0, 1, 1, 3, 4, 5]), np.array([0., 0, 1, 2, 1, 0, 1]), np.array([0., 1, 2, 0, 0, 0, 0, 0]), - np.array([1., 2, 0, 0, 0, 0, 0, 1, 1])]) + np.array([1., 2, 0, 0, 0, 0, 0, 1, 1])], dtype=object) ds = dtw.distance_matrix(series) print(ds) From d914ab85021f67ff1c58d45727e0e4844ad26d8e Mon Sep 17 00:00:00 2001 From: Pieter Robberechts Date: Tue, 1 Aug 2023 14:51:50 +0200 Subject: [PATCH 53/59] Update Ubuntu Actions runner image --- .github/workflows/deploy.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index c4ba0b3f..9ce571df 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -83,7 +83,7 @@ jobs: matrix: # os: [ubuntu-20.04,ubuntu-18.04] # python-version: ["3.8","3.9","3.10"] - os: [ubuntu-18.04] + os: [ubuntu-22.04] # cibuildwheel will automatically provide all Python versions python-version: ["3.9"] steps: From 655a6c133882fba8bdc491483e0c3df294cdc778 Mon Sep 17 00:00:00 2001 From: wannesm Date: Thu, 3 Aug 2023 23:35:17 +0200 Subject: [PATCH 54/59] c --- dtaidistance/lib/DTAIDistanceC/.gitignore | 1 + .../DTAIDistanceC/DTAIDistanceC/dd_benchmark.c | 2 +- .../lib/DTAIDistanceC/DTAIDistanceC/dd_dtw.c | 18 ++++++++++++------ .../DTAIDistanceC/jinja/dd_dtw.jinja.c | 10 ++++++++-- 
.../DTAIDistanceC/jinja/dtw_expandwps.jinja.c | 4 ++-- 5 files changed, 24 insertions(+), 11 deletions(-) diff --git a/dtaidistance/lib/DTAIDistanceC/.gitignore b/dtaidistance/lib/DTAIDistanceC/.gitignore index 2732312c..66f16304 100644 --- a/dtaidistance/lib/DTAIDistanceC/.gitignore +++ b/dtaidistance/lib/DTAIDistanceC/.gitignore @@ -1,4 +1,5 @@ profile* +DTAIDistanceC/jinja/dd_ed.c DTAIDistanceC/jinja/dd_dtw.c DTAIDistanceC/jinja/dd_dtw_openmp.c DTAIDistanceC/jinja/dtw_distances_matrix.c diff --git a/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/dd_benchmark.c b/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/dd_benchmark.c index e2e9aa56..6f2f80d9 100644 --- a/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/dd_benchmark.c +++ b/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/dd_benchmark.c @@ -128,7 +128,7 @@ void benchmark4() { idx_t i1[l1+l2]; idx_t i2[l1+l2]; for (idx_t i=0; i<(l1+l2); i++) {i1[i]=0; i2[i]=0;} - dtw_best_path_isclose(wps, i1, i2, l1, l2, 1e-05, 1e-08, &settings); + dtw_best_path_isclose(wps, i1, i2, l1, l2, /*rtol=*/1e-05, /*atol=*/1e-08, &settings); printf("["); for (idx_t i=0; i<(l1+l2); i++) { printf("(%zu,%zu)", i1[i], i2[i]); diff --git a/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/dd_dtw.c b/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/dd_dtw.c index ece65e10..eafbc422 100644 --- a/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/dd_dtw.c +++ b/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/dd_dtw.c @@ -1843,7 +1843,7 @@ void dtw_expand_wps_slice(seq_t *wps, seq_t *full, max_ci = p.window + p.ldiffc; // ri < overlap_right_i max_ci += rbs; for (ri=rbs; ri= overlap_right_i if (rbs < p.ri2) { for (ri=MAX(rbs, p.ri1); ri= overlap_right_i if (rbs < p.ri2) { for (ri=MAX(rbs, p.ri1); ri [%zu,%zu] -- %zu + %zu\n", cbp, cep, wpsi, cb-cbs); */ - idx = wpsi + (cb - cbs); + idx = wpsi; + if (cb > cbs) { + idx += cb - cbs; + } for (j=cbp; j 0 && wps[idx] != INFINITY) { wps[idx] = -wps[idx]; @@ -2835,7 +2838,10 @@ void dtw_wps_negativize(DTWWps* p, seq_t *wps, 
idx_t l1, idx_t l2, idx_t rb, idx break; } /* printf("--> [%zu,%zu] -- %zu + %zu\n", cbp, cep, wpsi, cb-cbs); */ - idx = wpsi + (cb - cbs); + idx = wpsi; + if (cb > cbs) { + idx += cb - cbs; + } for (j=cbp; j 0 && wps[idx] != INFINITY) { wps[idx] = -wps[idx]; diff --git a/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/jinja/dd_dtw.jinja.c b/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/jinja/dd_dtw.jinja.c index c5f9ea25..d9d4a9e8 100644 --- a/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/jinja/dd_dtw.jinja.c +++ b/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/jinja/dd_dtw.jinja.c @@ -214,7 +214,10 @@ void dtw_wps_negativize(DTWWps* p, seq_t *wps, idx_t l1, idx_t l2, idx_t rb, idx cbp = MAX(cb, cbs); cep = MIN(ce, ces); /* printf("--> [%zu,%zu] -- %zu + %zu\n", cbp, cep, wpsi, cb-cbs); */ - idx = wpsi + (cb - cbs); + idx = wpsi; + if (cb > cbs) { + idx += cb - cbs; + } for (j=cbp; j 0 && wps[idx] != INFINITY) { wps[idx] = -wps[idx]; @@ -233,7 +236,10 @@ void dtw_wps_negativize(DTWWps* p, seq_t *wps, idx_t l1, idx_t l2, idx_t rb, idx break; } /* printf("--> [%zu,%zu] -- %zu + %zu\n", cbp, cep, wpsi, cb-cbs); */ - idx = wpsi + (cb - cbs); + idx = wpsi; + if (cb > cbs) { + idx += cb - cbs; + } for (j=cbp; j 0 && wps[idx] != INFINITY) { wps[idx] = -wps[idx]; diff --git a/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/jinja/dtw_expandwps.jinja.c b/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/jinja/dtw_expandwps.jinja.c index 8803c8c9..df0a3e97 100644 --- a/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/jinja/dtw_expandwps.jinja.c +++ b/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC/jinja/dtw_expandwps.jinja.c @@ -66,7 +66,7 @@ void dtw_expand_wps_slice{{suffix}}(seq_t *wps, seq_t *full, max_ci = p.window + p.ldiffc; // ri < overlap_right_i max_ci += rbs; for (ri=rbs; ri= overlap_right_i if (rbs < p.ri2) { for (ri=MAX(rbs, p.ri1); ri Date: Sun, 6 Aug 2023 13:56:10 +0200 Subject: [PATCH 55/59] restructured subsequence search algos --- Makefile | 8 +- dtaidistance/dtw.py | 23 +- 
dtaidistance/dtw_cc.pyx | 34 +- dtaidistance/dtw_visualisation.py | 24 +- dtaidistance/jinja/dtw_cc.jinja.pyx | 34 +- dtaidistance/subsequence/dtw.py | 710 +---------------- dtaidistance/subsequence/localconcurrences.py | 732 ++++++++++++++++++ .../subsequence/subsequencealignment.py | 269 +++++++ dtaidistance/subsequence/subsequencesearch.py | 296 +++++++ tests/rsrc/pat1.txt | 2 + tests/test_subsequence.py | 289 ++++++- 11 files changed, 1642 insertions(+), 779 deletions(-) create mode 100644 dtaidistance/subsequence/localconcurrences.py create mode 100644 dtaidistance/subsequence/subsequencealignment.py create mode 100644 dtaidistance/subsequence/subsequencesearch.py create mode 100644 tests/rsrc/pat1.txt diff --git a/Makefile b/Makefile index 1930b4a5..b62b177f 100644 --- a/Makefile +++ b/Makefile @@ -84,8 +84,14 @@ clean: rm -f dtaidistance/*.pyc rm -rf dtaidistance/__pycache__ +.PHONY: use-venv +use-venv: + $(eval $@_TMP := $(shell python3 -c 'import sys; print(sys.prefix)')) + @#@echo $($@_TMP) + @if [ -f "use_venv.txt" ]; then grep '$($@_TMP)' use_venv.txt || (echo "venv does not appear in use_venv.txt: $($@_TMP)"; exit 1) ;fi + .PHONY: build -build: +build: use-venv python3 setup.py build_ext --inplace .PHONY: pypy-build diff --git a/dtaidistance/dtw.py b/dtaidistance/dtw.py index 50aed5d4..eb56516d 100644 --- a/dtaidistance/dtw.py +++ b/dtaidistance/dtw.py @@ -566,7 +566,7 @@ def warping_paths_fast(s1, s2, window=None, max_dist=None, use_pruning=False, def warping_paths_affinity(s1, s2, window=None, only_triu=False, penalty=None, psi=None, psi_neg=True, - gamma=1, tau=0, delta=0, delta_factor=1, exp_avg=None, + gamma=1, tau=0, delta=0, delta_factor=1, use_c=False): """ Dynamic Time Warping warping paths using an affinity/similarity matrix instead of a distance matrix. 
@@ -585,8 +585,7 @@ def warping_paths_affinity(s1, s2, window=None, only_triu=False, """ if use_c: return warping_paths_affinity_fast(s1, s2, window=window, only_triu=only_triu, - penalty=penalty, tau=tau, delta=delta, delta_factor=delta_factor, - exp_avg=exp_avg) + penalty=penalty, tau=tau, delta=delta, delta_factor=delta_factor) if np is None: raise NumpyException("Numpy is required for the warping_paths method") r, c = len(s1), len(s2) @@ -618,20 +617,10 @@ def warping_paths_affinity(s1, s2, window=None, only_triu=False, dtw_prev = max(dtw[i0, j], dtw[i0, j + 1] - penalty, dtw[i1, j] - penalty) - if exp_avg is None: - if d < tau: - # if dtw_prev > 10 * -delta: - # dtw_prev = 10 * -delta - dtw[i1, j + 1] = max(0, delta + delta_factor * dtw_prev) - else: - dtw[i1, j + 1] = max(0, d + dtw_prev) + if d < tau: + dtw[i1, j + 1] = max(0, delta + delta_factor * dtw_prev) else: - if d < tau: - d = delta - if j == 0 or i0 == 0: - dtw[i1, j + 1] = max(0, d) - else: - dtw[i1, j + 1] = max(0, exp_avg * d + (1-exp_avg) * dtw_prev) + dtw[i1, j + 1] = max(0, d + dtw_prev) # Decide which d to return if psi_1e == 0 and psi_2e == 0: @@ -667,7 +656,7 @@ def warping_paths_affinity(s1, s2, window=None, only_triu=False, def warping_paths_affinity_fast(s1, s2, window=None, only_triu=False, penalty=None, psi=None, psi_neg=True, gamma=1, tau=0, delta=0, delta_factor=1, - exp_avg=None, compact=False, use_ndim=False): + compact=False, use_ndim=False): """Fast C version of :meth:`warping_paths`. 
Additional parameters: diff --git a/dtaidistance/dtw_cc.pyx b/dtaidistance/dtw_cc.pyx index 73573bfc..826312cd 100644 --- a/dtaidistance/dtw_cc.pyx +++ b/dtaidistance/dtw_cc.pyx @@ -78,6 +78,18 @@ cdef class DTWWps: def __init__(self, l1, l2, DTWSettings settings): self._wps = dtaidistancec_dtw.dtw_wps_parts(l1, l2, &settings._settings) + @property + def ri1(self): + return self._wps.ri1 + + @property + def ri2(self): + return self._wps.ri2 + + @property + def ri3(self): + return self._wps.ri3 + cdef class DTWSettings: def __cinit__(self): @@ -539,6 +551,11 @@ def warping_path_ndim(seq_t[:, :] s1, seq_t[:, :] s2, int ndim=1, include_distan return path, dist return path +def wps_negativize_value(DTWWps p, seq_t[:, :] wps, Py_ssize_t l1, Py_ssize_t l2, Py_ssize_t r, Py_ssize_t c): + dtaidistancec_dtw.dtw_wps_negativize_value(&p._wps, &wps[0,0], l1, l2, r, c) + +def wps_positivize_value(DTWWps p, seq_t[:, :] wps, Py_ssize_t l1, Py_ssize_t l2, Py_ssize_t r, Py_ssize_t c): + dtaidistancec_dtw.dtw_wps_positivize_value(&p._wps, &wps[0,0], l1, l2, r, c) def wps_negativize(DTWWps p, seq_t[:, :] wps, Py_ssize_t l1, Py_ssize_t l2, Py_ssize_t rb, Py_ssize_t re, Py_ssize_t cb, Py_ssize_t ce): dtaidistancec_dtw.dtw_wps_negativize(&p._wps, &wps[0,0], l1, l2, rb, re, cb, ce) @@ -546,10 +563,9 @@ def wps_negativize(DTWWps p, seq_t[:, :] wps, Py_ssize_t l1, Py_ssize_t l2, Py_s def wps_positivize(DTWWps p, seq_t[:, :] wps, Py_ssize_t l1, Py_ssize_t l2, Py_ssize_t rb, Py_ssize_t re, Py_ssize_t cb, Py_ssize_t ce): dtaidistancec_dtw.dtw_wps_positivize(&p._wps, &wps[0,0], l1, l2, rb, re, cb, ce) -def wps_max(DTWWps p, seq_t[:, :] wps): +def wps_max(DTWWps p, seq_t[:, :] wps, Py_ssize_t l1, Py_ssize_t l2): cdef Py_ssize_t r, c - result = dtaidistancec_dtw.dtw_wps_max(&p._wps, &wps[0,0], &r, &c, - wps.shape[0] - 1, wps.shape[1] - 1) + result = dtaidistancec_dtw.dtw_wps_max(&p._wps, &wps[0, 0], &r, &c, l1, l2) return r, c def wps_expand_slice(seq_t[:, :] wps, seq_t[:, :] slice, Py_ssize_t 
l1, Py_ssize_t l2, @@ -559,13 +575,13 @@ def wps_expand_slice(seq_t[:, :] wps, seq_t[:, :] slice, Py_ssize_t l1, Py_ssize l1, l2, rb, re, cb, ce, &settings._settings) -def wps_print(seq_t[:, :] wps, **kwargs): +def wps_print(seq_t[:, :] wps, Py_ssize_t l1, Py_ssize_t l2, **kwargs): settings = DTWSettings(**kwargs) - dtaidistancec_dtw.dtw_print_wps(&wps[0,0], wps.shape[0]-1, wps.shape[1]-1, &settings._settings) + dtaidistancec_dtw.dtw_print_wps(&wps[0,0], l1, l2, &settings._settings) -def wps_print_compact(seq_t[:, :] wps, **kwargs): +def wps_print_compact(seq_t[:, :] wps, Py_ssize_t l1, Py_ssize_t l2, **kwargs): settings = DTWSettings(**kwargs) - dtaidistancec_dtw.dtw_print_wps_compact(&wps[0,0], wps.shape[0]-1, wps.shape[1]-1, &settings._settings) + dtaidistancec_dtw.dtw_print_wps_compact(&wps[0,0], l1, l2, &settings._settings) def best_path_compact(seq_t[:, :] wps, Py_ssize_t l1, Py_ssize_t l2, **kwargs): cdef Py_ssize_t path_length; @@ -588,11 +604,9 @@ def best_path_compact(seq_t[:, :] wps, Py_ssize_t l1, Py_ssize_t l2, **kwargs): PyMem_Free(i2) return path -def best_path_compact_affinity(seq_t[:, :] wps, Py_ssize_t rs, Py_ssize_t cs, **kwargs): +def best_path_compact_affinity(seq_t[:, :] wps, Py_ssize_t l1, Py_ssize_t l2, Py_ssize_t rs, Py_ssize_t cs, **kwargs): cdef Py_ssize_t path_length; settings = DTWSettings(**kwargs) - l1 = wps.shape[0] - 1 - l2 = wps.shape[1] - 1 cdef Py_ssize_t *i1 = PyMem_Malloc((l1 + l2) * sizeof(Py_ssize_t)) if not i1: raise MemoryError() diff --git a/dtaidistance/dtw_visualisation.py b/dtaidistance/dtw_visualisation.py index 5521da4b..7a94bf14 100644 --- a/dtaidistance/dtw_visualisation.py +++ b/dtaidistance/dtw_visualisation.py @@ -189,8 +189,23 @@ def plot_warping_single_ax(s1, s2, path, filename=None, fig=None, ax=None): return fig, ax +def path_slice(path, rb=None, re=None, cb=None, ce=None): + path2 = [] + for t in path: + if rb is not None and t[0] < rb: + continue + if cb is not None and t[1] < cb: + continue + if re is not 
None and t[0] > (re - 1): + continue + if ce is not None and t[1] > (ce - 1): + continue + path2.append((t[0] - rb, t[1] - cb)) + return path2 + + def plot_warpingpaths(s1, s2, paths, path=None, filename=None, shownumbers=False, showlegend=False, - figure=None, matshow_kwargs=None): + figure=None, matshow_kwargs=None, includes_zero=True): """Plot the warping paths matrix. :param s1: Series 1 @@ -238,7 +253,7 @@ def plot_warpingpaths(s1, s2, paths, path=None, filename=None, shownumbers=False min_s1_x = np.min(s1) max_s1_y = len(s1) - if path is None: + if path is None and includes_zero is True: p = dtw.best_path(paths) elif path == -1: p = None @@ -282,7 +297,10 @@ def format_fn2_y(tick_val, tick_pos): ax3 = fig.add_subplot(gs[1, 1]) # ax3.set_aspect(1) kwargs = {} if matshow_kwargs is None else matshow_kwargs - img = ax3.matshow(paths[1:, 1:], **kwargs) + if includes_zero: + img = ax3.matshow(paths[1:, 1:], **kwargs) + else: + img = ax3.matshow(paths, **kwargs) # ax3.grid(which='major', color='w', linestyle='-', linewidth=0) # ax3.set_axis_off() if p is not None: diff --git a/dtaidistance/jinja/dtw_cc.jinja.pyx b/dtaidistance/jinja/dtw_cc.jinja.pyx index aebdb8ca..b8477f2d 100644 --- a/dtaidistance/jinja/dtw_cc.jinja.pyx +++ b/dtaidistance/jinja/dtw_cc.jinja.pyx @@ -78,6 +78,18 @@ cdef class DTWWps: def __init__(self, l1, l2, DTWSettings settings): self._wps = dtaidistancec_dtw.dtw_wps_parts(l1, l2, &settings._settings) + @property + def ri1(self): + return self._wps.ri1 + + @property + def ri2(self): + return self._wps.ri2 + + @property + def ri3(self): + return self._wps.ri3 + cdef class DTWSettings: def __cinit__(self): @@ -378,6 +390,11 @@ def wps_width(Py_ssize_t l1, Py_ssize_t l2, **kwargs): {% set suffix = '_ndim' %} {%- include 'dtw_cc_warpingpath.jinja.pyx' %} +def wps_negativize_value(DTWWps p, seq_t[:, :] wps, Py_ssize_t l1, Py_ssize_t l2, Py_ssize_t r, Py_ssize_t c): + dtaidistancec_dtw.dtw_wps_negativize_value(&p._wps, &wps[0,0], l1, l2, r, c) + +def 
wps_positivize_value(DTWWps p, seq_t[:, :] wps, Py_ssize_t l1, Py_ssize_t l2, Py_ssize_t r, Py_ssize_t c): + dtaidistancec_dtw.dtw_wps_positivize_value(&p._wps, &wps[0,0], l1, l2, r, c) def wps_negativize(DTWWps p, seq_t[:, :] wps, Py_ssize_t l1, Py_ssize_t l2, Py_ssize_t rb, Py_ssize_t re, Py_ssize_t cb, Py_ssize_t ce): dtaidistancec_dtw.dtw_wps_negativize(&p._wps, &wps[0,0], l1, l2, rb, re, cb, ce) @@ -385,10 +402,9 @@ def wps_negativize(DTWWps p, seq_t[:, :] wps, Py_ssize_t l1, Py_ssize_t l2, Py_s def wps_positivize(DTWWps p, seq_t[:, :] wps, Py_ssize_t l1, Py_ssize_t l2, Py_ssize_t rb, Py_ssize_t re, Py_ssize_t cb, Py_ssize_t ce): dtaidistancec_dtw.dtw_wps_positivize(&p._wps, &wps[0,0], l1, l2, rb, re, cb, ce) -def wps_max(DTWWps p, seq_t[:, :] wps): +def wps_max(DTWWps p, seq_t[:, :] wps, Py_ssize_t l1, Py_ssize_t l2): cdef Py_ssize_t r, c - result = dtaidistancec_dtw.dtw_wps_max(&p._wps, &wps[0,0], &r, &c, - wps.shape[0] - 1, wps.shape[1] - 1) + result = dtaidistancec_dtw.dtw_wps_max(&p._wps, &wps[0, 0], &r, &c, l1, l2) return r, c def wps_expand_slice(seq_t[:, :] wps, seq_t[:, :] slice, Py_ssize_t l1, Py_ssize_t l2, @@ -398,13 +414,13 @@ def wps_expand_slice(seq_t[:, :] wps, seq_t[:, :] slice, Py_ssize_t l1, Py_ssize l1, l2, rb, re, cb, ce, &settings._settings) -def wps_print(seq_t[:, :] wps, **kwargs): +def wps_print(seq_t[:, :] wps, Py_ssize_t l1, Py_ssize_t l2, **kwargs): settings = DTWSettings(**kwargs) - dtaidistancec_dtw.dtw_print_wps(&wps[0,0], wps.shape[0]-1, wps.shape[1]-1, &settings._settings) + dtaidistancec_dtw.dtw_print_wps(&wps[0,0], l1, l2, &settings._settings) -def wps_print_compact(seq_t[:, :] wps, **kwargs): +def wps_print_compact(seq_t[:, :] wps, Py_ssize_t l1, Py_ssize_t l2, **kwargs): settings = DTWSettings(**kwargs) - dtaidistancec_dtw.dtw_print_wps_compact(&wps[0,0], wps.shape[0]-1, wps.shape[1]-1, &settings._settings) + dtaidistancec_dtw.dtw_print_wps_compact(&wps[0,0], l1, l2, &settings._settings) def best_path_compact(seq_t[:, :] 
wps, Py_ssize_t l1, Py_ssize_t l2, **kwargs): cdef Py_ssize_t path_length; @@ -427,11 +443,9 @@ def best_path_compact(seq_t[:, :] wps, Py_ssize_t l1, Py_ssize_t l2, **kwargs): PyMem_Free(i2) return path -def best_path_compact_affinity(seq_t[:, :] wps, Py_ssize_t rs, Py_ssize_t cs, **kwargs): +def best_path_compact_affinity(seq_t[:, :] wps, Py_ssize_t l1, Py_ssize_t l2, Py_ssize_t rs, Py_ssize_t cs, **kwargs): cdef Py_ssize_t path_length; settings = DTWSettings(**kwargs) - l1 = wps.shape[0] - 1 - l2 = wps.shape[1] - 1 cdef Py_ssize_t *i1 = PyMem_Malloc((l1 + l2) * sizeof(Py_ssize_t)) if not i1: raise MemoryError() diff --git a/dtaidistance/subsequence/dtw.py b/dtaidistance/subsequence/dtw.py index 6854c016..61b81daf 100644 --- a/dtaidistance/subsequence/dtw.py +++ b/dtaidistance/subsequence/dtw.py @@ -8,713 +8,11 @@ DTW-based subsequence matching. :author: Wannes Meert -:copyright: Copyright 2021-2022 KU Leuven, DTAI Research Group. +:copyright: Copyright 2021-2023 KU Leuven, DTAI Research Group. :license: Apache License, Version 2.0, see LICENSE for details. """ -import logging -import numpy.ma as ma -from .. import dtw # import warping_paths, warping_paths_fast, best_path, warping_paths_affinity, distance -from .. import dtw_ndim -from .. import util_numpy -from .. import util - - -try: - if util_numpy.test_without_numpy(): - raise ImportError() - import numpy as np - argmin = np.argmin - argmax = np.argmax - array_min = np.min - array_max = np.max -except ImportError: - np = None - argmin = util.argmin - argmax = util.argmax - array_min = min - array_max = max - - -logger = logging.getLogger("be.kuleuven.dtai.distance") - - -dtw_cc = None -try: - from . import dtw_cc -except ImportError: - dtw_cc = None - - -def subsequence_alignment(query, series, use_c=False): - """See SubsequenceAligment. 
- - :param query: - :param series: - :return: - """ - sa = SubsequenceAlignment(query, series, use_c=use_c) - sa.align() - return sa - - -class SAMatch: - def __init__(self, idx, alignment): - """SubsequenceAlignment match""" - self.idx = idx - self.alignment = alignment - - @property - def value(self): - """Normalized DTW distance of match. - - Normalization is the DTW distance divided by the query length. - """ - return self.alignment.matching[self.idx] - - @property - def distance(self): - """DTW distance of match. - - This value is dependent on the length of the query. Use the value - property when comparing queries of different lengths. - """ - return self.value * len(self.alignment.query) - - @property - def segment(self): - """Matched segment in series.""" - start = self.alignment.matching_function_startpoint(self.idx) - end = self.alignment.matching_function_endpoint(self.idx) - return [start, end] - - @property - def path(self): - """Matched path in series""" - return self.alignment.matching_function_bestpath(self.idx) - - def __str__(self): - return f'SAMatch({self.idx})' - - def __repr__(self): - return self.__str__() - - -class SubsequenceAlignment: - def __init__(self, query, series, penalty=0.1, use_c=False): - """Subsequence alignment using DTW. - Find where the query occurs in the series. - - Based on Fundamentals of Music Processing, Meinard Müller, Springer, 2015. 
- - Example:: - - query = np.array([1., 2, 0]) - series = np.array([1., 0, 1, 2, 1, 0, 2, 0, 3, 0, 0]) - sa = subsequence_search(query, series) - mf = sa.matching_function() - sa.kbest_matches(k=2) - - - :param query: Subsequence to search for - :param series: Long sequence in which to search - :param penalty: Penalty for non-diagonal matching - :param use_c: Use the C-based DTW function if available - """ - self.query = query - self.series = series - self.penalty = penalty - self.paths = None - self.matching = None - self.use_c = use_c - - def reset(self): - self.matching = None - - def align(self): - if self.matching is not None: - return - psi = [0, 0, len(self.series), len(self.series)] - if np is not None and isinstance(self.series, np.ndarray) and len(self.series.shape) > 1: - if not self.use_c: - _, self.paths = dtw_ndim.warping_paths(self.query, self.series, penalty=self.penalty, psi=psi, - psi_neg=False) - else: - _, self.paths = dtw_ndim.warping_paths_fast(self.query, self.series, penalty=self.penalty, psi=psi, - compact=False, psi_neg=False) - else: - if not self.use_c: - _, self.paths = dtw.warping_paths(self.query, self.series, penalty=self.penalty, psi=psi, - psi_neg=False) - else: - _, self.paths = dtw.warping_paths_fast(self.query, self.series, penalty=self.penalty, psi=psi, - compact=False, psi_neg=False) - self._compute_matching() - - def align_fast(self): - self.use_c = True - return self.align() - - def _compute_matching(self): - matching = self.paths[-1, :] - if len(matching) > len(self.series): - matching = matching[-len(self.series):] - self.matching = np.array(matching) / len(self.query) - - def warping_paths(self): - """Get matrix with all warping paths. - - If the aligmnent was computed using a compact, the paths are first copied into a full - warping paths matrix. 
- - :return: Numpy matrix of size (len(query)+1) * (len(series)+1) - """ - return self.paths - - def matching_function(self): - """The matching score for each end-point of a possible match.""" - return self.matching - - def get_match(self, idx): - return SAMatch(idx, self) - - def best_match_fast(self): - self.use_c = True - return self.best_match() - - def best_match(self): - best_idx = np.argmin(self.matching) - return self.get_match(best_idx) - - def kbest_matches_fast(self, k=1, overlap=0): - self.use_c = True - return self.kbest_matches(k=k, overlap=overlap) - - def kbest_matches(self, k=1, overlap=0): - """Yields the next best match. Stops at k matches (use None for all matches). - - :param k: Number of matches to yield. None is all matches. - :param overlap: Matches cannot overlap unless overlap > 0. - :return: Yield an SAMatch object - """ - self.align() - matching = np.array(self.matching) - maxv = np.ceil(np.max(matching) + 1) - matching[:min(len(self.query) - 1, overlap)] = maxv - ki = 0 - while k is None or ki < k: - best_idx = np.argmin(matching) - if best_idx == 0 or np.isinf(matching[best_idx]) or matching[best_idx] == maxv: - # No more matches found - break - match = self.get_match(best_idx) - b, e = match.segment - cur_overlap = min(overlap, e - b - 1) - mb, me = best_idx + 1 - (e - b) + cur_overlap, best_idx + 1 - if np.isinf(np.max(matching[mb:me])): - # No overlapping matches - matching[best_idx] = maxv - continue - matching[mb:me] = np.inf - ki += 1 - yield match - - def matching_function_segment(self, idx): - """Matched segment in series.""" - start = self.matching_function_startpoint(idx) - end = self.matching_function_endpoint(idx) - return [start, end] - - def matching_function_endpoint(self, idx): - """Index in series for end of match in matching function at idx. 
- - :param idx: Index in matching function - :return: Index in series - """ - if len(self.matching) == len(self.series): - return idx - diff = len(self.series) - len(self.matching) - return idx + diff - - def matching_function_startpoint(self, idx): - """Index in series for start of match in matching function at idx. - - :param idx: Index in matching function - :return: Index in series - """ - real_idx = idx + 1 - path = dtw.best_path(self.paths, col=real_idx) - start_idx = path[0][1] - return start_idx - - def matching_function_bestpath(self, idx): - """Indices in series for best path for match in matching function at idx. - - :param idx: Index in matching function - :return: List of (row, col) - """ - real_idx = idx + 1 - path = dtw.best_path(self.paths, col=real_idx) - return path - - -def local_concurrences(series1, series2=None, gamma=1, tau=0, delta=0, delta_factor=1, estimate_settings=None, - only_triu=False, penalty=None, window=None): - """Local concurrences, see LocalConcurrences. - - :param series1: - :param series2: - :param gamma: Affinity transformation exp(-gamma*(s1[i] - s2[j])**2) - :param tau: threshold parameter - :param delta: penalty parameter - Should be negative. Added instead of the affinity score (if score below tau threshold parameter). - :param delta_factor: multiply cumulative score (e.g. by 0.5). - This is useful to have the same impact at different locations in the warping paths matrix, which - is cumulative (and thus typically large in one corner and small in the opposite corner). - :param estimate_settings: Estimate tau, delta, delta_factor from given series. Will be passed as - tau_std to estimate_settings_from_std. - :param only_triu: Only compute the upper traingle matrix values. Useful to avoid redundant computations - when series1 is equal to series2 (or equivalently if series2 is None). 
- :param penalty: Penalty that is added when dynamic programming is using moving vertically or horizontally - through the matrix instead of diagonally. Used to prefer diagonal paths. - :return: - """ - lc = LocalConcurrences(series1, series2, gamma, tau, delta, delta_factor, - only_triu=only_triu, penalty=penalty, window=window) - if estimate_settings is not None: - lc.estimate_settings_from_std(series1, estimate_settings) - lc.align() - return lc - - -class LCMatch: - def __init__(self, lc, row=None, col=None): - """LocalConcurrences match""" - self.row = row # type: int - self.col = col # type: int - self.lc = lc # type: LocalConcurrences - self._path = None - - @property - def path(self): - if self._path is not None: - return self._path - # TODO: always storing the path might be memory hungry - # but recomputing is impossible since the values are negated/masked afterwards - self._path = self.lc.best_path(self.row, self.col) - return self._path - - def __str__(self): - return f'LCMatch({self.row, self.col})' - - def __repr__(self): - return self.__str__() - - -class LocalConcurrences: - def __init__(self, series1, series2=None, gamma=1, tau=0, delta=0, delta_factor=1, only_triu=False, penalty=None, window=None): - """Version identification based on local concurrences. - - Find recurring patterns across two time series. Used to identify whether one time series is - a version of another. If the two time series are the same one, it can be used to find typical - or frequent patterns in a time series. - - Based on 7.3.2 Identification Procedure in Fundamentals of Music Processing, Meinard Müller, Springer, 2015. - - Different from the original formulation, D_tau is introduced based on the given delta factor. 
- This makes the penalty less sensitive to the cumulative effect of the paths in the - self-similarity matrix S: - - S_tau(n,m) = S(n,m) if S(n,m) >= tau (with tau >= 0) - delta if S(n,m) < tau (with delta <= 0) - - And for the accumulated score matrix D: - - D_tau(n,m) = max(0, - df * D_tau(n−1,m−1) + S_tau(n,m), - df * D_tau(n−1,m) + S_tau(n,m), - df * D_tau(n,m−1) + S_tau(n,m)) - where df = 1 if S(n,m) >= tau and df=delta_factor (<=1) otherwise, - - :param series1: First time series. - :param series2: Second time series. If empty, series1 is used and compared with itself. - :param gamma: Affinity transformation exp(-gamma*(s1[i] - s2[j])**2), should be >0 - :param tau: threshold parameter, should be >= 0 - :param delta: penalty parameter, should be <= 0 - :param delta_factor: penalty factor parameter, should be <= 1 - :param only_triu: Only consider upper triangular matrix in warping paths. - """ - self.series1 = series1 - if series2 is None: - # Self-comparison - self.series2 = self.series1 - self.only_triu = True - else: - self.series2 = series2 - if len(series1) == len(series2): - self.only_triu = only_triu - else: - self.only_triu = False - self.gamma = gamma - self.tau = tau - self.delta = delta - self.delta_factor = delta_factor - self.penalty = penalty - self.window = window - self._wp = None # warping paths - - def reset(self): - self._wp = None - - def estimate_settings_from_std(self, series, tau_std=0.33): - """ - - :param series: - :param tau_std: Set tau to differences larger than tau_std time standard deviation of - the given series (default is 0.33, or reject differences that are larger than - the deviation wrt to the mean of 75% of the values in the series, assuming a - normal distribution). 
- :return: - """ - diffp = tau_std * np.std(series) - self.delta = -2 * np.exp(-self.gamma * diffp ** 2) - self.delta_factor = 0.5 - self.tau = np.exp(-self.gamma * diffp ** 2) - - def align(self): - """ - - :return: - """ - if self._wp is not None: - return - _, wp = dtw.warping_paths_affinity(self.series1, self.series2, - gamma=self.gamma, tau=self.tau, - delta=self.delta, delta_factor=self.delta_factor, - only_triu=self.only_triu, penalty=self.penalty, - window=self.window) - self._wp = ma.masked_array(wp) - if self.only_triu: - il = np.tril_indices(self._wp.shape[0]) - self._wp[il] = ma.masked - - @property - def wp(self): - return self._wp.data - - def best_match(self): - idx = np.unravel_index(np.argmax(self._wp, axis=None), self._wp.shape) - r, c = idx - lcm = LCMatch(self, r, c) - # path = lcm.path - # for (x, y) in path: - # self._wp[x + 1, y + 1] = ma.masked - return lcm - - def kbest_matches(self, k=1, minlen=2, buffer=0): - """Yields the next best match. Stops at k matches (use None for all matches). - - :param k: Number of matches to yield. None is all matches. - :param minlen: Consider only matches of length longer than minlen - :param buffer: Matches cannot be closer than buffer to each other. 
- :return: Yield an LCMatch object - """ - ki = 0 - while k is None or ki < k: - idx = None - lcm = None - while idx is None: - idx = np.unravel_index(np.argmax(self._wp, axis=None), self._wp.shape) - if idx[0] == 0 or idx[1] == 0: - return None - r, c = idx - lcm = LCMatch(self, r, c) - for (x, y) in lcm.path: - x += 1 - y += 1 - if len(self._wp.mask.shape) > 0 and self._wp.mask[x, y] is True: # True means invalid - # print('found path contains masked, restart') - lcm = None - idx = None - break - else: - self._wp[x, y] = ma.masked - if len(lcm.path) < minlen: - # print('found path too short, restart') - lcm = None - idx = None - if buffer > 0 and lcm is not None: - miny, maxy = 0, self._wp.shape[1] - 1 - minx, maxx = 0, self._wp.shape[0] - 1 - for (x, y) in lcm.path: - xx = x + 1 - for yy in range(max(miny, y + 1 - buffer), min(maxy, y + 1 + buffer)): - self._wp[xx, yy] = ma.masked - yy = y + 1 - for xx in range(max(minx, x + 1 - buffer), min(maxx, x + 1 + buffer)): - self._wp[xx, yy] = ma.masked - if lcm is not None: - ki += 1 - yield lcm - - def best_path(self, row, col): - if self._wp is None: - return None - argm = argmax - i = row - j = col - p = [(i - 1, j - 1)] - # prev = self._wp[i, j] - while i > 0 and j > 0: - values = [self._wp.data[i - 1, j - 1], self._wp.data[i - 1, j], self._wp.data[i, j - 1]] - # print(f'{i=}, {j=}, {argm(values)=}, {ma.argmax(values)=}, {values=}') - c = argm(values) - # if values[c] is ma.masked: - # break - if values[c] <= 0: # values[c] > prev: - break - # prev = values[c] - if c == 0: - if self._wp[i - 1, j - 1] is ma.masked: - break - i, j = i - 1, j - 1 - elif c == 1: - if self._wp[i - 1, j] is ma.masked: - break - i = i - 1 - elif c == 2: - if self._wp[i, j - 1] is ma.masked: - break - j = j - 1 - p.append((i - 1, j - 1)) - if p[-1][0] < 0 or p[-1][1] < 0: - p.pop() - p.reverse() - return p - - -def subsequence_search(query, series, dists_options=None, use_lb=True, - max_dist=None, max_value=None, use_c=None): - """See 
SubsequenceSearch. - - :param query: Time series to search for - :param series: Iterator over time series to perform search on. - This can be for example windows over a long time series. - :param dists_options: Options passed on to `dtw.distance` - :param use_lb: Use lowerbounds to early abandon options - :param max_dist: Ignore DTW distances larger than this value - :param max_value: Ignore normalized DTW distances larger than this value - :param use_c: Use fast C implementation if available - :return: SubsequenceSearch object - """ - ss = SubsequenceSearch(query, series, dists_options=dists_options, use_lb=use_lb, - max_dist=max_dist, max_value=max_value, use_c=use_c) - return ss - - -class SSMatch: - """Found match by SubsequenceSearch. - - The match is identified by the idx property, which is the index of the matched - series in the original list of series. The distance property returns the DTW - distance between the query and the series at index idx. - """ - def __init__(self, kidx, ss): - self.kidx = kidx - self.ss = ss - - @property - def distance(self): - """DTW distance.""" - return self.ss.kbest_distances[self.kidx][0] - - @property - def value(self): - """Normalized DTW distance.""" - return self.distance / len(self.ss.query) - - @property - def idx(self): - return self.ss.kbest_distances[self.kidx][1] - - def __str__(self): - return f'SSMatch({self.idx})' - - def __repr__(self): - return self.__str__() - - -class SSMatches: - def __init__(self, ss): - self.ss = ss - - def __getitem__(self, key): - if isinstance(key, slice): - start = 0 if key.start is None else key.start - return [SSMatch(kip+start, self.ss) for kip, (v, i) in - enumerate(self.ss.kbest_distances[key])] - return SSMatch(key, self.ss) - - def __iter__(self): - for ki, (v, i) in enumerate(self.ss.kbest_distances): - yield SSMatch(ki, self.ss) - - def __len__(self): - if self.ss.kbest_distances is None: - return 0 - return len(self.ss.kbest_distances) - - def __str__(self): - if 
len(self.ss.kbest_distances) > 10: - return '[' + ', '.join(str(m) for m in self[:5]) + ' ... ' +\ - ', '.join(str(m) for m in self[-5:]) + ']' - return '[' + ', '.join(str(m) for m in self) + ']' - - -class SubsequenceSearch: - def __init__(self, query, s, dists_options=None, use_lb=True, keep_all_distances=False, - max_dist=None, max_value=None, use_c=None, use_ndim=None): - """Search the best matching (subsequence) time series compared to a given time series. - - :param query: Time series to search for - :param s: Iterator over time series to perform search on. - This can be for example windows over a long time series. - :param dists_options: Options passed on to `dtw.distance` - :param use_lb: Use lowerbounds to early abandon options - :param max_dist: Ignore DTW distances larger than this value - if max_dist is also given in dists_options, then the one in dists_options is ignored - if both max_dist and max_value are given, the smallest is used - :param max_value: Ignore normalized DTW distances larger than this value - """ - self.query = query - if use_ndim is None: - self.use_ndim = (util.detect_ndim(query) > 1) - else: - self.use_ndim = use_ndim - self.s = s - self.distances = None - self.kbest_distances = None - self.lbs = None - self.k = None - self.dists_options = {} if dists_options is None else dists_options - if max_dist is None: - self.max_dist = self.dists_options.get('max_dist', np.inf) - else: - self.max_dist = max_dist - if max_value is not None: - self.max_dist = min(self.max_dist, max_value * len(self.query)) - self.dists_options['max_dist'] = self.max_dist - if use_c is not None: - self.dists_options['use_c'] = use_c - self.use_lb = use_lb - - self.keep_all_distances = keep_all_distances - # if self.use_lb and not self.keep_all_distances: - # raise ValueError("If use_lb is true, then keep_all_distances should also be true.") - - def reset(self): - self.distances = None - self.kbest_distances = None - self.lbs = None - - # def compute_lbs(self): 
- # self.lbs = np.zeros((len(self.s),)) - # for idx, series in enumerate(self.s): - # self.lbs[idx] = dtw.lb_keogh(self.query, series, **self.dists_options) - - def align_fast(self, k=None): - self.dists_options['use_c'] = True - return self.align(k=k) - - def align(self, k=None): - if self.use_ndim: - distance = dtw_ndim.distance - lb_keogh = None - if self.use_lb: - self.use_lb = False - logger.warning('The setting use_lb is ignored for multivariate series.') - else: - distance = dtw.distance - lb_keogh = dtw.lb_keogh - if self.distances is not None and self.k >= k: - return - if k is None or self.keep_all_distances: - self.distances = np.zeros((len(self.s),)) - # if self.use_lb: - # self.compute_lbs() - import heapq - h = [(-np.inf, -1)] - max_dist = self.max_dist - for idx, series in enumerate(self.s): - if self.use_lb: - lb = lb_keogh(self.query, series, **self.dists_options) - if lb > max_dist: - continue - dist = distance(self.query, series, **self.dists_options) - if k is not None: - if len(h) < k: - if not np.isinf(dist) and dist <= max_dist: - heapq.heappush(h, (-dist, idx)) - max_dist = min(max_dist, -h[0][0]) - else: - if not np.isinf(dist) and dist <= max_dist: - heapq.heappushpop(h, (-dist, idx)) - max_dist = min(max_dist, -h[0][0]) - self.dists_options['max_dist'] = max_dist - if self.keep_all_distances or k is None: - self.distances[idx] = dist - if k is not None: - # hh = np.array([-v for v, _ in h]) - # self.kbest_distances = [(-h[i][0], h[i][1]) for i in np.argsort(hh)] - self.kbest_distances = sorted((-v, i) for v, i in h if i != -1) - else: - self.kbest_distances = [(self.distances[i], i) for i in np.argsort(self.distances)] - - self.k = k - return self.kbest_distances - - def get_ith_value(self, i): - """Return the i-th value from the k-best values. 
- - :param i: Return i-th best value (i < k) - :return: (distance, index) - """ - if self.distances is None or self.k is None: - raise ValueError('Align should be called before asking for the i-th value.') - if i > self.k: - raise ValueError('The i-th value is not available, i={}>k={}'.format(i, self.k)) - return self.kbest_distances[i] - - def best_match_fast(self): - self.dists_options['use_c'] = True - return self.best_match() - - def best_match(self): - self.align(k=1) - # _value, best_idx = self.kbest_distances[0] - return SSMatch(0, self) - - def kbest_matches_fast(self, k=1): - self.dists_options['use_c'] = True - return self.kbest_matches(k=k) - - def kbest_matches(self, k=1): - """Return the k best matches. - - It is recommended to set k to a value, and not None. - If k is set to None, all comparisons are kept and returned. Also no early - stopping is applied in case k is None. - - :param k: Number of best matches to return (default is 1) - :return: List of SSMatch objects - """ - self.align(k=k) - # if k is None: - # return [SSMatch(best_idx, self) for best_idx in range(len(self.distances))] - # if self.keep_all_distances: - # best_idxs = np.argpartition(self.distances, k) - # return [SSMatch(best_idx, self) for best_idx in best_idxs[:k]] - # distances = reversed(sorted(self.h)) - # return [SSMatch(best_idx, self) for dist, best_idx in distances] - return SSMatches(self) +from .subsequencealignment import * +from .localconcurrences import * +from .subsequencesearch import * diff --git a/dtaidistance/subsequence/localconcurrences.py b/dtaidistance/subsequence/localconcurrences.py new file mode 100644 index 00000000..ece843d6 --- /dev/null +++ b/dtaidistance/subsequence/localconcurrences.py @@ -0,0 +1,732 @@ +# -*- coding: UTF-8 -*- +""" +dtaidistance.subsequence.localconcurrences +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +(requires version 2.3.0 or higher) + +DTW-based subsequence matching. 
+ +:author: Wannes Meert +:copyright: Copyright 2021-2023 KU Leuven, DTAI Research Group. +:license: Apache License, Version 2.0, see LICENSE for details. + +""" +import logging +import math +from functools import partial + +from .. import dtw # import warping_paths, warping_paths_fast, best_path, warping_paths_affinity, distance +from .. import dtw_ndim +from .. import util_numpy +from .. import util + + +try: + if util_numpy.test_without_numpy(): + raise ImportError() + import numpy as np + import numpy.ma as ma + argmin = np.argmin + argmax = np.argmax + array_min = np.min + array_max = np.max +except ImportError: + np = None + ma = None + argmin = util.argmin + argmax = util.argmax + array_min = min + array_max = max + + +logger = logging.getLogger("be.kuleuven.dtai.distance") + + +dtw_cc = None +try: + from . import dtw_cc +except ImportError: + dtw_cc = None + + +def local_concurrences(series1, series2=None, gamma=1, tau=0, delta=0, delta_factor=1, estimate_settings=None, + only_triu=None, penalty=None, window=None, use_c=False, compact=None): + """Local concurrences, see LocalConcurrences. + + :param series1: + :param series2: + :param gamma: Affinity transformation exp(-gamma*(s1[i] - s2[j])**2) + :param tau: threshold parameter + :param delta: penalty parameter + Should be negative. Added instead of the affinity score (if score below tau threshold parameter). + :param delta_factor: multiply cumulative score (e.g. by 0.5). + This is useful to have the same impact at different locations in the warping paths matrix, which + is cumulative (and thus typically large in one corner and small in the opposite corner). + :param estimate_settings: Estimate tau, delta, delta_factor from given series. Will be passed as + tau_std to estimate_settings_from_std. + :param only_triu: Only compute the upper traingle matrix values. Useful to avoid redundant computations + when series1 is equal to series2 (or equivalently if series2 is None). 
+ :param penalty: Penalty that is added when dynamic programming is using moving vertically or horizontally + through the matrix instead of diagonally. Used to prefer diagonal paths. + :param compact: Use the compact representation for the warping paths matrix (only when use_c is true). + :return: + """ + lc = LocalConcurrences(series1, series2, gamma, tau, delta, delta_factor, + only_triu=only_triu, penalty=penalty, window=window, use_c=use_c, compact=compact) + if estimate_settings is not None: + lc.estimate_settings_from_std(series1, series2, tau_std=estimate_settings) + lc.align() + return lc + + +class LCMatch: + def __init__(self, lc, row=None, col=None): + """LocalConcurrences match""" + self.row = row # type: int + self.col = col # type: int + self.lc = lc # type: LocalConcurrences + self._path = None + + @property + def path(self): + if self._path is not None: + return self._path + # TODO: always storing the path might be memory hungry + # but recomputing is impossible since the values are negated/masked afterwards + self._path = self.lc.best_path(self.row, self.col) + return self._path + + def distance(self, do_sqrt=True): + if self._path is None: + return None + d = 0 + for r, c in self._path: + d += (self.lc.series1[r] - self.lc.series2[c])**2 + if do_sqrt: + d = math.sqrt(d) + return d + + def __str__(self): + return f'LCMatch({self.row, self.col})' + + def __repr__(self): + return self.__str__() + + +class LCMatches: + def __init__(self, lc, matches=None): + self._lc = lc + self._matches = [] + if matches is not None: + self._matches.update(matches) + + def __iter__(self): + return self._matches.__iter__() + + def append(self, match): + self._matches.append(match) + + def covered(self): + s1 = np.zeros(len(self._lc.series1), dtype=np.bool) + s2 = np.zeros(len(self._lc.series2), dtype=np.bool) + for match in self._matches: + path = match.path + s1[path[0][0]:path[-1][0]+1] = True + s2[path[0][1]:path[-1][1]+1] = True + return s1, s2 + + def 
coverage(self): + s1, s2 = self.covered() + c1 = np.sum(s1) / len(s1) + c2 = np.sum(s2) / len(s2) + return c1, c2 + + def segments(self): + s1, s2 = [], [] + for match in self._matches: + p = match.path + s1.append((p[0][0], p[-1][0])) + s2.append((p[0][1], p[-1][1])) + return s1, s2 + + def missing(self): + s1, s2 = self.coverage() + s1 = ~s1 + s2 = ~s2 + return s1, s2 + + def missing_segments(self): + b1, b2 = self.covered() + s1, s2 = self.segments() + for sb, se in s1: + b1[sb] = False + b1[se] = False + for sb, se in s2: + b2[sb] = False + b2[se] = False + + inmissing = False + ms1 = [] + lstart = None + for i in range(len(b1)): + if inmissing: + if b1[i]: + ms1.append((lstart, i)) + inmissing = False + else: + if not b1[i]: + lstart = i + inmissing = True + + inmissing = False + ms2 = [] + lstart = None + for i in range(len(b2)): + if inmissing: + if b2[i]: + ms2.append((lstart, i)) + inmissing = False + else: + if not b2[i]: + lstart = i + inmissing = True + return ms1, ms2 + + def distance(self, do_sqrt=True): + d = 0 + for m in self._matches: + d += m.distance(do_sqrt=False) + if do_sqrt: + d = math.sqrt(d) + return d + + def distance_compensated(self, penalty=None, max_factor=10): + """Distance with compensation for missed parts in sequences. + + :param penalty: Base penalty per missing step in the joint path + :param max_factor: Number >1 + """ + if penalty is None: + penalty = 1/self._lc.gamma + d = self.distance(do_sqrt=False) + c1, c2 = self.coverage() + perc_missing = 1 - max(c1, c2) + nb_missing = max((1-c1)*len(self._lc.series1), (1-c2)*len(self._lc.series2)) + if max_factor is not None: + factor = 1 + ((max_factor - 1) * perc_missing) + else: + factor = 1 + d += factor * penalty * nb_missing + d = math.sqrt(d) + return d + + def plot(self, begin=None, end=None, showlegend=False, showpaths=True, showboundaries=True): + from .. 
import dtw_visualisation as dtwvis + if begin is None and end is None: + series1 = self._lc.series1 + series2 = self._lc.series2 + wp = self._lc.wp_slice() + begin = 0 + elif begin is None: + series1 = self._lc.series1[:end] + series2 = self._lc.series2[:end] + wp = self._lc.wp_slice(re=end, ce=end) + begin = 0 + elif end is None: + series1 = self._lc.series1[begin:] + series2 = self._lc.series2[begin:] + wp = self._lc.wp_slice(rb=begin, cb=begin) + else: + series1 = self._lc.series1[begin:end] + series2 = self._lc.series2[begin:end] + wp = self._lc.wp_slice(rb=begin, re=end, cb=begin, ce=end) + if begin is not None and begin > 0: + includes_zero = False + else: + includes_zero = True + fig, ax = dtwvis.plot_warpingpaths(series1, series2, wp, path=-1, showlegend=showlegend, includes_zero=includes_zero) + if showpaths: + nb_plotted = 0 + for i, match in enumerate(self._matches): + path2 = [] + for t in match.path: + if begin is not None and (t[0] < begin or t[1] < begin): + continue + if end is not None and (t[0] > (end-1) or t[1] > (end-1)): + continue + path2.append((t[0] - begin, t[1] - begin)) + if len(path2) > 0: + nb_plotted += 1 + dtwvis.plot_warpingpaths_addpath(ax, path2) + print(f"Paths plotted: {nb_plotted}") + if showboundaries: + # s1, s2 = self.covered() + ss1, ss2 = self.segments() + sbs, ses = zip(*ss1) + sbs1 = [v - begin for v in sbs if (begin is None or v >= begin) and (end is None or v <= end)] + ses1 = [v - begin for v in ses if (begin is None or v >= begin) and (end is None or v <= end)] + ax[3].hlines(sbs1, 0, len(series2) - 1, color='black', alpha=0.5) + ax[3].hlines(ses1, 0, len(series2) - 1, color='black', alpha=0.5) + sbs, ses = zip(*ss2) + sbs2 = [v - begin for v in sbs if (begin is None or v >= begin) and (end is None or v <= end)] + ses2 = [v - begin for v in ses if (begin is None or v >= begin) and (end is None or v <= end)] + ax[3].vlines(sbs2, 0, len(series1) - 1, color='black', alpha=0.5) + ax[3].vlines(ses2, 0, len(series1) - 1, 
color='black', alpha=0.5) + ymin = min(np.min(series1), np.min(series2)) + for idx, (sb, se) in enumerate(zip(sbs1, ses1)): + ax[2].plot([-ymin, -ymin], [len(series1)-sb, len(series1)-se], color='blue', linewidth=2, alpha=0.5) + for idx, (sb, se) in enumerate(zip(sbs2, ses2)): + ax[1].plot([sb, se], [ymin, ymin], color='blue', linewidth=2, alpha=0.5) + return fig, ax + + def str(self, maxlength=10): + return '[' + ', '.join(str(m) for m in self._matches[:maxlength]) + ']' + + def __str__(self): + return self.str() + + +class LocalConcurrences: + def __init__(self, series1, series2=None, gamma=1, tau=0, delta=0, delta_factor=1, only_triu=False, + penalty=None, window=None, use_c=False, compact=None): + """Version identification based on local concurrences. + + Find recurring patterns across two time series. Used to identify whether one time series is + a version of another. If the two time series are the same one, it can be used to find typical + or frequent patterns in a time series. + + Based on 7.3.2 Identification Procedure in Fundamentals of Music Processing, Meinard Müller, Springer, 2015. + + Different from the original formulation, D_tau is introduced based on the given delta factor. + This makes the penalty less sensitive to the cumulative effect of the paths in the + self-similarity matrix S: + + S_tau(n,m) = S(n,m) if S(n,m) >= tau (with tau >= 0) + delta if S(n,m) < tau (with tau >= 0 & delta <= 0) + + And for the accumulated score matrix D: + + D_tau(n,m) = max(0, + df * D_tau(n−1,m−1) + S_tau(n,m), + df * D_tau(n−1,m) + S_tau(n,m), + df * D_tau(n,m−1) + S_tau(n,m)) + where df = 1 if S(n,m) >= tau and df=delta_factor (<=1) otherwise, + + For finding paths the delta_factor has no influence. For the visualisation, + it helps as patterns exhibit more similar values in the D matrix. + + :param series1: First time series. + :param series2: Second time series. If empty, series1 is used and compared with itself. 
+ :param gamma: Affinity transformation exp(-gamma*(s1[i] - s2[j])**2), should be >0 + :param tau: threshold parameter, should be >= 0 + :param delta: penalty parameter, should be <= 0 + :param delta_factor: penalty factor parameter, should be <= 1 + :param only_triu: Only consider upper triangular matrix in warping paths. + :param compact: Use the compact representation for the warping paths matrix (only when use_c is true). + """ + self.series1 = series1 + if series2 is None: + # Self-comparison + self.series2 = self.series1 + self.only_triu = True if only_triu is None else only_triu + else: + self.series2 = series2 + self.only_triu = False if only_triu is None else only_triu + self.gamma = gamma + self.tau = tau + self.delta = delta + self.delta_factor = delta_factor + self.penalty = penalty + self.window = window + self.use_c = use_c + if compact is None: + self.compact = self.use_c + else: + self.compact = compact + self._wp = None # warping paths + if self.use_c: + self._c_settings = dtw_cc.DTWSettings(window=self.window, penalty=self.penalty) + self._c_parts = dtw_cc.DTWWps(len(self.series1), len(self.series2), self._c_settings) + + @staticmethod + def from_other(lc, series1, series2=None): + lcn = LocalConcurrences(series1, series2, gamma=lc.gamma, tau=lc.tau, delta=lc.delta, + delta_factor=lc.delta_factor, only_triu=lc.only_triu, + penalty=lc.penalty, window=lc.window, use_c=lc.use_c, compact=lc.compact) + return lcn + + def reset(self): + self._wp = None + + def estimate_settings_from_std(self, series, series2=None, tau_std=0.33): + """Estimate delta, tau and delta_factor from series, tau_std and gamma. + + :param series: + :param tau_std: Set tau to differences larger than tau_std time standard deviation of + the given series (default is 0.33, or reject differences that are larger than + the deviation wrt to the mean of 75% of the values in the series, assuming a + normal distribution). 
+ :return: + """ + return self.estimate_settings(series, series2, tau_type='std', tau_factor=tau_std) + + def estimate_settings_from_mean(self, series, series2=None, tau_mean=0.33): + return self.estimate_settings(series, series2, tau_type='mean', tau_factor=tau_mean) + + def estimate_settings_from_abs(self, series, series2=None, tau_abs=0.33): + return self.estimate_settings(series, series2, tau_type='abs', tau_factor=tau_abs) + + def estimate_settings(self, series, series2=None, tau_factor=0.33, tau_type='mean', gamma=None): + if tau_type != 'abs': + if series is None: + diffm = 1 + elif series2 is None: + if tau_type == 'std': + diffm = np.std(series) + elif tau_type == 'mean': + diffm = np.mean(series) + else: + diffm = 1 + else: + if tau_type == 'std': + diffm = np.std(np.abs(series - series2)) + elif tau_type == 'mean': + diffm = np.mean(np.abs(series - series2)) + else: + diffm = 1 + + if gamma is None: + # Intuition for gamma: + # Create an affinity matrix where + # differences up to the mean/std are in [e^-1, 1], + # larger differences are i [0, e^-1] + self.gamma = 1 / diffm**2 + else: + self.gamma = gamma + if tau_factor is not None: + diffp = tau_factor * diffm + else: + diffp = diffm + elif tau_type == 'abs': + diffp = tau_factor + else: + raise AttributeError('{} is not supported (not in mean, std, abs)'.format(tau_type)) + self.tau = np.exp(-self.gamma * diffp ** 2) + self.delta = -2 * self.tau + self.delta_factor = 0.90 + self.penalty = self.tau / 10 + + def align(self): + """ + + :return: + """ + if self._wp is not None: + return + if self.use_c: + fn = partial(dtw.warping_paths_affinity_fast, compact=self.compact) + else: + fn = dtw.warping_paths_affinity + _, wp = fn(self.series1, self.series2, + gamma=self.gamma, tau=self.tau, delta=self.delta, delta_factor=self.delta_factor, + only_triu=self.only_triu, penalty=self.penalty, window=self.window) + if self.compact: + self._wp = wp + else: + self._wp = ma.masked_array(wp) + self._reset_wp_mask() + 
# if self.only_triu: + # il = np.tril_indices(self._wp.shape[0]) + # self._wp[il] = ma.masked + + def align_fast(self): + use_c = self.use_c + self.use_c = True + result = self.align() + self.use_c = use_c + return result + + def _reset_wp_mask(self): + if self.compact: + dtw_cc.wps_positivize(self._c_parts, self._wp, + len(self.series1), len(self.series2), + 0, len(self.series1) + 1, + 0, len(self.series2) + 1) + else: + wp = self._wp + if self.window is None: + wp.mask = False + else: + windowdiff1 = max(0, wp.shape[1] - wp.shape[0]) + windowdiff2 = max(0, wp.shape[0] - wp.shape[1]) + il = np.tril_indices(n=wp.shape[0], k=-1 - self.window - windowdiff2, m=wp.shape[1]) + wp[il] = ma.masked + il = np.triu_indices(n=wp.shape[0], k=-self.window - windowdiff2, m=wp.shape[1]) + wp.mask[il] = False + il = np.triu_indices(n=wp.shape[0], k=1 + self.window + windowdiff1, m=wp.shape[1]) + wp[il] = ma.masked + if self.only_triu: + il = np.tril_indices(self._wp.shape[0], k=-1) + wp[il] = -np.inf + wp[il] = ma.masked + + def similarity_matrix(self): + sm = ma.masked_array(np.empty((len(self.series1), len(self.series2)))) + for r in range(len(self.series1)): + if self.window is None: + minc, maxc = 0, len(self.series2) + else: + minc, maxc = max(0, r - self.window), min(len(self.series2), r + self.window) + for c in range(minc): + sm[r, c] = ma.masked + for c in range(minc, maxc): + d = np.exp(-self.gamma * (self.series1[r] - self.series2[c]) ** 2) + sm[r, c] = self.delta if d < self.tau else d + for c in range(maxc, len(self.series2)): + sm[r, c] = ma.masked + return sm + + def similarity_matrix_matshow_kwargs(self, sm): + import matplotlib.pyplot as plt + from matplotlib.colors import BoundaryNorm + # viridis = cm.get_cmap('viridis', 256) + # newcolors = viridis(np.linspace(0, 1, 256)) + # pink = np.array([248 / 256, 24 / 256, 148 / 256, 1]) + # newcolors[:25, :] = pink + # newcmp = ListedColormap(newcolors) + # define the colormap + cmap = plt.get_cmap('Spectral') + # 
extract all colors from the .jet map + cmaplist = [cmap(i) for i in range(cmap.N)] + # create the new map + cmap = cmap.from_list('Custom cmap', cmaplist, cmap.N) + # define the bins and normalize and forcing 0 to be part of the colorbar! + sm[sm == -np.inf] = 0 + sm_max = max(1, np.max(sm)) + sm_min = min(self.delta, np.min(sm)) + # bounds_pos = np.arange(0, sm_max, .01) + bounds_pos = np.linspace(0, sm_max, 128) + bounds_neg = np.linspace(sm_min, 0, len(bounds_pos)) + bounds = np.concatenate((bounds_neg, bounds_pos)) + # bounds = np.arange(sm_min, sm_max, .01) + # idx = np.searchsorted(bounds, 0) + # bounds = np.insert(bounds, idx, 0) + norm = BoundaryNorm(bounds, cmap.N) + return {'cmap': cmap, 'norm': norm} + + @property + def wp(self): + if self.compact: + raise NotImplementedError("The full warping paths matrix is not available when using compact=True.\n" + "Use wp_slice to construct part of the matrix from the compact data structure.") + return self._wp.data + + def wp_slice(self, rb=None, re=None, cb=None, ce=None, positivize=False): + if rb is None: + rb = 0 + if re is None: + re = len(self.series1) + 1 + if cb is None: + cb = 0 + if ce is None: + ce = len(self.series2) + 1 + if not (0 <= rb <= len(self.series1) + 1 and + 0 <= re <= len(self.series1) + 1 and + 0 <= cb <= len(self.series2) + 1 and + 0 <= ce <= len(self.series2) + 1): + raise ValueError('Slice needs to be in 0<=r<={} and 0<=c<={}'.format(len(self.series1) + 1, + len(self.series2) + 1)) + if self.compact: + slice = np.empty((re-rb, ce-cb), dtype=np.double) + dtw_cc.wps_expand_slice(self._wp, slice, len(self.series1), len(self.series2), + rb, re, cb, ce, self._c_settings) + else: + slice = self._wp[rb:re, cb:ce] + if positivize: + neg_idx = slice < 0 + slice[neg_idx] = -slice[neg_idx] + return slice + + def best_match(self): + idx = np.unravel_index(np.argmax(self._wp, axis=None), self._wp.shape) + r, c = idx + lcm = LCMatch(self, r, c) + # path = lcm.path + # for (x, y) in path: + # 
self._wp[x + 1, y + 1] = ma.masked + return lcm + + def kbest_matches_store(self, k=1, minlen=2, buffer=0, restart=True, keep=False, matches=None, tqdm=None): + import time + if matches is None: + matches = LCMatches(self) + it = self.kbest_matches(k=k, minlen=minlen, buffer=buffer, restart=restart) + if tqdm is not None: + it = tqdm(it, total=k) + tp = time.perf_counter() + for ki, match in enumerate(it): + matches.append(match) + tn = time.perf_counter() + #print(f'time: {tn-tp}') + tp = tn + if not keep: + self._reset_wp_mask() + return matches + + def kbest_matches(self, k=1, minlen=2, buffer=0, restart=True): + """Yields the next best LocalConcurrent match. + Stops at k matches (use None for all matches). + + :param k: Number of matches to yield, None is all matches + :param minlen: Consider only matches of length longer than minlen + :param buffer: Matches cannot be closer than buffer to each other + :param restart: Start searching from start, ignore previous calls to kbest_matches + :param keep: Keep mask to search incrementally for multiple calls of kbest_matches + :return: Yield an LCMatch object + """ + if self._wp is None: + self.align() + wp = self._wp + if restart: + self._reset_wp_mask() + l1 = len(self.series1) + l2 = len(self.series2) + lperc = max(100, int(l1/10)) + ki = 0 + while k is None or ki < k: + idx = None + lcm = None + cnt = 0 + while idx is None: + cnt += 1 + if cnt % lperc == 0: + print(f'Searching for matches is taking a long time (k={ki+1}/{k}: {cnt} tries)') + if self.compact: + idx = dtw_cc.wps_max(self._c_parts, wp, l1, l2) + else: + idx = np.unravel_index(np.argmax(wp, axis=None), wp.shape) + if idx[0] == 0 or idx[1] == 0: + # If all are masked, idx=0 is returned + return None + r, c = idx + # print(f'Best value: wp[{r},{c}] = {wp[r,c]}') + lcm = LCMatch(self, r, c) + path = lcm.path + for (x, y) in path: + x += 1 + y += 1 + if not self.compact: + if len(wp.mask.shape) > 0 and wp.mask[x, y] is True: # True means invalid + # 
print('found path contains masked, restart') + lcm = None + idx = None + break + else: + wp[x, y] = -wp[x, y] # ma.masked + else: + dtw_cc.wps_negativize_value(self._c_parts, wp, l1, l2, x, y) + if len(path) < minlen: + # print('found path too short, restart') + lcm = None + idx = None + if buffer < 0 and lcm is not None: + if self.compact: + dtw_cc.wps_negativize(self._c_parts, wp, + len(self.series1), len(self.series2), + path[0][0]+1, path[-1][0]+2, + path[0][1]+1, path[-1][1]+2) + else: + miny, maxy = 0, wp.shape[1] + minx, maxx = 0, wp.shape[0] + wp[path[0][0]+1:path[-1][0]+2, miny:maxy] = -wp[path[0][0]+1:path[-1][0]+2, miny:maxy] # ma.masked + wp[minx:maxx, path[0][1]+1:path[-1][1]+2] = -wp[minx:maxx, path[0][1]+1:path[-1][1]+2] # ma.masked + elif buffer > 0 and lcm is not None: + miny, maxy = 0, wp.shape[1] - 1 + minx, maxx = 0, wp.shape[0] - 1 + if self.compact: + raise Exception("A positive buffer is not yet supported for compact WP data structure") + else: + for (x, y) in path: + xx = x + 1 + for yy in range(max(miny, y + 1 - buffer), min(maxy, y + 1 + buffer)): + wp[xx, yy] = -wp[xx, yy] # ma.masked + yy = y + 1 + for xx in range(max(minx, x + 1 - buffer), min(maxx, x + 1 + buffer)): + wp[xx, yy] = -wp[xx, yy] # ma.masked + if lcm is not None: + ki += 1 + yield lcm + + def best_path(self, row, col, wp=None): + if self._wp is None: + return None + if wp is None: + wp = self._wp + l1 = len(self.series1) + l2 = len(self.series2) + if self.compact: + p = dtw_cc.best_path_compact_affinity(wp, l1, l2, row, col, window=self.window) + return p + argm = argmax + i = row + j = col + p = [(i - 1, j - 1)] + # prev = self._wp[i, j] + while i > 0 and j > 0: + values = [wp[i - 1, j - 1], wp[i - 1, j], wp[i, j - 1]] + # print(f'{i=}, {j=}, {argm(values)=}, {ma.argmax(values)=}, {values=}') + values = [-1 if v is ma.masked else v for v in values] + c = argmax(values) # triggers "Warning: converting a masked element to nan" + # if values[c] is ma.masked: + # break + if 
values[c] <= 0: # values[c] > prev: + break + # prev = values[c] + if c == 0: + if wp[i - 1, j - 1] is ma.masked or wp[i - 1, j - 1] < 0: + assert False + break + i, j = i - 1, j - 1 + elif c == 1: + if wp[i - 1, j] is ma.masked or wp[i - 1, j] < 0: + assert False + break + i = i - 1 + elif c == 2: + if wp[i, j - 1] is ma.masked or wp[i, j - 1] < 0: + assert False + break + j = j - 1 + p.append((i - 1, j - 1)) + if p[-1][0] < 0 or p[-1][1] < 0: + p.pop() + p.reverse() + return p + + def settings_from(self, lc): + self.gamma = lc.gamma + self.tau = lc.tau + self.delta = lc.delta + self.delta_factor = lc.delta_factor + self.penalty = lc.penalty + self.window = lc.window + + def settings(self, kind=None): + d = { + "gamma": self.gamma, + "tau": self.tau, + "delta": self.delta, + "delta_factor": self.delta_factor, + "penalty": self.penalty, + "window": self.window, + } + if kind == "str": + return "\n".join(f"{k:<13}: {v}" for k, v in d.items()) + return d + + def wp_c_print(self): + dtw_cc.wps_print(self._wp, len(self.series1), len(self.series2), window=self.window) + + def wp_c_print_compact(self): + dtw_cc.wps_print_compact(self._wp, len(self.series1), len(self.series2), window=self.window) diff --git a/dtaidistance/subsequence/subsequencealignment.py b/dtaidistance/subsequence/subsequencealignment.py new file mode 100644 index 00000000..41b5c01c --- /dev/null +++ b/dtaidistance/subsequence/subsequencealignment.py @@ -0,0 +1,269 @@ +# -*- coding: UTF-8 -*- +""" +dtaidistance.subsequence.subsequencealignment +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +(requires version 2.3.0 or higher) + +DTW-based subsequence matching. + +:author: Wannes Meert +:copyright: Copyright 2021-2023 KU Leuven, DTAI Research Group. +:license: Apache License, Version 2.0, see LICENSE for details. + +""" +import logging + +from .. import dtw # import warping_paths, warping_paths_fast, best_path, warping_paths_affinity, distance +from .. import dtw_ndim +from .. 
import util_numpy +from .. import util + + +try: + if util_numpy.test_without_numpy(): + raise ImportError() + import numpy as np + import numpy.ma as ma + argmin = np.argmin + argmax = np.argmax + array_min = np.min + array_max = np.max +except ImportError: + np = None + ma = None + argmin = util.argmin + argmax = util.argmax + array_min = min + array_max = max + + +logger = logging.getLogger("be.kuleuven.dtai.distance") + + +dtw_cc = None +try: + from . import dtw_cc +except ImportError: + dtw_cc = None + + +def subsequence_alignment(query, series, use_c=False): + """See SubsequenceAligment. + + :param query: + :param series: + :return: + """ + sa = SubsequenceAlignment(query, series, use_c=use_c) + sa.align() + return sa + + +class SAMatch: + def __init__(self, idx, alignment): + """SubsequenceAlignment match""" + self.idx = idx + self.alignment = alignment + + @property + def value(self): + """Normalized DTW distance of match. + + Normalization is the DTW distance divided by the query length. + """ + return self.alignment.matching[self.idx] + + @property + def distance(self): + """DTW distance of match. + + This value is dependent on the length of the query. Use the value + property when comparing queries of different lengths. + """ + return self.value * len(self.alignment.query) + + @property + def segment(self): + """Matched segment in series.""" + start = self.alignment.matching_function_startpoint(self.idx) + end = self.alignment.matching_function_endpoint(self.idx) + return [start, end] + + @property + def path(self): + """Matched path in series""" + return self.alignment.matching_function_bestpath(self.idx) + + def __str__(self): + return f'SAMatch({self.idx})' + + def __repr__(self): + return self.__str__() + + +class SubsequenceAlignment: + def __init__(self, query, series, penalty=0.1, use_c=False): + """Subsequence alignment using DTW. + Find where the query occurs in the series. 
+ + Based on Fundamentals of Music Processing, Meinard Müller, Springer, 2015. + + Example:: + + query = np.array([1., 2, 0]) + series = np.array([1., 0, 1, 2, 1, 0, 2, 0, 3, 0, 0]) + sa = subsequence_search(query, series) + mf = sa.matching_function() + sa.kbest_matches(k=2) + + + :param query: Subsequence to search for + :param series: Long sequence in which to search + :param penalty: Penalty for non-diagonal matching + :param use_c: Use the C-based DTW function if available + """ + self.query = query + self.series = series + self.penalty = penalty + self.paths = None + self.matching = None + self.use_c = use_c + + def reset(self): + self.matching = None + + def align(self): + if self.matching is not None: + return + psi = [0, 0, len(self.series), len(self.series)] + if np is not None and isinstance(self.series, np.ndarray) and len(self.series.shape) > 1: + if not self.use_c: + _, self.paths = dtw_ndim.warping_paths(self.query, self.series, penalty=self.penalty, psi=psi, + psi_neg=False) + else: + _, self.paths = dtw_ndim.warping_paths_fast(self.query, self.series, penalty=self.penalty, psi=psi, + compact=False, psi_neg=False) + else: + if not self.use_c: + _, self.paths = dtw.warping_paths(self.query, self.series, penalty=self.penalty, psi=psi, + psi_neg=False) + else: + _, self.paths = dtw.warping_paths_fast(self.query, self.series, penalty=self.penalty, psi=psi, + compact=False, psi_neg=False) + self._compute_matching() + + def align_fast(self): + use_c = self.use_c + self.use_c = True + result = self.align() + self.use_c = use_c + return result + + def _compute_matching(self): + matching = self.paths[-1, :] + if len(matching) > len(self.series): + matching = matching[-len(self.series):] + self.matching = np.array(matching) / len(self.query) + + def warping_paths(self): + """Get matrix with all warping paths. + + If the aligmnent was computed using a compact, the paths are first copied into a full + warping paths matrix. 
+ + :return: Numpy matrix of size (len(query)+1) * (len(series)+1) + """ + return self.paths + + def matching_function(self): + """The matching score for each end-point of a possible match.""" + return self.matching + + def get_match(self, idx): + return SAMatch(idx, self) + + def best_match_fast(self): + use_c = self.use_c + self.use_c = True + result = self.best_match() + self.use_c = use_c + return result + + def best_match(self): + best_idx = np.argmin(self.matching) + return self.get_match(best_idx) + + def kbest_matches_fast(self, k=1, overlap=0): + use_c = self.use_c + self.use_c = True + result = self.kbest_matches(k=k, overlap=overlap) + self.use_c = use_c + return result + + def kbest_matches(self, k=1, overlap=0): + """Yields the next best match. Stops at k matches (use None for all matches). + + :param k: Number of matches to yield. None is all matches. + :param overlap: Matches cannot overlap unless overlap > 0. + :return: Yield an SAMatch object + """ + self.align() + matching = np.array(self.matching) + maxv = np.ceil(np.max(matching) + 1) + matching[:min(len(self.query) - 1, overlap)] = maxv + ki = 0 + while k is None or ki < k: + best_idx = np.argmin(matching) + if best_idx == 0 or np.isinf(matching[best_idx]) or matching[best_idx] == maxv: + # No more matches found + break + match = self.get_match(best_idx) + b, e = match.segment + cur_overlap = min(overlap, e - b - 1) + mb, me = best_idx + 1 - (e - b) + cur_overlap, best_idx + 1 + if np.isinf(np.max(matching[mb:me])): + # No overlapping matches + matching[best_idx] = maxv + continue + matching[mb:me] = np.inf + ki += 1 + yield match + + def matching_function_segment(self, idx): + """Matched segment in series.""" + start = self.matching_function_startpoint(idx) + end = self.matching_function_endpoint(idx) + return [start, end] + + def matching_function_endpoint(self, idx): + """Index in series for end of match in matching function at idx. 
+ + :param idx: Index in matching function + :return: Index in series + """ + if len(self.matching) == len(self.series): + return idx + diff = len(self.series) - len(self.matching) + return idx + diff + + def matching_function_startpoint(self, idx): + """Index in series for start of match in matching function at idx. + + :param idx: Index in matching function + :return: Index in series + """ + real_idx = idx + 1 + path = dtw.best_path(self.paths, col=real_idx) + start_idx = path[0][1] + return start_idx + + def matching_function_bestpath(self, idx): + """Indices in series for best path for match in matching function at idx. + + :param idx: Index in matching function + :return: List of (row, col) + """ + real_idx = idx + 1 + path = dtw.best_path(self.paths, col=real_idx) + return path diff --git a/dtaidistance/subsequence/subsequencesearch.py b/dtaidistance/subsequence/subsequencesearch.py new file mode 100644 index 00000000..9195b713 --- /dev/null +++ b/dtaidistance/subsequence/subsequencesearch.py @@ -0,0 +1,296 @@ +# -*- coding: UTF-8 -*- +""" +dtaidistance.subsequence.subsequencesearch +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +(requires version 2.3.0 or higher) + +DTW-based subsequence matching. + +:author: Wannes Meert +:copyright: Copyright 2021-2023 KU Leuven, DTAI Research Group. +:license: Apache License, Version 2.0, see LICENSE for details. + +""" +import logging + +from .. import dtw # import warping_paths, warping_paths_fast, best_path, warping_paths_affinity, distance +from .. import dtw_ndim +from .. import util_numpy +from .. 
import util + + +try: + if util_numpy.test_without_numpy(): + raise ImportError() + import numpy as np + import numpy.ma as ma + argmin = np.argmin + argmax = np.argmax + array_min = np.min + array_max = np.max +except ImportError: + np = None + ma = None + argmin = util.argmin + argmax = util.argmax + array_min = min + array_max = max + + +logger = logging.getLogger("be.kuleuven.dtai.distance") + + +dtw_cc = None +try: + from . import dtw_cc +except ImportError: + dtw_cc = None + + +def subsequence_search(query, series, dists_options=None, use_lb=True, + max_dist=None, max_value=None, use_c=None): + """See SubsequenceSearch. + + :param query: Time series to search for + :param series: Iterator over time series to perform search on. + This can be for example windows over a long time series. + :param dists_options: Options passed on to `dtw.distance` + :param use_lb: Use lowerbounds to early abandon options + :param max_dist: Ignore DTW distances larger than this value + :param max_value: Ignore normalized DTW distances larger than this value + :param use_c: Use fast C implementation if available + :return: SubsequenceSearch object + """ + ss = SubsequenceSearch(query, series, dists_options=dists_options, use_lb=use_lb, + max_dist=max_dist, max_value=max_value, use_c=use_c) + return ss + + +class SSMatch: + """Found match by SubsequenceSearch. + + The match is identified by the idx property, which is the index of the matched + series in the original list of series. The distance property returns the DTW + distance between the query and the series at index idx. 
+ """ + def __init__(self, kidx, ss): + self.kidx = kidx + self.ss = ss + + @property + def distance(self): + """DTW distance.""" + return self.ss.kbest_distances[self.kidx][0] + + @property + def value(self): + """Normalized DTW distance.""" + return self.distance / len(self.ss.query) + + @property + def idx(self): + return self.ss.kbest_distances[self.kidx][1] + + def __str__(self): + return f'SSMatch({self.idx})' + + def __repr__(self): + return self.__str__() + + +class SSMatches: + def __init__(self, ss, k=None): + """Iterator over matches. + + :param ss: The SubsequenceSearch object + :param k: Optional a k. This overrules the ss.k value. + Useful if a smaller k is asked to iterate over than has been stored. + """ + self.ss = ss + self.k = k + if self.ss.kbest_distances is None: + self.k = 0 + elif self.k is None or self.k > self.ss.k: + self.k = self.ss.k + if self.k is None: + self.k = len(self.ss.kbest_distances) + + def __getitem__(self, key): + if isinstance(key, slice): + start = 0 if key.start is None else key.start + return [SSMatch(kip+start, self.ss) for kip, (v, i) in + enumerate(self.ss.kbest_distances[key])] + return SSMatch(key, self.ss) + + def __iter__(self): + for ki, (v, i) in enumerate(self.ss.kbest_distances[:self.k]): + yield SSMatch(ki, self.ss) + + def __len__(self): + return self.k + + def __str__(self): + if self.k > 10: + return '[' + ', '.join(str(m) for m in self[:5]) + ' ... ' +\ + ', '.join(str(m) for m in self[-5:]) + ']' + return '[' + ', '.join(str(m) for m in self) + ']' + + +class SubsequenceSearch: + def __init__(self, query, s, dists_options=None, use_lb=True, keep_all_distances=False, + max_dist=None, max_value=None, use_c=None, use_ndim=None): + """Search the best matching (subsequence) time series compared to a given time series. + + :param query: Time series to search for + :param s: Iterator over time series to perform search on. + This can be for example windows over a long time series. 
+ :param dists_options: Options passed on to `dtw.distance` + :param use_lb: Use lowerbounds to early abandon options + :param max_dist: Ignore DTW distances larger than this value + if max_dist is also given in dists_options, then the one in dists_options is ignored + if both max_dist and max_value are given, the smallest is used + :param max_value: Ignore normalized DTW distances larger than this value + """ + self.query = query + if use_ndim is None: + self.use_ndim = (util.detect_ndim(query) > 1) + else: + self.use_ndim = use_ndim + self.s = s + # If keep_all_distances is true, store all. Can take up quite some memory. + self.distances = None + # Keep track of the k-best distances + self.kbest_distances = None + self.lbs = None + self.k = None + self.dists_options = {} if dists_options is None else dists_options + if max_dist is None: + self.max_dist = self.dists_options.get('max_dist', np.inf) + else: + self.max_dist = max_dist + if max_value is not None: + self.max_dist = min(self.max_dist, max_value * len(self.query)) + self.dists_options['max_dist'] = self.max_dist + if use_c is not None: + self.dists_options['use_c'] = use_c + self.use_lb = use_lb + + self.keep_all_distances = keep_all_distances + # if self.use_lb and not self.keep_all_distances: + # raise ValueError("If use_lb is true, then keep_all_distances should also be true.") + + def reset(self): + self.distances = None + self.kbest_distances = None + self.lbs = None + + # def compute_lbs(self): + # self.lbs = np.zeros((len(self.s),)) + # for idx, series in enumerate(self.s): + # self.lbs[idx] = dtw.lb_keogh(self.query, series, **self.dists_options) + + def align_fast(self, k=None): + use_c = self.dists_options['use_c'] + self.dists_options['use_c'] = True + result = self.align(k=k) + self.dists_options['use_c'] = use_c + return result + + def align(self, k=None): + if k is not None and self.k is not None and k <= self.k and self.kbest_distances is not None: + return self.kbest_distances[:k] + if 
self.use_ndim: + distance = dtw_ndim.distance + lb_keogh = None + if self.use_lb: + self.use_lb = False + logger.warning('The setting use_lb is ignored for multivariate series.') + else: + distance = dtw.distance + lb_keogh = dtw.lb_keogh + if k is None or self.keep_all_distances: + self.distances = np.zeros((len(self.s),)) + # if self.use_lb: + # self.compute_lbs() + import heapq + h = [(-np.inf, -1)] + max_dist = self.max_dist + for idx, series in enumerate(self.s): + if self.use_lb: + lb = lb_keogh(self.query, series, **self.dists_options) + if lb > max_dist: + continue + dist = distance(self.query, series, **self.dists_options) + if k is not None: + if len(h) < k: + if not np.isinf(dist) and dist <= max_dist: + heapq.heappush(h, (-dist, idx)) + max_dist = min(max_dist, -h[0][0]) + else: + if not np.isinf(dist) and dist <= max_dist: + heapq.heappushpop(h, (-dist, idx)) + max_dist = min(max_dist, -h[0][0]) + self.dists_options['max_dist'] = max_dist + if self.keep_all_distances or k is None: + self.distances[idx] = dist + if k is not None: + # hh = np.array([-v for v, _ in h]) + # self.kbest_distances = [(-h[i][0], h[i][1]) for i in np.argsort(hh)] + self.kbest_distances = sorted((-v, i) for v, i in h if i != -1) + else: + self.kbest_distances = [(self.distances[i], i) for i in np.argsort(self.distances)] + + self.k = k + return self.kbest_distances + + def get_ith_value(self, i): + """Return the i-th value from the k-best values. 
+ + :param i: Return i-th best value (i < k) + :return: (distance, index) + """ + if self.kbest_distances is None or self.k is None: + raise ValueError('Align should be called before asking for the i-th value.') + if i > self.k: + raise ValueError('The i-th value is not available, i={}>k={}'.format(i, self.k)) + return self.kbest_distances[i] + + def best_match_fast(self): + self.dists_options['use_c'] = True + return self.best_match() + + def best_match(self): + self.align(k=1) + # _value, best_idx = self.kbest_distances[0] + return SSMatch(0, self) + + def kbest_matches_fast(self, k=1): + use_c = self.dists_options.get('use_c', None) + self.dists_options['use_c'] = True + result = self.kbest_matches(k=k) + if use_c is None: + del self.dists_options['use_c'] + else: + self.dists_options['use_c'] = use_c + return result + + def kbest_matches(self, k=1): + """Return the k best matches. + + It is recommended to set k to a value, and not None. + If k is set to None, all comparisons are kept and returned. Also no early + stopping is applied in case k is None. 
+ + :param k: Number of best matches to return (default is 1) + :return: List of SSMatch objects + """ + self.align(k=k) + # if k is None: + # return [SSMatch(best_idx, self) for best_idx in range(len(self.distances))] + # if self.keep_all_distances: + # best_idxs = np.argpartition(self.distances, k) + # return [SSMatch(best_idx, self) for best_idx in best_idxs[:k]] + # distances = reversed(sorted(self.h)) + # return [SSMatch(best_idx, self) for dist, best_idx in distances] + return SSMatches(self) diff --git a/tests/rsrc/pat1.txt b/tests/rsrc/pat1.txt new file mode 100644 index 00000000..04dec140 --- /dev/null +++ b/tests/rsrc/pat1.txt @@ -0,0 +1,2 @@ +112.69 115.13 119.03 129.86 144.22 145.72 143.08 138.21 137.28 140.87 126.06 127.53 135.01 121.22 104.71 108.07 117.99 122.30 126.25 131.54 125.56 127.25 135.65 135.01 131.06 126.09 124.89 124.85 139.22 140.09 137.69 135.28 129.36 125.69 126.98 126.06 120.94 125.64 132.07 123.93 125.93 141.25 130.65 124.50 131.30 123.37 113.61 111.12 111.56 123.70 130.22 129.09 132.46 127.28 120.49 118.02 113.39 108.17 108.35 111.65 112.56 115.84 117.86 117.51 119.33 117.09 130.04 147.63 125.28 119.26 136.44 131.43 137.64 141.74 130.15 127.59 121.87 124.08 126.03 127.83 136.25 130.02 139.15 145.56 144.48 145.98 147.49 150.55 148.13 128.97 100.19 91.52 90.39 89.68 92.05 87.28 75.45 70.21 69.79 72.25 80.10 89.10 85.68 79.96 82.01 80.30 75.77 68.22 67.15 69.04 64.95 67.79 68.62 65.55 69.46 70.06 71.18 76.05 81.61 86.02 83.57 83.43 85.10 80.64 78.57 84.23 90.91 96.12 93.40 95.29 100.56 95.82 90.02 93.19 93.84 87.32 89.50 92.15 88.23 84.25 81.36 83.29 81.56 73.01 70.44 74.11 75.71 74.68 77.06 74.17 66.93 66.93 68.58 67.68 70.79 75.28 71.28 70.67 75.40 76.15 76.50 71.88 63.19 65.45 66.88 66.08 65.34 61.77 65.06 64.60 63.70 61.11 68.49 108.82 143.06 160.51 165.37 152.39 152.67 149.55 157.69 203.46 232.09 228.53 227.02 218.89 202.73 186.95 156.11 130.27 125.58 114.34 103.19 102.91 102.99 102.32 99.32 91.40 84.09 79.93 79.07 89.37 102.08 
107.00 105.97 109.38 112.28 111.10 111.03 113.82 109.19 103.12 102.78 95.07 96.18 99.63 92.05 86.51 86.83 88.42 95.78 98.90 93.02 89.16 86.07 90.49 92.07 91.03 91.01 87.19 87.76 88.86 100.27 114.86 116.79 119.00 130.19 132.30 137.17 143.55 143.85 141.12 144.06 151.32 155.10 147.74 144.63 147.63 144.59 136.65 132.02 140.01 129.86 124.88 135.76 125.67 121.27 137.86 132.51 129.24 132.04 127.75 134.25 141.49 137.84 141.27 145.56 146.34 141.86 149.98 151.17 148.23 160.04 146.48 131.71 136.73 133.19 128.39 133.67 129.44 139.41 146.20 143.82 143.43 148.76 152.71 151.57 148.98 137.27 135.03 127.12 128.67 142.12 137.30 138.15 136.94 122.82 113.35 110.16 108.58 +88.03 81.35 78.31 90.47 104.21 104.33 104.21 107.54 108.82 113.98 112.97 105.07 104.64 96.82 86.01 80.67 84.78 91.67 95.50 102.01 105.73 107.03 119.91 121.04 109.87 114.94 125.55 135.09 128.09 122.19 127.64 125.68 121.82 110.46 104.53 111.64 115.70 112.53 109.09 103.55 106.49 109.98 102.25 103.14 105.13 100.69 102.32 100.98 97.77 105.00 110.29 119.35 121.21 115.88 110.73 109.70 102.56 91.36 100.64 106.76 102.58 106.37 111.47 117.34 124.25 125.59 135.27 144.44 135.15 133.62 143.98 145.37 139.00 128.83 130.15 144.34 146.25 147.91 140.03 131.55 137.48 126.08 122.11 120.32 115.26 122.01 119.07 111.03 108.94 110.82 110.57 108.38 104.67 92.56 88.19 88.61 87.23 91.00 87.21 79.45 82.14 79.69 73.55 76.22 74.27 76.18 81.75 85.77 81.74 69.08 68.16 72.57 73.99 79.17 86.85 87.55 73.81 61.07 62.53 59.79 64.38 71.24 69.03 71.56 69.02 65.14 63.33 60.86 67.78 68.87 66.43 69.21 59.59 53.40 57.13 61.56 58.89 52.05 50.23 54.73 58.05 68.79 80.82 72.78 67.82 73.92 71.04 63.65 64.29 65.16 65.35 61.61 61.58 66.68 64.68 65.45 62.32 56.60 58.32 62.43 57.72 48.44 44.35 48.19 54.87 55.88 55.34 53.92 49.41 43.06 41.77 38.42 52.75 106.28 167.34 201.30 195.15 201.70 228.94 234.21 224.09 223.49 247.11 238.51 216.25 201.58 184.26 152.58 122.19 109.74 92.24 86.51 78.44 72.36 75.82 80.56 78.69 70.85 69.72 64.73 60.12 62.57 63.19 67.70 66.46 66.39 
74.16 74.16 69.79 80.29 79.10 63.91 64.71 57.17 52.39 55.05 55.27 54.69 56.24 59.31 60.27 59.88 59.34 57.62 55.39 58.76 56.46 52.31 52.47 50.14 51.89 51.92 58.29 65.87 72.42 89.00 103.68 109.33 100.86 93.52 94.92 106.48 127.83 133.22 135.17 140.30 129.14 121.58 125.50 128.09 128.44 130.44 131.14 115.89 107.10 107.61 105.31 108.86 103.66 93.60 82.97 90.51 106.73 109.77 119.67 123.20 123.73 133.74 132.01 123.72 120.24 125.93 132.56 120.50 108.01 114.75 120.82 122.01 125.89 117.34 108.05 125.03 129.27 118.64 117.91 108.67 104.80 108.74 103.26 101.98 105.89 101.07 93.19 90.40 92.45 93.71 94.65 93.80 89.96 89.33 diff --git a/tests/test_subsequence.py b/tests/test_subsequence.py index 6a1aef96..01a4dc74 100644 --- a/tests/test_subsequence.py +++ b/tests/test_subsequence.py @@ -6,7 +6,7 @@ from dtaidistance import util_numpy, util from dtaidistance.subsequence.dtw import subsequence_alignment, local_concurrences,\ - subsequence_search + subsequence_search, LocalConcurrences from dtaidistance import dtw_visualisation as dtwvis from dtaidistance.exceptions import MatplotlibException from dtaidistance.dtw import lb_keogh @@ -159,15 +159,20 @@ def test_dtw_localconcurrences_eeg(): tau = np.exp(-gamma * diffp**2) # threshold # print(f'{tau=}, {delta=}') # tau=0.8532234738897421, delta=-1.7064469477794841 - buffer = 10 + buffer = -10 minlen = 20 - lc = local_concurrences(series, gamma=gamma, tau=tau, delta=delta, delta_factor=delta_factor) + tp = time.perf_counter() + lc = local_concurrences(series, gamma=gamma, tau=tau, delta=delta, delta_factor=delta_factor, + use_c=True) + tn = time.perf_counter() + print(f'Align took: {tn-tp} seconds') + print('{}, {}'.format(lc.tau, lc.delta)) # print(f'{lc.tau=}, {lc.delta=}') matches = [] - for match in lc.kbest_matches(k=100, minlen=minlen, buffer=buffer): - if match is None: - break - matches.append(match) + tp = time.perf_counter() + matches = lc.kbest_matches_store(k=100, minlen=minlen, buffer=buffer) + tn = time.perf_counter() + 
print(f'KBest matches took: {tn - tp} seconds') print( [(m.row, m.col) for m in matches]) # assert [(m.row, m.col) for m in matches] == [(84, 95), (65, 93), (50, 117), (117, 200), (32, 180), # (160, 178), (96, 139), (138, 181), (71, 200), (71, 117), @@ -184,7 +189,7 @@ def test_dtw_localconcurrences_eeg(): raise MatplotlibException("No matplotlib available") fn = directory / "test_dtw_localconcurrences.png" fig = plt.figure() - fig, ax = dtwvis.plot_warpingpaths(series, series, lc.wp, path=-1, figure=fig) + fig, ax = dtwvis.plot_warpingpaths(series, series, lc.wp_slice(), path=-1, figure=fig) for match in matches: dtwvis.plot_warpingpaths_addpath(ax, match.path) plt.savefig(fn) @@ -195,37 +200,73 @@ def test_dtw_localconcurrences_eeg(): @numpyonly def test_dtw_localconcurrences_short(): with util_numpy.test_uses_numpy() as np: - series = np.array([0, -1, -1, 0, 1, 2, 1, 0, 0, 0, 1, 3, 2, 1, 0, 0, 0, -1, 0]) + series1 = np.array([0., -1, -1, 0, 1, 2, 1, 0, 0, 0, 1, 3, 2, 1, 0, 0, 0, -1, 0]) + # series1 = np.array([0., -1, -1, 0, 1, 2, 1]) + # series2 = series1 + series2 = np.array([0.4, -0.9, -1.3, 1, 0.1, 2, 1, 0, 0, 10, 8, 3, 2, 1, 0, 0, 0, -1, 0]) gamma = 1 threshold_tau = 70 - delta = -2 * np.exp(-gamma * np.percentile(series, threshold_tau)) # -len(series)/2 # penalty - delta_factor = 0.5 - tau = np.exp(-gamma * np.percentile(series, threshold_tau)) # threshold - # print(f'{tau=}, {delta=}') - buffer = 10 + delta = -2 * np.exp(-gamma * np.percentile(series1, threshold_tau)) # -len(series)/2 # penalty + delta_factor = 0.1 #0.5 + tau = np.exp(-gamma * np.percentile(series1, threshold_tau)) # threshold + # print(f'{tau=}, {delta=}, {delta_factor=}, {gamma=}') + buffer = -10 minlen = 3 - lc = local_concurrences(series, gamma=gamma, tau=tau, delta=delta, delta_factor=delta_factor, penalty=1) - matches = [] - for match in lc.kbest_matches(k=100, minlen=minlen, buffer=buffer): - if match is None: - break - matches.append(match) - - assert [(m.row, m.col) for m in 
matches] == [(10, 17), (4, 19)] + window = None + lc = local_concurrences(series1, series2, gamma=gamma, tau=tau, delta=delta, delta_factor=delta_factor, penalty=1, + window=window, use_c=False) + # print(lc.settings(kind="str")) + # with np.printoptions(precision=2, linewidth=400): + # print(lc.wp) + lc2 = local_concurrences(series1, series2, gamma=gamma, tau=tau, delta=delta, delta_factor=delta_factor, penalty=1, + window=window, use_c=True, compact=True) + # with np.printoptions(precision=2, linewidth=400): + # # print(lc2.wp) + # lc2.wp_c_print() + np.testing.assert_allclose(lc.wp, lc2.wp_slice()) + p = lc.best_path(len(series1) - 1, len(series2)) + p2 = lc2.best_path(len(series1) - 1, len(series2)) + # print(p) + # print(p2) + assert str(p) == str(p2) + # assert str(p) == "[(0, 1), (1, 2), (2, 3), (3, 4), (4, 5), (5, 6), (6, 7), (7, 8), (8, 9), (9, 10), (10, 11), "\ + # "(11, 12), (12, 13), (13, 14), (14, 15), (15, 16), (16, 17), (17, 18)]", \ + # str(p) + matches = lc.kbest_matches_store(k=100, minlen=minlen, buffer=buffer) + matches2 = lc2.kbest_matches_store(k=100, minlen=minlen, buffer=buffer) + # print(matches) + # print(matches2) + # assert str(matches) == str(matches2) + # print(lc2._wp) + + # assert [(m.row, m.col) for m in matches] == [(18, 19), (4, 19)] + + # print(lc2.wp_slice(2, 6, 3, 5)) if directory and not dtwvis.test_without_visualization(): try: import matplotlib.pyplot as plt + import matplotlib.backends.backend_pdf except ImportError: raise MatplotlibException("No matplotlib available") - fn = directory / "test_dtw_localconcurrences_short.png" + fn = directory / "test_dtw_localconcurrences_short.pdf" + pdf = matplotlib.backends.backend_pdf.PdfPages(fn) fig = plt.figure() - fig, ax = dtwvis.plot_warpingpaths(series, series, lc.wp, path=-1, figure=fig) + fig, ax = dtwvis.plot_warpingpaths(series1, series2, lc.wp, path=-1, figure=fig) for match in matches: dtwvis.plot_warpingpaths_addpath(ax, match.path) - plt.savefig(fn) + pdf.savefig(fig) + 
plt.close(fig) + + fig, ax = matches.plot(begin=None, end=None, showlegend=False) + pdf.savefig(fig) + plt.close(fig) + + fig, ax = matches2.plot(begin=None, end=None, showlegend=False) + pdf.savefig(fig) plt.close(fig) + pdf.close() def create_data_subseqsearch_eeg(np, dtype=None, longer=False): @@ -286,7 +327,6 @@ def test_dtw_subseqsearch_eeg2(): assert str(best) == "[SSMatch(15)]", str(best) - @numpyonly @pytest.mark.benchmark(group="subseqsearch_eeg") def test_dtw_subseqsearch_eeg(benchmark): @@ -294,22 +334,21 @@ def test_dtw_subseqsearch_eeg(benchmark): query, s, k, series, s_idx = create_data_subseqsearch_eeg(np) def run(): - sa = subsequence_search(query, s, dists_options={'use_c': True}, - keep_all_distances=False) - best = sa.kbest_matches_fast(k=k) - return best, sa + ss = subsequence_search(query, s, dists_options={'use_c': True}) + best = ss.kbest_matches_fast(k=k) + return best, ss if benchmark is None: tic = time.perf_counter() best, sa = run() toc = time.perf_counter() print("Searching performed in {:0.4f} seconds".format(toc - tic)) else: - best = benchmark(run) + best, ss = benchmark(run) # print(sa.distances) # print(best) assert str(best) == "[SSMatch(15), SSMatch(7), SSMatch(4)]", str(best) - assert str(best[0]) == str(sa.best_match()), '{} != {}'.format(best[0], sa.best_match()) + assert str(best[0]) == str(ss.best_match()), '{} != {}'.format(best[0], ss.best_match()) assert str(best[:]) == "[SSMatch(15), SSMatch(7), SSMatch(4)]", str(best[:]) assert str(best[0:3]) == "[SSMatch(15), SSMatch(7), SSMatch(4)]", str(best[0:3]) @@ -347,6 +386,192 @@ def run(): plt.close(fig) +@numpyonly +def test_lc_pat1(): + with util_numpy.test_uses_numpy() as np: + data_fn = Path(__file__).parent / 'rsrc' / 'pat1.txt' + data = np.loadtxt(data_fn) + series1 = data[0, :] + series2 = data[1, :] + + # gamma = 1 + # threshold_tau = 70 + # delta = -2 * np.exp(-gamma * np.percentile(series, threshold_tau)) # -len(series)/2 # penalty + # delta_factor = 0.5 + # tau = 
np.exp(-gamma * np.percentile(series, threshold_tau)) # threshold + # # print(f'{tau=}, {delta=}') + buffer = 50 + minlen = 20 + t1 = time.time() + lc = LocalConcurrences(series1, series2, window=int(buffer / 2), use_c=False) + lc.estimate_settings(series1, series2, tau_type='mean', tau_factor=2) + # lc.delta_factor = 1 + lc.exp_avg = None # 0.1 + lc.align() + t2 = time.time() + print(f'Time to align: {t2-t1}') + + print(lc.settings(kind="str")) + t1 = time.time() + matches = [] + for match in lc.kbest_matches(k=None, minlen=minlen, buffer=-buffer): + if match is None: + break + matches.append(match) + t2 = time.time() + print(f'Time to find matches: {t2 - t1}') + + sm = lc.similarity_matrix() + + if directory and not dtwvis.test_without_visualization(): + try: + import matplotlib.pyplot as plt + import matplotlib.backends.backend_pdf + except ImportError: + raise MatplotlibException("No matplotlib available") + fn = directory / "test_lc_pat1.pdf" + pdf = matplotlib.backends.backend_pdf.PdfPages(fn) + fig = plt.figure(figsize=(10, 10)) + fig, ax = dtwvis.plot_warpingpaths(series1, series2, sm, + path=-1, figure=fig, showlegend=True, + matshow_kwargs=lc.similarity_matrix_matshow_kwargs(sm)) + pdf.savefig(fig) + + fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(10, 10)) + values = sm # np.array(sm.reshape((1, -1))) + # print(np.histogram(values)) + ax.hist(values, bins=100, histtype='step', range=(-2, 1)) + ax.vlines([lc.tau, lc.delta], 0, 100, color='red') + pdf.savefig(fig) + + fig = plt.figure(figsize=(10,10)) + fig, ax = dtwvis.plot_warpingpaths(series1, series2, + lc.wp_slice(), + path=-1, figure=fig, showlegend=True) + for match in matches: + dtwvis.plot_warpingpaths_addpath(ax, match.path) + pdf.savefig(fig) + + fig = plt.figure(figsize=(10, 10)) + plt_slice = [180, 260] + wp = lc.wp_slice(*plt_slice, *plt_slice) + idx = wp < 0 + wp[idx] = -wp[idx] + fig, ax = dtwvis.plot_warpingpaths(series1[slice(*plt_slice)], series2[slice(*plt_slice)], + 
lc.wp_slice(*plt_slice, *plt_slice), + path=-1, figure=fig, showlegend=True) + for match in matches: + dtwvis.plot_warpingpaths_addpath(ax, dtwvis.path_slice(match.path, *plt_slice, *plt_slice)) + pdf.savefig(fig) + + pdf.close() + + +@numpyonly +def test_lc_pat2(): + with util_numpy.test_uses_numpy() as np: + series1 = np.array([75.01651784, 74.44948843, 73.38243493, 77.35680206, + 69.52647053, 63.21939026, 56.95414623, 49.94568125, + 45.33157425, 40.95059044, 41.90307336, 47.06055967, + 51.81505341, 55.43155354, 51.96320753, 48.20277142, + 45.00462966, 35.55600635, 32.1914493, 33.54732185, + 34.04776747, 35.40227115, 37.24460185, 32.33497074, + 31.56839457, 38.15810559, 39.529532, 29.82815339, + 19.36802916, 14.66462762, 13.16508061, 10.69378841, + 7.1061679, 6.12218125, 8.46480483, 11.15576087, + 11.91750573, 53.74608065, 135.66392315, 217.12666884, + 239.27151034, 210.42673095, 230.90243747, 256.56928804, + 243.7451023, 228.93578139, 224.0126408, 203.47771786, + 188.99680514, 194.56795499, 170.74918008, 138.98955878, + 124.02149478, 117.15055405, 86.49275773, 54.06789222, + 50.06429329, 47.90493883, 48.49793216, 45.46054247, + 36.37510044, 34.08652235, 27.73445741, 20.35611805, + 20.06074597, 20.66984978, 24.55085633, 29.0051147, + 30.67045557, 30.4834716]) + series2 = np.array([73.36588064, 69.89075092, 70.72715301, 71.09805833, + 64.71067346, 62.81497759, 62.0549533 , 56.25529614, + 46.65040363, 41.25971095, 43.61368296, 45.47837141, + 47.76064341, 50.8908327 , 53.30984479, 52.99279867, + 46.0746253 , 38.24987477, 33.40804685, 33.12954541, + 34.22982437, 33.82184342, 35.43014094, 32.7833301 , + 30.38732958, 34.17202522, 35.76355013, 29.71998153, + 19.8391811 , 14.60766209, 13.29786615, 10.73641285, + 7.55388596, 6.67208187, 8.39639018, 9.95173822, + 12.63007853, 28.26870444, 79.11514597, 141.86765425, + 152.2452013 , 141.92643704, 161.33045751, 176.69692497, + 164.85278629, 153.73041258, 154.77219994, 140.065507 , + 121.55208147, 111.32788681, 96.9295085 , 
73.39609933, + 62.69909111, 59.72158109, 42.21259683, 30.58299875, + 27.85303729, 27.50338718, 27.51806518, 24.46138111, + 18.28259934, 19.72762923, 21.058755 , 19.06932742, + 21.35947407, 21.32860493, 23.90240201, 29.79843967, + 33.49106013, 31.54302726]) + buffer = 50 + minlen = 20 + t1 = time.time() + lc = LocalConcurrences(series1, series2, window=int(buffer / 2), use_c=False) + lc.estimate_settings(series1, series2, tau_type='mean', tau_factor=2) + # lc.delta_factor = 1 + lc.exp_avg = None # 0.1 + lc.squash = 10 + lc.align() + t2 = time.time() + print(f'Time to align: {t2 - t1}') + + print(lc.settings(kind="str")) + t1 = time.time() + matches = [] + for match in lc.kbest_matches(k=None, minlen=minlen, buffer=-buffer): + if match is None: + break + matches.append(match) + t2 = time.time() + print(f'Time to find matches: {t2 - t1}') + + sm = lc.similarity_matrix() + + if directory and not dtwvis.test_without_visualization(): + try: + import matplotlib.pyplot as plt + import matplotlib.backends.backend_pdf + except ImportError: + raise MatplotlibException("No matplotlib available") + fn = directory / "test_lc_pat2.pdf" + pdf = matplotlib.backends.backend_pdf.PdfPages(fn) + fig = plt.figure(figsize=(10, 10)) + fig, ax = dtwvis.plot_warpingpaths(series1, series2, sm, + path=-1, figure=fig, showlegend=True, + matshow_kwargs=lc.similarity_matrix_matshow_kwargs(sm)) + pdf.savefig(fig) + + fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(10, 10)) + values = sm # np.array(sm.reshape((1, -1))) + # print(np.histogram(values)) + ax.hist(values, bins=100, histtype='step', range=(-2, 1)) + ax.vlines([lc.tau, lc.delta], 0, 100, color='red') + pdf.savefig(fig) + + fig = plt.figure(figsize=(10, 10)) + print(lc.wp_slice(positivize=True)) + fig, ax = dtwvis.plot_warpingpaths(series1, series2, + lc.wp_slice(positivize=True), + path=-1, figure=fig, showlegend=True) + for match in matches: + dtwvis.plot_warpingpaths_addpath(ax, match.path) + pdf.savefig(fig) + + # fig = 
plt.figure(figsize=(10, 10)) + # plt_slice = [180, 260] + # fig, ax = dtwvis.plot_warpingpaths(series1[slice(*plt_slice)], series2[slice(*plt_slice)], + # lc.wp_slice(*plt_slice, *plt_slice), + # path=-1, figure=fig, showlegend=True) + # for match in matches: + # dtwvis.plot_warpingpaths_addpath(ax, dtwvis.path_slice(match.path, *plt_slice, *plt_slice)) + # pdf.savefig(fig) + + pdf.close() + + @numpyonly @pytest.mark.benchmark(group="subseqsearch_eeg") @pytest.mark.parametrize("use_c,use_lb", [(False, False), (True, False), (False, True), (True, True)]) From 113cd33a7d01f1b2632ae086323d6efe9f346155 Mon Sep 17 00:00:00 2001 From: wannesm Date: Sun, 24 Sep 2023 21:21:05 +0200 Subject: [PATCH 56/59] improve docs --- docs/conf.py | 2 +- docs/usage/dtw.rst | 2 +- dtaidistance/dtw.py | 14 +++++++------- setup.py | 3 ++- 4 files changed, 11 insertions(+), 10 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index ecbb37bb..11b9de50 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -68,7 +68,7 @@ # # This is also used if you do content translation via gettext catalogs. # Usually you set "language" from the command line for these cases. -language = None +language = "en" # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. diff --git a/docs/usage/dtw.rst b/docs/usage/dtw.rst index e2059ef2..ba59a307 100644 --- a/docs/usage/dtw.rst +++ b/docs/usage/dtw.rst @@ -11,7 +11,7 @@ Dynamic Time Warping (DTW) path = dtw.warping_path(s1, s2) dtwvis.plot_warping(s1, s2, path, filename="warp.png") -.. figure:: https://people.cs.kuleuven.be/wannes.meert/dtw/dtw_example.png?v=5 +.. figure:: /_static/dtw_example.png :alt: DTW Example diff --git a/dtaidistance/dtw.py b/dtaidistance/dtw.py index eb56516d..198ccfc7 100644 --- a/dtaidistance/dtw.py +++ b/dtaidistance/dtw.py @@ -197,13 +197,13 @@ def distance(s1, s2, This function keeps a compact matrix, not the full warping paths matrix. 
- Uses dynamic programming to compute: + Uses dynamic programming to compute:: - wps[i, j] = (s1[i]-s2[j])**2 + min( - wps[i-1, j ] + penalty, // vertical / insertion / expansion - wps[i , j-1] + penalty, // horizontal / deletion / compression - wps[i-1, j-1]) // diagonal / match - dtw = sqrt(wps[-1, -1]) + wps[i, j] = (s1[i]-s2[j])**2 + min( + wps[i-1, j ] + penalty, // vertical / insertion / expansion + wps[i , j-1] + penalty, // horizontal / deletion / compression + wps[i-1, j-1]) // diagonal / match + dtw = sqrt(wps[-1, -1]) :param s1: First sequence :param s2: Second sequence @@ -216,7 +216,7 @@ def distance(s1, s2, :param psi: Psi relaxation parameter (ignore start and end of matching). If psi is a single integer, it is used for both start and end relaxations of both series. If psi is a 4-tuple, it is used as the psi-relaxation for - (begin series1, end series1, begin series2, end series2) + (begin series1, end series1, begin series2, end series2). Useful for cyclical series. :param use_c: Use fast pure c compiled functions :param use_pruning: Prune values based on Euclidean distance. 
diff --git a/setup.py b/setup.py index a5458591..29c1159d 100755 --- a/setup.py +++ b/setup.py @@ -459,7 +459,8 @@ def check_openmp(cc_bin, noxpreprocessor, printfn=print): install_requires = ['numpy'] # 'cython>=0.29.6', setup_requires = ['numpy'] # 'setuptools>=18.0', 'cython>=0.29.6', tests_require = ['pytest', 'pytest-benchmark'] -dev_require = tests_require + ['matplotlib>=3.0.0', 'numpy', 'scipy'] +dev_require = tests_require + ['matplotlib>=3.0.0', 'numpy', 'scipy', + 'sphinx', 'sphinx_rtd_theme'] # Check version number init_fn = here / 'dtaidistance' / '__init__.py' From c25dce54ee03b1412464d4955a9da3446b308847 Mon Sep 17 00:00:00 2001 From: wannesm Date: Wed, 4 Oct 2023 11:58:21 +0200 Subject: [PATCH 57/59] Update readthedocs to v2 template --- .readthedocs.yml | 14 ++++++++++---- docs/requirements.txt | 1 + 2 files changed, 11 insertions(+), 4 deletions(-) create mode 100644 docs/requirements.txt diff --git a/.readthedocs.yml b/.readthedocs.yml index c1ffa000..5c28483a 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -1,9 +1,15 @@ +version: 2 + requirements_file: requirements.txt build: - image: latest + os: ubuntu-22.04 + tools: + python: "3.11" -python: - version: 3.6 - setup_py_install: true +sphinx: + configuration: docs/conf.py +python: + install: + - requirements: docs/requirements.txt diff --git a/docs/requirements.txt b/docs/requirements.txt new file mode 100644 index 00000000..002d1b93 --- /dev/null +++ b/docs/requirements.txt @@ -0,0 +1 @@ +Cython From 2da5c3e20f2f8a032ea36b071ed51c4fe825f126 Mon Sep 17 00:00:00 2001 From: wannesm Date: Wed, 4 Oct 2023 12:25:59 +0200 Subject: [PATCH 58/59] sphinx template --- docs/requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/requirements.txt b/docs/requirements.txt index 002d1b93..89dd4b57 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1 +1,2 @@ Cython +sphinx_rtd_theme From 93887bad1a6a9fedcb92df42ea4f507c63736f27 Mon Sep 17 00:00:00 2001 From: wannesm Date: 
Wed, 8 Nov 2023 10:43:26 +0100 Subject: [PATCH 59/59] docs --- dtaidistance/similarity.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dtaidistance/similarity.py b/dtaidistance/similarity.py index 5cf4b076..925d1514 100644 --- a/dtaidistance/similarity.py +++ b/dtaidistance/similarity.py @@ -17,6 +17,9 @@ def distance_to_similarity(D, r=None, method='exponential', return_params=False) - Reverse: r - D r is min(D) + max(D) if not given + All of these methods are monotonically decreasing transformations. The order of the + distances thus remains unchanged (only the direction). + Example usage:: dist_matrix = dtw.distance_matrix(series)