Skip to content

Commit

Permalink
[ENH] Issue 1641 - Matrix profile-based anomaly detectors: left STAMPi (
Browse files Browse the repository at this point in the history
#2091)

* added LeftSTAMPi implementation based on the implementation in TimeEval

* fixed example markup in LeftSTAMPi docstring and added LeftSTAMPi to the documentation

* updated maintainer name to github username

* exclude examples from doctest

* removed implementation of the streaming mode. Might be added again after a decision about the streaming API has been made.

* fixed state modification in 'predict' by moving the initialisation part to 'fit'.

* import stumpy only once

* mock stumpy to run unit tests if package not installed

* removed obsolete check for stumpy being loaded and added missing type hints

* Automatic `pre-commit` fixes

---------

Co-authored-by: ferewi <[email protected]>
  • Loading branch information
ferewi and ferewi authored Sep 27, 2024
1 parent bc583f8 commit 5fadd1c
Show file tree
Hide file tree
Showing 4 changed files with 487 additions and 0 deletions.
2 changes: 2 additions & 0 deletions aeon/anomaly_detection/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,12 @@
"STRAY",
"PyODAdapter",
"STOMP",
"LeftSTAMPi",
]

from aeon.anomaly_detection._dwt_mlead import DWT_MLEAD
from aeon.anomaly_detection._kmeans import KMeansAD
from aeon.anomaly_detection._left_stampi import LeftSTAMPi
from aeon.anomaly_detection._merlin import MERLIN
from aeon.anomaly_detection._pyodadapter import PyODAdapter
from aeon.anomaly_detection._stomp import STOMP
Expand Down
161 changes: 161 additions & 0 deletions aeon/anomaly_detection/_left_stampi.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
"""LeftSTAMPi anomaly detector."""

__maintainer__ = ["ferewi"]
__all__ = ["LeftSTAMPi"]


import numpy as np

from aeon.anomaly_detection.base import BaseAnomalyDetector
from aeon.utils.windowing import reverse_windowing


class LeftSTAMPi(BaseAnomalyDetector):
    """LeftSTAMPi anomaly detector.

    LeftSTAMPi [1]_ calculates the left matrix profile of a time series,
    which is the distance to the nearest neighbor of all already observed
    subsequences (i.e. all preceding subsequences) in the time series,
    in an incremental manner. The matrix profile is then used to calculate
    the anomaly score for each time point. The larger the distance to the
    nearest neighbor, the more anomalous the time point is.

    LeftSTAMPi supports univariate time series only.

    .. list-table:: Capabilities
       :stub-columns: 1

       * - Input data format
         - univariate
       * - Output data format
         - anomaly scores
       * - Learning Type
         - unsupervised

    Parameters
    ----------
    window_size : int, default=3
        Size of the sliding window. Defaults to the minimal possible value of 3.
    n_init_train : int, default=3
        The number of points used to init the matrix profile.
        n_init_train must not be smaller than window_size.
        The discord will not be found in this segment.
    normalize : bool, default=True
        Whether to normalize the windows before computing the distance.
    p : float, default=2.0
        The p-norm to use for the distance calculation.
    k : int, default=1
        The number of top distances to return.

    Examples
    --------
    Calculate the anomaly score for the complete time series at once.
    Internally, this is applying the incremental approach outlined below.

    >>> import numpy as np  # doctest: +SKIP
    >>> from aeon.anomaly_detection import LeftSTAMPi  # doctest: +SKIP
    >>> X = np.random.default_rng(42).random((10))  # doctest: +SKIP
    >>> detector = LeftSTAMPi(window_size=3, n_init_train=3)  # doctest: +SKIP
    >>> detector.fit_predict(X)  # doctest: +SKIP
    array([0.        , 0.        , 0.        , 0.07042306, 0.15989868,
           0.68912499, 0.75398303, 0.89696118, 0.5516023 , 0.69736132])

    References
    ----------
    .. [1] Chin-Chia Michael Yeh, Yan Zhu, Liudmila Ulanova, Nurjahan Begum,
           Yifei Ding, Hoang Anh Dau, Diego Furtado Silva, Abdullah Mueen,
           and Eamonn Keogh: "Matrix Profile I: All Pairs Similarity Joins
           for Time Series: A Unifying View That Includes Motifs, Discords
           and Shapelets.", In Proceedings of the International Conference
           on Data Mining (ICDM), 1317–1322. doi: 10.1109/ICDM.2016.0179
    """

    _tags = {
        "capability:univariate": True,
        "capability:multivariate": False,
        "capability:missing_values": False,
        "fit_is_empty": False,
        "cant-pickle": True,
        "python_dependencies": ["stumpy"],
    }

    def __init__(
        self,
        window_size: int = 3,
        n_init_train: int = 3,
        normalize: bool = True,
        p: float = 2.0,
        k: int = 1,
    ):
        # Holds the incremental ``stumpy.stumpi`` object once ``fit`` has run
        # (it is not a plain ndarray); ``None`` until then.
        self.mp_ = None
        self.window_size = window_size
        self.n_init_train = n_init_train
        self.normalize = normalize
        self.p = p
        self.k = k

        super().__init__(axis=0)

    def _check_params(self, X):
        """Validate the hyper-parameters against the series ``X``.

        Parameters
        ----------
        X : np.ndarray
            One-dimensional series the parameters are checked against.

        Raises
        ------
        ValueError
            If ``window_size`` is smaller than 3, larger than ``len(X)`` or
            larger than ``n_init_train``, or if ``k`` is smaller than 1 or
            larger than ``len(X) - window_size + 1``.
        """
        if self.window_size < 3 or self.window_size > len(X):
            raise ValueError(
                "The window size must be at least 3 and at most the length of the "
                "time series."
            )

        if self.window_size > self.n_init_train:
            raise ValueError(
                f"The window size must be less than or equal to "
                f"n_init_train (is: {self.n_init_train})"
            )

        if self.k < 1 or self.k > len(X) - self.window_size + 1:
            raise ValueError(
                "The top `k` distances must be at least 1 and at most the length of "
                "the time series minus the window size."
            )

    def _fit(self, X: np.ndarray, y=None) -> "LeftSTAMPi":
        """Initialise the incremental matrix profile on ``X``.

        Parameters
        ----------
        X : np.ndarray
            Initial segment of the series.
        y : ignored
            Present for interface compatibility only.

        Returns
        -------
        LeftSTAMPi
            The fitted detector (``self``).

        Raises
        ------
        ValueError
            If the hyper-parameters are not valid for ``X``.
        """
        # ``squeeze`` turns a single observation of shape (1, 1) into a 0-d
        # array; ``atleast_1d`` keeps ``len`` and iteration well-defined.
        X = np.atleast_1d(X.squeeze())
        self._check_params(X)

        self._call_stumpi(X)

        return self

    def _predict(self, X: np.ndarray) -> np.ndarray:
        """Feed ``X`` point by point into the matrix profile and score it.

        Parameters
        ----------
        X : np.ndarray
            New observations that are appended to the fitted matrix profile.

        Returns
        -------
        np.ndarray
            Point-wise anomaly scores obtained by reverse-windowing the left
            matrix profile held by the stumpi object (which presumably also
            covers the points seen during ``fit`` -- see the class example,
            where 10 inputs yield 10 scores).
        """
        X = np.atleast_1d(X.squeeze())
        # The hyper-parameters were validated against the training segment in
        # ``_fit``. Re-validating them against the length of this batch would
        # wrongly reject incremental updates shorter than ``window_size``.

        for x in X:
            self.mp_.update(x)

        # Work on a copy so that zeroing the warm-up segment does not alter
        # the internal state of the fitted stumpi object.
        lmp = np.array(self.mp_._left_P)
        lmp[: self.n_init_train] = 0
        point_anomaly_scores = reverse_windowing(lmp, self.window_size)

        return point_anomaly_scores

    def _fit_predict(self, X: np.ndarray, y=None) -> np.ndarray:
        """Fit on the first ``n_init_train`` points and score the remainder.

        Parameters
        ----------
        X : np.ndarray
            Complete series.
        y : ignored
            Present for interface compatibility only.

        Returns
        -------
        np.ndarray
            Anomaly scores as returned by ``predict``.
        """
        if X.ndim > 1:
            X = X.squeeze()

        self.fit(X[: self.n_init_train])

        return self.predict(X[self.n_init_train :])

    def _call_stumpi(self, X: np.ndarray):
        """Create the ``stumpy.stumpi`` object for ``X`` and store it in ``mp_``."""
        # Imported lazily because stumpy is an optional dependency.
        import stumpy

        self.mp_ = stumpy.stumpi(
            X,
            m=self.window_size,
            egress=False,
            normalize=self.normalize,
            p=self.p,
            k=self.k,
        )
Loading

0 comments on commit 5fadd1c

Please sign in to comment.