From 53c026d6dba19a8fe7bdc209c77548a7c98c60c5 Mon Sep 17 00:00:00 2001
From: CodeLionX
Date: Thu, 8 Aug 2024 12:06:03 +0200
Subject: [PATCH 1/3] feat: support unsupervised and semi-supervised usage of PyODAdapter and fix #1873

---
 aeon/anomaly_detection/_pyodadapter.py | 62 ++++++++++++++----
 .../tests/test_pyod_adapter.py         | 64 +++++++++++++++----
 2 files changed, 104 insertions(+), 22 deletions(-)

diff --git a/aeon/anomaly_detection/_pyodadapter.py b/aeon/anomaly_detection/_pyodadapter.py
index 28057a39f3..454510c405 100644
--- a/aeon/anomaly_detection/_pyodadapter.py
+++ b/aeon/anomaly_detection/_pyodadapter.py
@@ -34,6 +34,16 @@ class PyODAdapter(BaseAnomalyDetector):
     series the adapter concatenates the data points of each channel in the window
     to a single univariate feature vector per window as input to the PyOD model.
 
+    The PyOD adapter supports unsupervised and semi-supervised learning. The adapter
+    can be fitted on a reference time series and then used to detect anomalies in a
+    different target time series with the same number of channels. For most PyOD
+    models, the reference (or training) time series does not need to be free of
+    anomalies. However, knowledge about anomalies in the reference time series, such
+    as anomaly labels, is not used during the fitting process. Use `fit` to fit the
+    model on the reference time series and `predict` to detect anomalies in the
+    target time series. For unsupervised anomaly detection, use `fit_predict`
+    directly on the target time series.
+
     .. list-table:: Capabilities
        :stub-columns: 1
 
@@ -42,7 +52,7 @@ class PyODAdapter(BaseAnomalyDetector):
        * - Output data format
          - anomaly scores
        * - Learning Type
-         - unsupervised
+         - unsupervised or semi-supervised
 
 
     Parameters
@@ -70,6 +80,7 @@ class PyODAdapter(BaseAnomalyDetector):
         "capability:multivariate": True,
         "capability:univariate": True,
         "capability:missing_values": False,
+        "fit_is_empty": False,
         # Omit the version specification until PyOD has __version__
         # (https://github.com/yzhao062/pyod/pull/584 in dev but not released yet)
         # "python_dependencies": ["pyod>=1.1.3"]
@@ -83,7 +94,6 @@ def __init__(
         self.window_size = window_size
         self.stride = stride
-        self._padding_length = 0
 
         super().__init__(axis=0)
 
     @staticmethod
@@ -93,7 +103,37 @@ def _is_pyod_model(model: Any) -> bool:
         return isinstance(model, BaseDetector)
 
-    def _predict(self, X) -> np.ndarray:
+    def _fit(self, X: np.ndarray, y: np.ndarray | None = None) -> None:
+        self._check_params(X)
+        _X, _ = sliding_windows(
+            X, window_size=self.window_size, stride=self.stride, axis=0
+        )
+        self._inner_fit(_X)
+
+    def _predict(self, X: np.ndarray) -> np.ndarray:
+        _X, padding = sliding_windows(
+            X, window_size=self.window_size, stride=self.stride, axis=0
+        )
+        window_anomaly_scores = self.pyod_model.decision_function(_X)
+        point_anomaly_scores = reverse_windowing(
+            window_anomaly_scores, self.window_size, np.nanmean, self.stride, padding
+        )
+        return point_anomaly_scores
+
+    def _fit_predict(self, X: np.ndarray, y: np.ndarray | None = None) -> np.ndarray:
+        self._check_params(X)
+        _X, padding = sliding_windows(
+            X, window_size=self.window_size, stride=self.stride, axis=0
+        )
+        self._inner_fit(_X)
+
+        window_anomaly_scores = self.pyod_model.decision_scores_
+        point_anomaly_scores = reverse_windowing(
+            window_anomaly_scores, self.window_size, np.nanmean, self.stride, padding
+        )
+        return point_anomaly_scores
+
+    def _check_params(self, X: np.ndarray) -> None:
         if not self._is_pyod_model(self.pyod_model):
             raise ValueError("The provided 
model is not a compatible PyOD model.") @@ -108,15 +148,15 @@ def _predict(self, X) -> np.ndarray: "The stride must be at least 1 and at most the window size." ) - _X, self._padding_length = sliding_windows( - X, window_size=self.window_size, stride=self.stride, axis=0 - ) - self.pyod_model.fit(_X) - scores = self.pyod_model.decision_scores_ - scores = reverse_windowing( - scores, self.window_size, np.nanmean, self.stride, self._padding_length + def _inner_fit(self, X: np.ndarray) -> None: + self.pyod_model.fit(X) + + def _inner_predict(self, X: np.ndarray, padding: int) -> np.ndarray: + window_anomaly_scores = self.pyod_model.decision_function(X) + point_anomaly_scores = reverse_windowing( + window_anomaly_scores, self.window_size, np.nanmean, self.stride, padding ) - return scores + return point_anomaly_scores @classmethod def get_test_params(cls, parameter_set="default"): diff --git a/aeon/anomaly_detection/tests/test_pyod_adapter.py b/aeon/anomaly_detection/tests/test_pyod_adapter.py index 49e190518b..690d9b732b 100644 --- a/aeon/anomaly_detection/tests/test_pyod_adapter.py +++ b/aeon/anomaly_detection/tests/test_pyod_adapter.py @@ -22,13 +22,12 @@ def test_pyod_adapter_default(): series[50:58] -= 2 ad = PyODAdapter(LOF(), window_size=10, stride=1) - pred = ad.predict(series, axis=0) + pred = ad.fit_predict(series, axis=0) assert pred.shape == (80,) assert pred.dtype == np.float_ assert 50 <= np.argmax(pred) <= 60 assert hasattr(ad, "pyod_model") - assert ad.pyod_model.decision_scores_.shape == (71,) @pytest.mark.skipif( @@ -45,13 +44,12 @@ def test_pyod_adapter_multivariate(): series[50:58, 0] -= 2 ad = PyODAdapter(LOF(), window_size=10, stride=1) - pred = ad.predict(series, axis=0) + pred = ad.fit_predict(series, axis=0) assert pred.shape == (80,) assert pred.dtype == np.float_ assert 50 <= np.argmax(pred) <= 60 assert hasattr(ad, "pyod_model") - assert ad.pyod_model.decision_scores_.shape == (71,) @pytest.mark.skipif( @@ -66,13 +64,12 @@ def test_pyod_adapter_no_window_univariate(): series[50:58] -= 2 ad = PyODAdapter(LOF(), window_size=1, stride=1) - pred = ad.predict(series, axis=0) + pred = ad.fit_predict(series, axis=0) assert pred.shape == (80,) assert pred.dtype == np.float_ assert 50 <= np.argmax(pred) <= 60 assert hasattr(ad, "pyod_model") - assert ad.pyod_model.decision_scores_.shape == (80,) @pytest.mark.skipif( @@ -89,13 +86,12 @@ def test_pyod_adapter_no_window_multivariate(): series[50:58, 0] -= 2 ad = PyODAdapter(LOF(), window_size=1, stride=1) - pred = ad.predict(series, axis=0) + pred = ad.fit_predict(series, axis=0) assert pred.shape == (80,) assert pred.dtype == np.float_ assert 50 <= np.argmax(pred) <= 60 assert hasattr(ad, "pyod_model") - assert ad.pyod_model.decision_scores_.shape == (80,) @pytest.mark.skipif( @@ -110,13 +106,12 @@ def test_pyod_adapter_stride_univariate(): series[50:58] -= 2 ad = PyODAdapter(LOF(), window_size=10, stride=5) - pred = ad.predict(series, axis=0) + pred = ad.fit_predict(series, axis=0) assert pred.shape == (80,) assert pred.dtype == np.float_ assert 50 <= np.argmax(pred) <= 60 assert hasattr(ad, "pyod_model") - assert ad.pyod_model.decision_scores_.shape == (15,) @pytest.mark.skipif( @@ -133,10 +128,57 @@ def test_pyod_adapter_stride_multivariate(): series[50:58, 0] -= 2 ad = PyODAdapter(LOF(), window_size=10, stride=5) + pred = ad.fit_predict(series, axis=0) + + assert pred.shape == (80,) + assert pred.dtype == np.float_ + assert 50 <= np.argmax(pred) <= 60 + assert hasattr(ad, "pyod_model") + + +@pytest.mark.skipif( + not 
_check_soft_dependencies("pyod", severity="none"), + reason="required soft dependency PyOD not available", +) +def test_pyod_adapter_semi_supervised_univariate(): + """Test PyODAdapter in semi-supervised mode.""" + from pyod.models.lof import LOF + + series = make_series(n_timepoints=80, return_numpy=True, random_state=0) + series[50:58] -= 2 + train_series = make_series(n_timepoints=100, return_numpy=True, random_state=1) + + ad = PyODAdapter(LOF(), window_size=10) + ad.fit(train_series, axis=0) + pred = ad.predict(series, axis=0) + + assert pred.shape == (80,) + assert pred.dtype == np.float_ + assert 50 <= np.argmax(pred) <= 60 + assert hasattr(ad, "pyod_model") + + +@pytest.mark.skipif( + not _check_soft_dependencies("pyod", severity="none"), + reason="required soft dependency PyOD not available", +) +def test_pyod_adapter_semi_supervised_multivariate(): + """Test PyODAdapter in semi-supervised mode (multivariate).""" + from pyod.models.lof import LOF + + series = make_series( + n_timepoints=80, n_columns=2, return_numpy=True, random_state=0 + ) + series[50:58, 0] -= 2 + train_series = make_series( + n_timepoints=100, n_columns=2, return_numpy=True, random_state=1 + ) + + ad = PyODAdapter(LOF(), window_size=10, stride=5) + ad.fit(train_series, axis=0) pred = ad.predict(series, axis=0) assert pred.shape == (80,) assert pred.dtype == np.float_ assert 50 <= np.argmax(pred) <= 60 assert hasattr(ad, "pyod_model") - assert ad.pyod_model.decision_scores_.shape == (15,) From 8b88300acd32c6782bf31c1d851942d2e6d6b7df Mon Sep 17 00:00:00 2001 From: CodeLionX Date: Thu, 8 Aug 2024 12:08:00 +0200 Subject: [PATCH 2/3] refactor: avoid code duplication in kmeans --- aeon/anomaly_detection/_kmeans.py | 58 +++++++++++++++++++------------ 1 file changed, 36 insertions(+), 22 deletions(-) diff --git a/aeon/anomaly_detection/_kmeans.py b/aeon/anomaly_detection/_kmeans.py index 4eefc018d8..061df7d769 100644 --- a/aeon/anomaly_detection/_kmeans.py +++ b/aeon/anomaly_detection/_kmeans.py @@ -110,46 +110,60 @@ def __init__( self.estimator_: Optional[KMeans] = None def _fit(self, X: np.ndarray, y: Optional[np.ndarray] = None) -> "KMeansAD": + self._check_params(X) _X, _ = sliding_windows( X, window_size=self.window_size, stride=self.stride, axis=0 ) - self.estimator_ = KMeans( - n_clusters=self.n_clusters, - random_state=self.random_state, - init="k-means++", - n_init=10, - max_iter=300, - tol=1e-4, - verbose=0, - algorithm="lloyd", - ) - self.estimator_.fit(_X) + self._inner_fit(_X) return self def _predict(self, X) -> np.ndarray: _X, padding = sliding_windows( X, window_size=self.window_size, stride=self.stride, axis=0 ) - clusters = self.estimator_.predict(_X) - window_scores = np.linalg.norm( - _X - self.estimator_.cluster_centers_[clusters], axis=1 - ) - point_anomaly_scores = reverse_windowing( - window_scores, self.window_size, np.nanmean, self.stride, padding - ) + point_anomaly_scores = self._inner_predict(_X, padding) return point_anomaly_scores def _fit_predict(self, X: np.ndarray, y: Optional[np.ndarray] = None) -> np.ndarray: + self._check_params(X) _X, padding = sliding_windows( X, window_size=self.window_size, stride=self.stride, axis=0 ) + self._inner_fit(_X) + point_anomaly_scores = self._inner_predict(_X, padding) + return point_anomaly_scores + + def _check_params(self, X: np.ndarray) -> None: + if self.window_size < 1 or self.window_size > X.shape[0]: + raise ValueError( + "The window size must be at least 1 and at most the length of the " + "time series." 
+ ) + + if self.stride < 1 or self.stride > self.window_size: + raise ValueError( + "The stride must be at least 1 and at most the window size." + ) + if self.n_clusters < 1: + raise ValueError("The number of clusters must be at least 1.") + + def _inner_fit(self, X: np.ndarray) -> None: self.estimator_ = KMeans( - n_clusters=self.n_clusters, random_state=self.random_state + n_clusters=self.n_clusters, + random_state=self.random_state, + init="k-means++", + n_init=10, + max_iter=300, + tol=1e-4, + verbose=0, + algorithm="lloyd", ) - self.estimator_.fit(_X) - clusters = self.estimator_.predict(_X) + self.estimator_.fit(X) + + def _inner_predict(self, X: np.ndarray, padding: int) -> np.ndarray: + clusters = self.estimator_.predict(X) window_scores = np.linalg.norm( - _X - self.estimator_.cluster_centers_[clusters], axis=1 + X - self.estimator_.cluster_centers_[clusters], axis=1 ) point_anomaly_scores = reverse_windowing( window_scores, self.window_size, np.nanmean, self.stride, padding From f00c5424fb24c7da7a81e83c2e0cfb3c80dda72b Mon Sep 17 00:00:00 2001 From: CodeLionX Date: Thu, 8 Aug 2024 12:08:40 +0200 Subject: [PATCH 3/3] feat: improve documentation of anomaly detection module --- aeon/anomaly_detection/base.py | 10 ++-- aeon/datasets/dataset_collections.py | 2 +- docs/api_reference/anomaly_detection.rst | 59 +++++++++++++++++++++++- 3 files changed, 65 insertions(+), 6 deletions(-) diff --git a/aeon/anomaly_detection/base.py b/aeon/anomaly_detection/base.py index 573938a158..9d6896c540 100644 --- a/aeon/anomaly_detection/base.py +++ b/aeon/anomaly_detection/base.py @@ -46,19 +46,23 @@ class BaseAnomalyDetector(BaseSeriesEstimator, ABC): Unsupervised (default): Unsupervised detectors do not require any training data and can directly be used on the target time series. Their tags are set to ``fit_is_empty=True`` - and ``requires_y=False``. + and ``requires_y=False``. You would usually call the ``fit_predict`` method + on these detectors. Semi-supervised: Semi-supervised detectors require a training step on a time series without anomalies (normal behaving time series). The target value ``y`` would consist of only zeros. Thus, these algorithms have logic in the ``fit`` method, but do not require the target values. Their tags are set to - ``fit_is_empty=False`` and ``requires_y=False``. + ``fit_is_empty=False`` and ``requires_y=False``. You would usually first + call the ``fit`` method on the training data and then the ``predict`` + method for your target time series. Supervised: Supervised detectors require a training step on a time series with known anomalies (anomalies should be present and must be annotated). The detector implements the ``fit`` method, and the target value ``y`` consists of zeros and ones. Their tags are, thus, set to ``fit_is_empty=False`` and - ``requires_y=True``. + ``requires_y=True``. You would usually first call the ``fit`` method on the + training data and then the ``predict`` method for your target time series. Parameters ---------- diff --git a/aeon/datasets/dataset_collections.py b/aeon/datasets/dataset_collections.py index be82060876..71c40ff7d0 100644 --- a/aeon/datasets/dataset_collections.py +++ b/aeon/datasets/dataset_collections.py @@ -1,7 +1,7 @@ """ List of datasets available for classification, regression and forecasting archives. -The data can also be used for clustering. +The classification and regression data can also be used for clustering. Classification data can be downloaded directly from the timeseriesclassification.com archive. 
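The learning-type descriptions added to ``BaseAnomalyDetector`` above (unsupervised via ``fit_predict``, semi-supervised via ``fit`` followed by ``predict``) translate into a short usage sketch. This sketch is not part of the patch: it assumes an aeon installation containing the refactored ``KMeansAD`` and uses the constructor parameters visible in the diff (``n_clusters``, ``window_size``, ``stride``, ``random_state``), whose defaults may differ.

.. code-block:: python

    import numpy as np

    from aeon.anomaly_detection import KMeansAD

    rng = np.random.default_rng(0)
    reference = rng.normal(size=(200, 2))  # anomaly-free reference series, 2 channels
    target = rng.normal(size=(200, 2))
    target[100:110, 0] += 5.0              # injected anomaly in channel 0

    # Semi-supervised: fit on the anomaly-free reference series, then score the target.
    detector = KMeansAD(n_clusters=2, window_size=20, stride=1, random_state=0)
    detector.fit(reference, axis=0)
    scores = detector.predict(target, axis=0)  # point-wise anomaly scores, shape (200,)

    # Unsupervised: fit and score the target series in a single call.
    scores = detector.fit_predict(target, axis=0)
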
diff --git a/docs/api_reference/anomaly_detection.rst b/docs/api_reference/anomaly_detection.rst
index 8eec301ec8..dbf6c108bd 100644
--- a/docs/api_reference/anomaly_detection.rst
+++ b/docs/api_reference/anomaly_detection.rst
@@ -4,9 +4,64 @@ Anomaly Detection
 =================
 
 Time Series Anomaly Detection aims at discovering regions of a time series that in
-some way not representative of the underlying generative process.
+some way are not representative of the underlying generative process.
 
 The :mod:`aeon.anomaly_detection` module contains algorithms and tools
-for time series anomaly detection.
+for time series anomaly detection. The detectors have different capabilities that can
+be grouped into the following categories, where ``m`` is the number of time points and
+``d`` is the number of channels of a time series:
+
+Input data format (one of the following):
+    Univariate series (default):
+        Example: :class:`~aeon.anomaly_detection.MERLIN`.
+
+        - np.ndarray, shape ``(m,)``, ``(m, 1)`` or ``(1, m)`` depending on axis.
+        - pd.DataFrame, shape ``(m, 1)`` or ``(1, m)`` depending on axis.
+        - pd.Series, shape ``(m,)``.
+    Multivariate series:
+        Example: :class:`~aeon.anomaly_detection.KMeansAD`.
+
+        - np.ndarray, shape ``(m, d)`` or ``(d, m)`` depending on axis.
+        - pd.DataFrame, shape ``(m, d)`` or ``(d, m)`` depending on axis.
+
+Output data format (one of the following):
+    Anomaly scores (default):
+        np.ndarray, shape ``(m,)`` of type float. For each point of the input time
+        series, the anomaly score is a float value indicating the degree of
+        anomalousness. The higher the score, the more anomalous the point. The
+        detectors return raw anomaly scores that are not normalized.
+        Example: :class:`~aeon.anomaly_detection.PyODAdapter`.
+    Binary classification:
+        np.ndarray, shape ``(m,)`` of type bool or int. For each point of the input
+        time series, the output is a boolean or integer value indicating whether the
+        point is anomalous (``True``/``1``) or not (``False``/``0``).
+        Example: :class:`~aeon.anomaly_detection.STRAY`.
+
+Detector learning types:
+    Unsupervised (default):
+        Unsupervised detectors do not require any training data and can directly be
+        used on the target time series. You would usually call the ``fit_predict``
+        method on these detectors.
+        Example: :class:`~aeon.anomaly_detection.DWT_MLEAD`.
+    Semi-supervised:
+        Semi-supervised detectors require a training step on a time series without
+        anomalies (normal behaving time series). The target value ``y`` would
+        consist of only zeros. You would usually first call the ``fit`` method on the
+        training time series and then the ``predict`` method on your target time series.
+        Example: :class:`~aeon.anomaly_detection.KMeansAD`.
+    Supervised:
+        Supervised detectors require a training step on a time series with known
+        anomalies (anomalies should be present and must be annotated). The detector
+        implements the ``fit`` method, and the target value ``y`` consists of zeros
+        and ones, with ones indicating anomalous points. You would usually first call
+        the ``fit`` method on the training data and then the ``predict`` method on your
+        target time series.
+
+Each detector in this module specifies its supported input data format, output data
+format, and learning type in an overview table in its documentation. Some detectors
+support multiple learning types.
+
+Detectors
+---------
 
 .. currentmodule:: aeon.anomaly_detection
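As a complement to the prose overview in ``anomaly_detection.rst``, a minimal sketch of the two ``PyODAdapter`` workflows exercised by the new tests follows. It is not part of the patch; it assumes the optional ``pyod`` dependency is installed, and ``LOF`` stands in for any PyOD detector that could be wrapped.

.. code-block:: python

    import numpy as np
    from pyod.models.lof import LOF

    from aeon.anomaly_detection import PyODAdapter

    rng = np.random.default_rng(42)
    reference = rng.normal(size=150)  # reference (training) series, ideally mostly normal
    target = rng.normal(size=100)
    target[60:68] += 4.0              # injected anomaly

    # Unsupervised: fit and score the target series directly.
    detector = PyODAdapter(LOF(), window_size=10, stride=1)
    scores = detector.fit_predict(target, axis=0)  # shape (100,), higher = more anomalous

    # Semi-supervised: fit on the reference series, then score the target series.
    detector = PyODAdapter(LOF(), window_size=10, stride=1)
    detector.fit(reference, axis=0)
    scores = detector.predict(target, axis=0)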