From 40ccd1cb7108377270dbf09823f0784890c59ae0 Mon Sep 17 00:00:00 2001
From: Alexander Andreev <alexander.andreev@intel.com>
Date: Thu, 29 Jun 2023 20:22:40 +0300
Subject: [PATCH] Alignment with sklearn updates (#1346)

* Deselect 'kulsinski' distance knn tests

* Add input validation for pairwise distance

* Move to previous joblib version on Win Python 3.7

* Add public functions to selected sklearn tests

* Fix predict_proba in SVM

* Conditional testing for public functions
---
 .ci/scripts/select_sklearn_tests.py  |  3 ++
 daal4py/sklearn/metrics/_pairwise.py | 20 ++++++++-
 deselected_tests.yaml                |  4 ++
 requirements-test.txt                |  1 +
 sklearnex/svm/nusvc.py               | 67 +++++++++++++++-------------
 sklearnex/svm/svc.py                 | 67 +++++++++++++++-------------
 6 files changed, 101 insertions(+), 61 deletions(-)

diff --git a/.ci/scripts/select_sklearn_tests.py b/.ci/scripts/select_sklearn_tests.py
index c7a1d5654b..57453a13e9 100644
--- a/.ci/scripts/select_sklearn_tests.py
+++ b/.ci/scripts/select_sklearn_tests.py
@@ -16,6 +16,7 @@
 #===============================================================================
 import argparse
 import sys
+from daal4py.sklearn._utils import sklearn_check_version
 
 
 def parse_tests_tree(entry, prefix=''):
@@ -60,6 +61,8 @@ def parse_tests_tree(entry, prefix=''):
         'test_svm.py'
     ]
 }
+if sklearn_check_version('1.2'):
+    tests_map["tests"] = ["test_public_functions.py"]
 
 
 if __name__ == '__main__':
diff --git a/daal4py/sklearn/metrics/_pairwise.py b/daal4py/sklearn/metrics/_pairwise.py
index 6619fd14cf..a8e82145e3 100755
--- a/daal4py/sklearn/metrics/_pairwise.py
+++ b/daal4py/sklearn/metrics/_pairwise.py
@@ -35,9 +35,13 @@ def _precompute_metric_params(*args, **kwrds):
 
 import daal4py
 from daal4py.sklearn.utils.validation import _daal_check_array
-from .._utils import (getFPType, PatchingConditionsChain)
+from .._utils import (getFPType, PatchingConditionsChain, sklearn_check_version)
 from .._device_offload import support_usm_ndarray
 
+if sklearn_check_version('1.3'):
+    from sklearn.utils._param_validation import (
+        validate_params, Integral, StrOptions)
+
 
 def _daal4py_cosine_distance_dense(X):
     X_fptype = getFPType(X)
@@ -215,3 +219,17 @@ def pairwise_distances(X, Y=None, metric="euclidean", n_jobs=None,
         func = partial(distance.cdist, metric=metric, **kwds)
 
     return _parallel_pairwise(X, Y, func, n_jobs, **kwds)
+
+
+if sklearn_check_version('1.3'):
+    validation_kwargs = {'prefer_skip_nested_validation': True} \
+        if sklearn_check_version('1.4') else {}
+    pairwise_distances = validate_params(
+        {
+            "X": ["array-like", "sparse matrix"],
+            "Y": ["array-like", "sparse matrix", None],
+            "metric": [StrOptions(set(_VALID_METRICS) | {"precomputed"}), callable],
+            "n_jobs": [Integral, None],
+            "force_all_finite": ["boolean", StrOptions({"allow-nan"})],
+        }, **validation_kwargs
+    )(pairwise_distances)
diff --git a/deselected_tests.yaml b/deselected_tests.yaml
index 02a8b13948..a76a5ee6dc 100755
--- a/deselected_tests.yaml
+++ b/deselected_tests.yaml
@@ -25,6 +25,10 @@
 #  will exclude deselection in versions 0.18.1, and 0.18.2 only.
 
 deselected_tests:
+  # 'kulsinski' distance was removed in scipy 1.11 (deprecated since 1.9) but is still marked as supported in scikit-learn < 1.3
+  - neighbors/tests/test_neighbors.py::test_kneighbors_brute_backend[float64-kulsinski] <1.3
+  - neighbors/tests/test_neighbors.py::test_radius_neighbors_brute_backend[kulsinski] <1.3
+
   # test for KMeans FutureWarning is not removed from sklearn tests suit yet
   - cluster/tests/test_k_means.py::test_change_n_init_future_warning[KMeans-10] ==1.4.dev0
 
diff --git a/requirements-test.txt b/requirements-test.txt
index 6bade8a3a3..22392d9caa 100644
--- a/requirements-test.txt
+++ b/requirements-test.txt
@@ -6,3 +6,4 @@ scikit-learn==1.0.2 ; python_version == '3.7'
 scikit-learn==1.2.2 ; python_version >= '3.8'
 pandas==1.3.5 ; python_version == '3.7'
 pandas==2.0.1 ; python_version >= '3.8'
+joblib==1.2.0 ; python_version == '3.7' and sys_platform == 'win32'
diff --git a/sklearnex/svm/nusvc.py b/sklearnex/svm/nusvc.py
index 9b3bfb9c4e..cba5aba42e 100644
--- a/sklearnex/svm/nusvc.py
+++ b/sklearnex/svm/nusvc.py
@@ -21,6 +21,8 @@
 from sklearn.svm import NuSVC as sklearn_NuSVC
 from sklearn.utils.validation import _deprecate_positional_args
 from sklearn.exceptions import NotFittedError
+if sklearn_check_version('1.0'):
+    from sklearn.utils.metaestimators import available_if
 
 from onedal.svm import NuSVC as onedal_NuSVC
 
@@ -114,36 +116,41 @@ def predict(self, X):
             'sklearn': sklearn_NuSVC.predict,
         }, X)
 
-    @property
-    def predict_proba(self):
-        """
-        Compute probabilities of possible outcomes for samples in X.
-
-        The model need to have probability information computed at training
-        time: fit with attribute `probability` set to True.
-
-        Parameters
-        ----------
-        X : array-like of shape (n_samples, n_features)
-            For kernel="precomputed", the expected shape of X is
-            (n_samples_test, n_samples_train).
-
-        Returns
-        -------
-        T : ndarray of shape (n_samples, n_classes)
-            Returns the probability of the sample for each class in
-            the model. The columns correspond to the classes in sorted
-            order, as they appear in the attribute :term:`classes_`.
-
-        Notes
-        -----
-        The probability model is created using cross validation, so
-        the results can be slightly different than those obtained by
-        predict. Also, it will produce meaningless results on very small
-        datasets.
-        """
-        self._check_proba()
-        return self._predict_proba
+    if sklearn_check_version('1.0'):
+        @available_if(sklearn_NuSVC._check_proba)
+        def predict_proba(self, X):
+            """
+            Compute probabilities of possible outcomes for samples in X.
+
+            The model needs to have probability information computed at training
+            time: fit with attribute `probability` set to True.
+
+            Parameters
+            ----------
+            X : array-like of shape (n_samples, n_features)
+                For kernel="precomputed", the expected shape of X is
+                (n_samples_test, n_samples_train).
+
+            Returns
+            -------
+            T : ndarray of shape (n_samples, n_classes)
+                Returns the probability of the sample for each class in
+                the model. The columns correspond to the classes in sorted
+                order, as they appear in the attribute :term:`classes_`.
+
+            Notes
+            -----
+            The probability model is created using cross validation, so
+            the results can be slightly different than those obtained by
+            predict. Also, it will produce meaningless results on very small
+            datasets.
+            """
+            return self._predict_proba(X)
+    else:
+        @property
+        def predict_proba(self):
+            self._check_proba()
+            return self._predict_proba
 
     @wrap_output_data
     def _predict_proba(self, X):
diff --git a/sklearnex/svm/svc.py b/sklearnex/svm/svc.py
index 7b62918307..ff4f1c3466 100644
--- a/sklearnex/svm/svc.py
+++ b/sklearnex/svm/svc.py
@@ -24,6 +24,8 @@
 from sklearn.svm import SVC as sklearn_SVC
 from sklearn.utils.validation import _deprecate_positional_args
 from sklearn.exceptions import NotFittedError
+if sklearn_check_version('1.0'):
+    from sklearn.utils.metaestimators import available_if
 
 from onedal.svm import SVC as onedal_SVC
 
@@ -116,36 +118,41 @@ def predict(self, X):
             'sklearn': sklearn_SVC.predict,
         }, X)
 
-    @property
-    def predict_proba(self):
-        """
-        Compute probabilities of possible outcomes for samples in X.
-
-        The model need to have probability information computed at training
-        time: fit with attribute `probability` set to True.
-
-        Parameters
-        ----------
-        X : array-like of shape (n_samples, n_features)
-            For kernel="precomputed", the expected shape of X is
-            (n_samples_test, n_samples_train).
-
-        Returns
-        -------
-        T : ndarray of shape (n_samples, n_classes)
-            Returns the probability of the sample for each class in
-            the model. The columns correspond to the classes in sorted
-            order, as they appear in the attribute :term:`classes_`.
-
-        Notes
-        -----
-        The probability model is created using cross validation, so
-        the results can be slightly different than those obtained by
-        predict. Also, it will produce meaningless results on very small
-        datasets.
-        """
-        self._check_proba()
-        return self._predict_proba
+    if sklearn_check_version('1.0'):
+        @available_if(sklearn_SVC._check_proba)
+        def predict_proba(self, X):
+            """
+            Compute probabilities of possible outcomes for samples in X.
+
+            The model needs to have probability information computed at training
+            time: fit with attribute `probability` set to True.
+
+            Parameters
+            ----------
+            X : array-like of shape (n_samples, n_features)
+                For kernel="precomputed", the expected shape of X is
+                (n_samples_test, n_samples_train).
+
+            Returns
+            -------
+            T : ndarray of shape (n_samples, n_classes)
+                Returns the probability of the sample for each class in
+                the model. The columns correspond to the classes in sorted
+                order, as they appear in the attribute :term:`classes_`.
+
+            Notes
+            -----
+            The probability model is created using cross validation, so
+            the results can be slightly different than those obtained by
+            predict. Also, it will produce meaningless results on very small
+            datasets.
+            """
+            return self._predict_proba(X)
+    else:
+        @property
+        def predict_proba(self):
+            self._check_proba()
+            return self._predict_proba
 
     @wrap_output_data
     def _predict_proba(self, X):