From 40ccd1cb7108377270dbf09823f0784890c59ae0 Mon Sep 17 00:00:00 2001 From: Alexander Andreev Date: Thu, 29 Jun 2023 20:22:40 +0300 Subject: [PATCH] Alignment with sklearn updates (#1346) * Deselect 'kulsinski' distance knn tests * Add input validation for pairwise distance * Move to previous joblib version on Win Python 3.7 * Add public functions to selected sklearn tests * Fix predict_proba in SVM * Conditional testing for public functions --- .ci/scripts/select_sklearn_tests.py | 3 ++ daal4py/sklearn/metrics/_pairwise.py | 20 ++++++++- deselected_tests.yaml | 4 ++ requirements-test.txt | 1 + sklearnex/svm/nusvc.py | 67 +++++++++++++++------------- sklearnex/svm/svc.py | 67 +++++++++++++++------------- 6 files changed, 101 insertions(+), 61 deletions(-) diff --git a/.ci/scripts/select_sklearn_tests.py b/.ci/scripts/select_sklearn_tests.py index c7a1d5654b..57453a13e9 100644 --- a/.ci/scripts/select_sklearn_tests.py +++ b/.ci/scripts/select_sklearn_tests.py @@ -16,6 +16,7 @@ #=============================================================================== import argparse import sys +from daal4py.sklearn._utils import sklearn_check_version def parse_tests_tree(entry, prefix=''): @@ -60,6 +61,8 @@ def parse_tests_tree(entry, prefix=''): 'test_svm.py' ] } +if sklearn_check_version('1.2'): + tests_map["tests"] = ["test_public_functions.py"] if __name__ == '__main__': diff --git a/daal4py/sklearn/metrics/_pairwise.py b/daal4py/sklearn/metrics/_pairwise.py index 6619fd14cf..a8e82145e3 100755 --- a/daal4py/sklearn/metrics/_pairwise.py +++ b/daal4py/sklearn/metrics/_pairwise.py @@ -35,9 +35,13 @@ def _precompute_metric_params(*args, **kwrds): import daal4py from daal4py.sklearn.utils.validation import _daal_check_array -from .._utils import (getFPType, PatchingConditionsChain) +from .._utils import (getFPType, PatchingConditionsChain, sklearn_check_version) from .._device_offload import support_usm_ndarray +if sklearn_check_version('1.3'): + from sklearn.utils._param_validation import ( + validate_params, Integral, StrOptions) + def _daal4py_cosine_distance_dense(X): X_fptype = getFPType(X) @@ -215,3 +219,17 @@ def pairwise_distances(X, Y=None, metric="euclidean", n_jobs=None, func = partial(distance.cdist, metric=metric, **kwds) return _parallel_pairwise(X, Y, func, n_jobs, **kwds) + + +if sklearn_check_version('1.3'): + validation_kwargs = {'prefer_skip_nested_validation': True} \ + if sklearn_check_version('1.4') else {} + pairwise_distances = validate_params( + { + "X": ["array-like", "sparse matrix"], + "Y": ["array-like", "sparse matrix", None], + "metric": [StrOptions(set(_VALID_METRICS) | {"precomputed"}), callable], + "n_jobs": [Integral, None], + "force_all_finite": ["boolean", StrOptions({"allow-nan"})], + }, **validation_kwargs + )(pairwise_distances) diff --git a/deselected_tests.yaml b/deselected_tests.yaml index 02a8b13948..a76a5ee6dc 100755 --- a/deselected_tests.yaml +++ b/deselected_tests.yaml @@ -25,6 +25,10 @@ # will exclude deselection in versions 0.18.1, and 0.18.2 only. deselected_tests: + # 'kulsinski' distance was deprecated in scipy 1.11 but still marked as supported in scikit-learn < 1.3 + - neighbors/tests/test_neighbors.py::test_kneighbors_brute_backend[float64-kulsinski] <1.3 + - neighbors/tests/test_neighbors.py::test_radius_neighbors_brute_backend[kulsinski] <1.3 + # test for KMeans FutureWarning is not removed from sklearn tests suit yet - cluster/tests/test_k_means.py::test_change_n_init_future_warning[KMeans-10] ==1.4.dev0 diff --git a/requirements-test.txt b/requirements-test.txt index 6bade8a3a3..22392d9caa 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -6,3 +6,4 @@ scikit-learn==1.0.2 ; python_version == '3.7' scikit-learn==1.2.2 ; python_version >= '3.8' pandas==1.3.5 ; python_version == '3.7' pandas==2.0.1 ; python_version >= '3.8' +joblib==1.2.0 ; python_version == '3.7' and sys_platform == 'win32' diff --git a/sklearnex/svm/nusvc.py b/sklearnex/svm/nusvc.py index 9b3bfb9c4e..cba5aba42e 100644 --- a/sklearnex/svm/nusvc.py +++ b/sklearnex/svm/nusvc.py @@ -21,6 +21,8 @@ from sklearn.svm import NuSVC as sklearn_NuSVC from sklearn.utils.validation import _deprecate_positional_args from sklearn.exceptions import NotFittedError +if sklearn_check_version('1.0'): + from sklearn.utils.metaestimators import available_if from onedal.svm import NuSVC as onedal_NuSVC @@ -114,36 +116,41 @@ def predict(self, X): 'sklearn': sklearn_NuSVC.predict, }, X) - @property - def predict_proba(self): - """ - Compute probabilities of possible outcomes for samples in X. - - The model need to have probability information computed at training - time: fit with attribute `probability` set to True. - - Parameters - ---------- - X : array-like of shape (n_samples, n_features) - For kernel="precomputed", the expected shape of X is - (n_samples_test, n_samples_train). - - Returns - ------- - T : ndarray of shape (n_samples, n_classes) - Returns the probability of the sample for each class in - the model. The columns correspond to the classes in sorted - order, as they appear in the attribute :term:`classes_`. - - Notes - ----- - The probability model is created using cross validation, so - the results can be slightly different than those obtained by - predict. Also, it will produce meaningless results on very small - datasets. - """ - self._check_proba() - return self._predict_proba + if sklearn_check_version('1.0'): + @available_if(sklearn_NuSVC._check_proba) + def predict_proba(self, X): + """ + Compute probabilities of possible outcomes for samples in X. + + The model need to have probability information computed at training + time: fit with attribute `probability` set to True. + + Parameters + ---------- + X : array-like of shape (n_samples, n_features) + For kernel="precomputed", the expected shape of X is + (n_samples_test, n_samples_train). + + Returns + ------- + T : ndarray of shape (n_samples, n_classes) + Returns the probability of the sample for each class in + the model. The columns correspond to the classes in sorted + order, as they appear in the attribute :term:`classes_`. + + Notes + ----- + The probability model is created using cross validation, so + the results can be slightly different than those obtained by + predict. Also, it will produce meaningless results on very small + datasets. + """ + return self._predict_proba(X) + else: + @property + def predict_proba(self): + self._check_proba() + return self._predict_proba @wrap_output_data def _predict_proba(self, X): diff --git a/sklearnex/svm/svc.py b/sklearnex/svm/svc.py index 7b62918307..ff4f1c3466 100644 --- a/sklearnex/svm/svc.py +++ b/sklearnex/svm/svc.py @@ -24,6 +24,8 @@ from sklearn.svm import SVC as sklearn_SVC from sklearn.utils.validation import _deprecate_positional_args from sklearn.exceptions import NotFittedError +if sklearn_check_version('1.0'): + from sklearn.utils.metaestimators import available_if from onedal.svm import SVC as onedal_SVC @@ -116,36 +118,41 @@ def predict(self, X): 'sklearn': sklearn_SVC.predict, }, X) - @property - def predict_proba(self): - """ - Compute probabilities of possible outcomes for samples in X. - - The model need to have probability information computed at training - time: fit with attribute `probability` set to True. - - Parameters - ---------- - X : array-like of shape (n_samples, n_features) - For kernel="precomputed", the expected shape of X is - (n_samples_test, n_samples_train). - - Returns - ------- - T : ndarray of shape (n_samples, n_classes) - Returns the probability of the sample for each class in - the model. The columns correspond to the classes in sorted - order, as they appear in the attribute :term:`classes_`. - - Notes - ----- - The probability model is created using cross validation, so - the results can be slightly different than those obtained by - predict. Also, it will produce meaningless results on very small - datasets. - """ - self._check_proba() - return self._predict_proba + if sklearn_check_version('1.0'): + @available_if(sklearn_SVC._check_proba) + def predict_proba(self, X): + """ + Compute probabilities of possible outcomes for samples in X. + + The model need to have probability information computed at training + time: fit with attribute `probability` set to True. + + Parameters + ---------- + X : array-like of shape (n_samples, n_features) + For kernel="precomputed", the expected shape of X is + (n_samples_test, n_samples_train). + + Returns + ------- + T : ndarray of shape (n_samples, n_classes) + Returns the probability of the sample for each class in + the model. The columns correspond to the classes in sorted + order, as they appear in the attribute :term:`classes_`. + + Notes + ----- + The probability model is created using cross validation, so + the results can be slightly different than those obtained by + predict. Also, it will produce meaningless results on very small + datasets. + """ + return self._predict_proba(X) + else: + @property + def predict_proba(self): + self._check_proba() + return self._predict_proba @wrap_output_data def _predict_proba(self, X):