Skip to content

Commit

Permalink
Alignment with sklearn updates (#1346)
Browse files Browse the repository at this point in the history
* Deselect 'kulsinski' distance knn tests

* Add input validation for pairwise distance

* Move to previous joblib version on Win Python 3.7

* Add public functions to selected sklearn tests

* Fix predict_proba in SVM

* Conditional testing for public functions
  • Loading branch information
Alexsandruss authored Jun 29, 2023
1 parent 9eb6afe commit 40ccd1c
Show file tree
Hide file tree
Showing 6 changed files with 101 additions and 61 deletions.
3 changes: 3 additions & 0 deletions .ci/scripts/select_sklearn_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#===============================================================================
import argparse
import sys
from daal4py.sklearn._utils import sklearn_check_version


def parse_tests_tree(entry, prefix=''):
Expand Down Expand Up @@ -60,6 +61,8 @@ def parse_tests_tree(entry, prefix=''):
'test_svm.py'
]
}
if sklearn_check_version('1.2'):
tests_map["tests"] = ["test_public_functions.py"]


if __name__ == '__main__':
Expand Down
20 changes: 19 additions & 1 deletion daal4py/sklearn/metrics/_pairwise.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,13 @@ def _precompute_metric_params(*args, **kwrds):

import daal4py
from daal4py.sklearn.utils.validation import _daal_check_array
from .._utils import (getFPType, PatchingConditionsChain)
from .._utils import (getFPType, PatchingConditionsChain, sklearn_check_version)
from .._device_offload import support_usm_ndarray

if sklearn_check_version('1.3'):
from sklearn.utils._param_validation import (
validate_params, Integral, StrOptions)


def _daal4py_cosine_distance_dense(X):
X_fptype = getFPType(X)
Expand Down Expand Up @@ -215,3 +219,17 @@ def pairwise_distances(X, Y=None, metric="euclidean", n_jobs=None,
func = partial(distance.cdist, metric=metric, **kwds)

return _parallel_pairwise(X, Y, func, n_jobs, **kwds)


if sklearn_check_version('1.3'):
    # sklearn >= 1.4 requires the 'prefer_skip_nested_validation' keyword for
    # validate_params; earlier 1.3.x versions do not accept it, so build the
    # kwargs conditionally.
    validation_kwargs = {'prefer_skip_nested_validation': True} \
        if sklearn_check_version('1.4') else {}
    # Re-bind the patched pairwise_distances wrapped in sklearn's parameter
    # validation decorator so it rejects invalid arguments the same way the
    # stock sklearn public function does (constraints mirror upstream:
    # array-like/sparse X and Y, a known metric name or callable, integral
    # n_jobs, and boolean/"allow-nan" force_all_finite).
    pairwise_distances = validate_params(
        {
            "X": ["array-like", "sparse matrix"],
            "Y": ["array-like", "sparse matrix", None],
            "metric": [StrOptions(set(_VALID_METRICS) | {"precomputed"}), callable],
            "n_jobs": [Integral, None],
            "force_all_finite": ["boolean", StrOptions({"allow-nan"})],
        }, **validation_kwargs
    )(pairwise_distances)
4 changes: 4 additions & 0 deletions deselected_tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@
# will exclude deselection in versions 0.18.1, and 0.18.2 only.

deselected_tests:
# 'kulsinski' distance was deprecated in scipy 1.11 but still marked as supported in scikit-learn < 1.3
- neighbors/tests/test_neighbors.py::test_kneighbors_brute_backend[float64-kulsinski] <1.3
- neighbors/tests/test_neighbors.py::test_radius_neighbors_brute_backend[kulsinski] <1.3

# test for KMeans FutureWarning is not removed from the sklearn test suite yet
- cluster/tests/test_k_means.py::test_change_n_init_future_warning[KMeans-10] ==1.4.dev0

Expand Down
1 change: 1 addition & 0 deletions requirements-test.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@ scikit-learn==1.0.2 ; python_version == '3.7'
scikit-learn==1.2.2 ; python_version >= '3.8'
pandas==1.3.5 ; python_version == '3.7'
pandas==2.0.1 ; python_version >= '3.8'
joblib==1.2.0 ; python_version == '3.7' and sys_platform == 'win32'
67 changes: 37 additions & 30 deletions sklearnex/svm/nusvc.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
from sklearn.svm import NuSVC as sklearn_NuSVC
from sklearn.utils.validation import _deprecate_positional_args
from sklearn.exceptions import NotFittedError
if sklearn_check_version('1.0'):
from sklearn.utils.metaestimators import available_if

from onedal.svm import NuSVC as onedal_NuSVC

Expand Down Expand Up @@ -114,36 +116,41 @@ def predict(self, X):
'sklearn': sklearn_NuSVC.predict,
}, X)

@property
def predict_proba(self):
"""
Compute probabilities of possible outcomes for samples in X.
The model need to have probability information computed at training
time: fit with attribute `probability` set to True.
Parameters
----------
X : array-like of shape (n_samples, n_features)
For kernel="precomputed", the expected shape of X is
(n_samples_test, n_samples_train).
Returns
-------
T : ndarray of shape (n_samples, n_classes)
Returns the probability of the sample for each class in
the model. The columns correspond to the classes in sorted
order, as they appear in the attribute :term:`classes_`.
Notes
-----
The probability model is created using cross validation, so
the results can be slightly different than those obtained by
predict. Also, it will produce meaningless results on very small
datasets.
"""
self._check_proba()
return self._predict_proba
if sklearn_check_version('1.0'):
    # sklearn >= 1.0: expose predict_proba via available_if so the attribute
    # only exists when sklearn_NuSVC._check_proba allows it (i.e. the model
    # was fit with probability=True), matching the upstream sklearn API.
    @available_if(sklearn_NuSVC._check_proba)
    def predict_proba(self, X):
        """
        Compute probabilities of possible outcomes for samples in X.

        The model needs to have probability information computed at training
        time: fit with attribute `probability` set to True.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            For kernel="precomputed", the expected shape of X is
            (n_samples_test, n_samples_train).

        Returns
        -------
        T : ndarray of shape (n_samples, n_classes)
            Returns the probability of the sample for each class in
            the model. The columns correspond to the classes in sorted
            order, as they appear in the attribute :term:`classes_`.

        Notes
        -----
        The probability model is created using cross validation, so
        the results can be slightly different than those obtained by
        predict. Also, it will produce meaningless results on very small
        datasets.
        """
        return self._predict_proba(X)
else:
    # Older sklearn: keep the legacy property-based dispatch; _check_proba
    # raises when the model was not fit with probability=True.
    @property
    def predict_proba(self):
        self._check_proba()
        return self._predict_proba

@wrap_output_data
def _predict_proba(self, X):
Expand Down
67 changes: 37 additions & 30 deletions sklearnex/svm/svc.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@
from sklearn.svm import SVC as sklearn_SVC
from sklearn.utils.validation import _deprecate_positional_args
from sklearn.exceptions import NotFittedError
if sklearn_check_version('1.0'):
from sklearn.utils.metaestimators import available_if

from onedal.svm import SVC as onedal_SVC

Expand Down Expand Up @@ -116,36 +118,41 @@ def predict(self, X):
'sklearn': sklearn_SVC.predict,
}, X)

@property
def predict_proba(self):
"""
Compute probabilities of possible outcomes for samples in X.
The model need to have probability information computed at training
time: fit with attribute `probability` set to True.
Parameters
----------
X : array-like of shape (n_samples, n_features)
For kernel="precomputed", the expected shape of X is
(n_samples_test, n_samples_train).
Returns
-------
T : ndarray of shape (n_samples, n_classes)
Returns the probability of the sample for each class in
the model. The columns correspond to the classes in sorted
order, as they appear in the attribute :term:`classes_`.
Notes
-----
The probability model is created using cross validation, so
the results can be slightly different than those obtained by
predict. Also, it will produce meaningless results on very small
datasets.
"""
self._check_proba()
return self._predict_proba
if sklearn_check_version('1.0'):
    # sklearn >= 1.0: expose predict_proba via available_if so the attribute
    # only exists when sklearn_SVC._check_proba allows it (i.e. the model
    # was fit with probability=True), matching the upstream sklearn API.
    @available_if(sklearn_SVC._check_proba)
    def predict_proba(self, X):
        """
        Compute probabilities of possible outcomes for samples in X.

        The model needs to have probability information computed at training
        time: fit with attribute `probability` set to True.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            For kernel="precomputed", the expected shape of X is
            (n_samples_test, n_samples_train).

        Returns
        -------
        T : ndarray of shape (n_samples, n_classes)
            Returns the probability of the sample for each class in
            the model. The columns correspond to the classes in sorted
            order, as they appear in the attribute :term:`classes_`.

        Notes
        -----
        The probability model is created using cross validation, so
        the results can be slightly different than those obtained by
        predict. Also, it will produce meaningless results on very small
        datasets.
        """
        return self._predict_proba(X)
else:
    # Older sklearn: keep the legacy property-based dispatch; _check_proba
    # raises when the model was not fit with probability=True.
    @property
    def predict_proba(self):
        self._check_proba()
        return self._predict_proba

@wrap_output_data
def _predict_proba(self, X):
Expand Down

0 comments on commit 40ccd1c

Please sign in to comment.