Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add cluster model function traces #700

Merged
merged 31 commits into from
Dec 20, 2022
Merged
Show file tree
Hide file tree
Changes from 29 commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
9ea466a
Add sklearn to tox
hmstepanek Nov 10, 2022
07fef71
Add function traces around model methods
hmstepanek Nov 10, 2022
8a39862
Support Python 2.7 & 3.7 sklearn
hmstepanek Nov 16, 2022
c72d9df
Add test for multiple calls to model method
hmstepanek Nov 16, 2022
fc1f179
Fixup: add comments & organize
hmstepanek Nov 17, 2022
59a9511
Add ensemble models
lrafeei Nov 17, 2022
b26fa84
Add ensemble model tests
lrafeei Nov 18, 2022
fba42c8
Edit tests
lrafeei Nov 18, 2022
80ad2c0
Add ensemble library models from sklearn
lrafeei Nov 23, 2022
531b12a
Start tests with empty commit
lrafeei Nov 23, 2022
94cb0c2
Clean up tests
lrafeei Nov 23, 2022
53e4ea0
Add cluster model instrumentaton
lrafeei Nov 24, 2022
e06d0c4
Merge branch 'feature-scikitlearn' into add-cluster-model-function-tr…
lrafeei Nov 28, 2022
5b9d1b3
Merge branch 'feature-scikitlearn' into add-ensemble-model-function-t…
lrafeei Nov 28, 2022
36115e5
Fix tests for various versions of sklearn
lrafeei Dec 2, 2022
7328d7a
Merge branch 'develop-scikitlearn' into add-ensemble-model-function-t…
lrafeei Dec 6, 2022
ad14a00
Fix ensemble tests with changes from tree PR
lrafeei Dec 7, 2022
3d55348
[Mega-Linter] Apply linters fixes
lrafeei Dec 7, 2022
c9b599c
Merge branch 'develop-scikitlearn' into add-ensemble-model-function-t…
lrafeei Dec 7, 2022
2af93ea
Merge changes from ensemble PR
lrafeei Dec 7, 2022
d4290d8
Fix some cluster model tests
lrafeei Dec 7, 2022
f066a08
Fix tests after ensemble PR merge
lrafeei Dec 7, 2022
9bd374b
Add transform to tests
lrafeei Dec 7, 2022
62267e1
Merge branch 'develop-scikitlearn' into add-cluster-model-function-tr…
lrafeei Dec 7, 2022
d2b4900
Remove accidental commits
lrafeei Dec 14, 2022
8d36ed2
Merge branch 'develop-scikitlearn' into add-cluster-model-function-tr…
lrafeei Dec 14, 2022
b014e98
Modify cluster tests to be more readable
lrafeei Dec 14, 2022
7771fe8
Break up instrumentation models
lrafeei Dec 14, 2022
6e6e3fa
Remove duplicate ensemble module defs
hmstepanek Dec 15, 2022
4e5a1f9
Merge branch 'develop-scikitlearn' into add-cluster-model-function-tr…
lrafeei Dec 19, 2022
c0d725d
Modify VotingRegressor test
lrafeei Dec 19, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
114 changes: 114 additions & 0 deletions newrelic/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -2902,6 +2902,120 @@ def _process_module_builtin_defaults():
"instrument_sklearn_ensemble_hist_models",
)

# scikit-learn cluster models.
#
# Most estimators are registered under two module paths (for example
# ``_birch`` and ``birch``); presumably these are the private and the legacy
# module names used by different sklearn releases -- TODO confirm.
_process_module_definition(
    "sklearn.cluster._affinity_propagation",
    "newrelic.hooks.mlmodel_sklearn",
    "instrument_sklearn_cluster_models",
)

_process_module_definition(
    "sklearn.cluster.affinity_propagation_",
    "newrelic.hooks.mlmodel_sklearn",
    "instrument_sklearn_cluster_models",
)

_process_module_definition(
    "sklearn.cluster._agglomerative",
    "newrelic.hooks.mlmodel_sklearn",
    "instrument_sklearn_cluster_agglomerative_models",
)

_process_module_definition(
    "sklearn.cluster.hierarchical",
    "newrelic.hooks.mlmodel_sklearn",
    "instrument_sklearn_cluster_agglomerative_models",
)

_process_module_definition(
    "sklearn.cluster._birch",
    "newrelic.hooks.mlmodel_sklearn",
    "instrument_sklearn_cluster_models",
)

_process_module_definition(
    "sklearn.cluster.birch",
    "newrelic.hooks.mlmodel_sklearn",
    "instrument_sklearn_cluster_models",
)

_process_module_definition(
    "sklearn.cluster._bisect_k_means",
    "newrelic.hooks.mlmodel_sklearn",
    "instrument_sklearn_cluster_kmeans_models",
)

_process_module_definition(
    "sklearn.cluster._dbscan",
    "newrelic.hooks.mlmodel_sklearn",
    "instrument_sklearn_cluster_models",
)

_process_module_definition(
    "sklearn.cluster.dbscan_",
    "newrelic.hooks.mlmodel_sklearn",
    "instrument_sklearn_cluster_models",
)

# FeatureAgglomeration is listed in the agglomerative hook's class tuple, not
# in the generic cluster hook's, so this module is routed to the agglomerative
# hook. NOTE(review): confirm this module actually exposes the class; if it
# does not, this entry has no effect and could be dropped.
_process_module_definition(
    "sklearn.cluster._feature_agglomeration",
    "newrelic.hooks.mlmodel_sklearn",
    "instrument_sklearn_cluster_agglomerative_models",
)

_process_module_definition(
    "sklearn.cluster._kmeans",
    "newrelic.hooks.mlmodel_sklearn",
    "instrument_sklearn_cluster_kmeans_models",
)

_process_module_definition(
    "sklearn.cluster.k_means_",
    "newrelic.hooks.mlmodel_sklearn",
    "instrument_sklearn_cluster_kmeans_models",
)

_process_module_definition(
    "sklearn.cluster._mean_shift",
    "newrelic.hooks.mlmodel_sklearn",
    "instrument_sklearn_cluster_models",
)

_process_module_definition(
    "sklearn.cluster.mean_shift_",
    "newrelic.hooks.mlmodel_sklearn",
    "instrument_sklearn_cluster_models",
)

_process_module_definition(
    "sklearn.cluster._optics",
    "newrelic.hooks.mlmodel_sklearn",
    "instrument_sklearn_cluster_models",
)

_process_module_definition(
    "sklearn.cluster._spectral",
    "newrelic.hooks.mlmodel_sklearn",
    "instrument_sklearn_cluster_clustering_models",
)

_process_module_definition(
    "sklearn.cluster.spectral",
    "newrelic.hooks.mlmodel_sklearn",
    "instrument_sklearn_cluster_clustering_models",
)

_process_module_definition(
    "sklearn.cluster._bicluster",
    "newrelic.hooks.mlmodel_sklearn",
    "instrument_sklearn_cluster_clustering_models",
)

_process_module_definition(
    "sklearn.cluster.bicluster",
    "newrelic.hooks.mlmodel_sklearn",
    "instrument_sklearn_cluster_clustering_models",
)

_process_module_definition(
"rest_framework.views",
"newrelic.hooks.component_djangorestframework",
Expand Down
37 changes: 37 additions & 0 deletions newrelic/hooks/mlmodel_sklearn.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,43 @@ def instrument_sklearn_ensemble_hist_models(module):
_instrument_sklearn_models(module, model_classes)


def instrument_sklearn_cluster_models(module):
    """Hand the general cluster estimator names to ``_instrument_sklearn_models``.

    :param module: The imported sklearn module passed in by the import hook.

    Covers AffinityPropagation, Birch, DBSCAN, MeanShift and OPTICS. The
    actual wrapping is done by ``_instrument_sklearn_models``, defined
    elsewhere in this file.
    """
    _instrument_sklearn_models(
        module,
        (
            "AffinityPropagation",
            "Birch",
            "DBSCAN",
            "MeanShift",
            "OPTICS",
        ),
    )


def instrument_sklearn_cluster_agglomerative_models(module):
    """Hand the agglomerative estimator names to ``_instrument_sklearn_models``.

    :param module: The imported sklearn module passed in by the import hook.

    Covers AgglomerativeClustering and FeatureAgglomeration. The actual
    wrapping is done by ``_instrument_sklearn_models``, defined elsewhere in
    this file.
    """
    model_classes = (
        "AgglomerativeClustering",
        "FeatureAgglomeration",
    )
    _instrument_sklearn_models(module, model_classes)


def instrument_sklearn_cluster_clustering_models(module):
    """Hand the spectral estimator names to ``_instrument_sklearn_models``.

    :param module: The imported sklearn module passed in by the import hook.

    Covers SpectralBiclustering, SpectralCoclustering and SpectralClustering.
    The actual wrapping is done by ``_instrument_sklearn_models``, defined
    elsewhere in this file.
    """
    _instrument_sklearn_models(
        module,
        (
            "SpectralBiclustering",
            "SpectralCoclustering",
            "SpectralClustering",
        ),
    )


def instrument_sklearn_cluster_kmeans_models(module):
    """Hand the k-means estimator names to ``_instrument_sklearn_models``.

    :param module: The imported sklearn module passed in by the import hook.

    Covers BisectingKMeans, KMeans and MiniBatchKMeans. The actual wrapping is
    done by ``_instrument_sklearn_models``, defined elsewhere in this file.
    """
    _instrument_sklearn_models(
        module,
        (
            "BisectingKMeans",
            "KMeans",
            "MiniBatchKMeans",
        ),
    )


def instrument_sklearn_metrics(module):
for scorer in METRIC_SCORERS:
if hasattr(module, scorer):
Expand Down
186 changes: 186 additions & 0 deletions tests/mlmodel_sklearn/test_cluster_models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
# Copyright 2010 New Relic, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import pytest
from sklearn import __version__ # noqa: this is needed for get_package_version
from testing_support.validators.validate_transaction_metrics import (
validate_transaction_metrics,
)

from newrelic.api.background_task import background_task
from newrelic.common.package_version_utils import get_package_version
from newrelic.packages import six

# Installed sklearn version as a tuple of ints, used for version-gated
# expectations and skips below. Only purely numeric components are kept so a
# pre-release or development version string (e.g. "1.3.dev0") does not make
# int() raise ValueError while this module is being collected.
SKLEARN_VERSION = tuple(
    int(component) for component in get_package_version("sklearn").split(".") if component.isdigit()
)


@pytest.mark.parametrize(
    "cluster_model_name",
    [
        "AffinityPropagation",
        "AgglomerativeClustering",
        "Birch",
        "DBSCAN",
        "FeatureAgglomeration",
        "KMeans",
        "MeanShift",
        "MiniBatchKMeans",
        "SpectralBiclustering",
        "SpectralCoclustering",
        "SpectralClustering",
    ],
)
def test_below_v1_1_model_methods_wrapped_in_function_trace(cluster_model_name, run_cluster_model):
    """Check the function-trace metrics recorded for each cluster model.

    The model is exercised through the ``run_cluster_model`` fixture inside a
    background task, and the expected metrics for that model are validated
    both as scoped metrics and as rollup metrics.
    """
    # Each entry is (metric name, expected call count) for one model.
    expected_scoped_metrics = {
        "AffinityPropagation": [
            ("Function/MLModel/Sklearn/Named/AffinityPropagation.fit", 2),
            ("Function/MLModel/Sklearn/Named/AffinityPropagation.predict", 1),
            ("Function/MLModel/Sklearn/Named/AffinityPropagation.fit_predict", 1),
        ],
        "AgglomerativeClustering": [
            ("Function/MLModel/Sklearn/Named/AgglomerativeClustering.fit", 2),
            ("Function/MLModel/Sklearn/Named/AgglomerativeClustering.fit_predict", 1),
        ],
        "Birch": [
            ("Function/MLModel/Sklearn/Named/Birch.fit", 2),
            (
                "Function/MLModel/Sklearn/Named/Birch.predict",
                # The expected predict count depends on the installed sklearn version.
                1 if SKLEARN_VERSION >= (1, 0, 0) else 3,
            ),
            ("Function/MLModel/Sklearn/Named/Birch.fit_predict", 1),
            ("Function/MLModel/Sklearn/Named/Birch.transform", 1),
        ],
        "DBSCAN": [
            ("Function/MLModel/Sklearn/Named/DBSCAN.fit", 2),
            ("Function/MLModel/Sklearn/Named/DBSCAN.fit_predict", 1),
        ],
        "FeatureAgglomeration": [
            ("Function/MLModel/Sklearn/Named/FeatureAgglomeration.fit", 1),
            ("Function/MLModel/Sklearn/Named/FeatureAgglomeration.transform", 1),
        ],
        "KMeans": [
            ("Function/MLModel/Sklearn/Named/KMeans.fit", 2),
            ("Function/MLModel/Sklearn/Named/KMeans.predict", 1),
            ("Function/MLModel/Sklearn/Named/KMeans.fit_predict", 1),
            ("Function/MLModel/Sklearn/Named/KMeans.transform", 1),
        ],
        "MeanShift": [
            ("Function/MLModel/Sklearn/Named/MeanShift.fit", 2),
            ("Function/MLModel/Sklearn/Named/MeanShift.predict", 1),
            ("Function/MLModel/Sklearn/Named/MeanShift.fit_predict", 1),
        ],
        "MiniBatchKMeans": [
            ("Function/MLModel/Sklearn/Named/MiniBatchKMeans.fit", 2),
            ("Function/MLModel/Sklearn/Named/MiniBatchKMeans.predict", 1),
            ("Function/MLModel/Sklearn/Named/MiniBatchKMeans.fit_predict", 1),
        ],
        "SpectralBiclustering": [
            ("Function/MLModel/Sklearn/Named/SpectralBiclustering.fit", 1),
        ],
        "SpectralCoclustering": [
            ("Function/MLModel/Sklearn/Named/SpectralCoclustering.fit", 1),
        ],
        "SpectralClustering": [
            ("Function/MLModel/Sklearn/Named/SpectralClustering.fit", 2),
            ("Function/MLModel/Sklearn/Named/SpectralClustering.fit_predict", 1),
        ],
    }

    # On Python 3 the expected transaction name includes the enclosing test
    # function as a ``<locals>`` prefix; otherwise the short name is expected.
    expected_transaction_name = "test_cluster_models:_test"
    if six.PY3:
        expected_transaction_name = (
            "test_cluster_models:test_below_v1_1_model_methods_wrapped_in_function_trace.<locals>._test"
        )

    @validate_transaction_metrics(
        expected_transaction_name,
        scoped_metrics=expected_scoped_metrics[cluster_model_name],
        rollup_metrics=expected_scoped_metrics[cluster_model_name],
        background_task=True,
    )
    @background_task()
    def _test():
        run_cluster_model(cluster_model_name)

    _test()


# The condition only skips versions below 1.1.0, so 1.1.0 itself runs; the
# reason text states ">=" to match.
@pytest.mark.skipif(SKLEARN_VERSION < (1, 1, 0), reason="Requires sklearn >= 1.1")
@pytest.mark.parametrize(
    "cluster_model_name",
    [
        "BisectingKMeans",
        "OPTICS",
    ],
)
def test_above_v1_1_model_methods_wrapped_in_function_trace(cluster_model_name, run_cluster_model):
    """Check the function-trace metrics for models gated on sklearn 1.1.0+.

    The model is exercised through the ``run_cluster_model`` fixture inside a
    background task, and the expected metrics for that model are validated
    both as scoped metrics and as rollup metrics.
    """
    # Each entry is (metric name, expected call count) for one model.
    expected_scoped_metrics = {
        "BisectingKMeans": [
            ("Function/MLModel/Sklearn/Named/BisectingKMeans.fit", 2),
            ("Function/MLModel/Sklearn/Named/BisectingKMeans.predict", 1),
            ("Function/MLModel/Sklearn/Named/BisectingKMeans.fit_predict", 1),
        ],
        "OPTICS": [
            ("Function/MLModel/Sklearn/Named/OPTICS.fit", 2),
            ("Function/MLModel/Sklearn/Named/OPTICS.fit_predict", 1),
        ],
    }

    # On Python 3 the expected transaction name includes the enclosing test
    # function as a ``<locals>`` prefix; otherwise the short name is expected.
    expected_transaction_name = "test_cluster_models:_test"
    if six.PY3:
        expected_transaction_name = (
            "test_cluster_models:test_above_v1_1_model_methods_wrapped_in_function_trace.<locals>._test"
        )

    @validate_transaction_metrics(
        expected_transaction_name,
        scoped_metrics=expected_scoped_metrics[cluster_model_name],
        rollup_metrics=expected_scoped_metrics[cluster_model_name],
        background_task=True,
    )
    @background_task()
    def _test():
        run_cluster_model(cluster_model_name)

    _test()


@pytest.fixture
def run_cluster_model():
    """Provide a callable that fits and exercises a named ``sklearn.cluster`` model.

    The returned ``_run(cluster_model_name)`` builds the estimator with its
    default arguments, fits it on a train split of the iris dataset, calls
    every optional method the fitted model exposes, and returns the model.
    """

    def _run(cluster_model_name):
        # Imported inside the callable rather than at module import time.
        import sklearn.cluster
        from sklearn.datasets import load_iris
        from sklearn.model_selection import train_test_split

        X, y = load_iris(return_X_y=True)
        x_train, x_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=0)

        clf = getattr(sklearn.cluster, cluster_model_name)()

        model = clf.fit(x_train, y_train)

        # Optional estimator methods, in the order they are exercised, paired
        # with the positional arguments each one is called with.
        optional_calls = (
            ("predict", (x_test,)),
            ("score", (x_test, y_test)),
            ("fit_predict", (x_test,)),
            ("predict_log_proba", (x_test,)),
            ("predict_proba", (x_test,)),
            ("transform", (x_test,)),
        )
        for method_name, call_args in optional_calls:
            # Not every cluster model implements every method; skip the missing ones.
            if hasattr(model, method_name):
                getattr(model, method_name)(*call_args)

        return model

    return _run