From cb5f3f9f4c25b0561d2cd2adde02472ea4fce6a0 Mon Sep 17 00:00:00 2001 From: Lalleh Rafeei <84813886+lrafeei@users.noreply.github.com> Date: Tue, 20 Dec 2022 10:34:45 -0800 Subject: [PATCH] Add cluster model function traces (#700) * Add sklearn to tox * Add function traces around model methods * Support Python 2.7 & 3.7 sklearn * Add test for multiple calls to model method * Fixup: add comments & organize * Add ensemble models * Add ensemble model tests * Edit tests * Add ensemble library models from sklearn * Start tests with empty commit * Clean up tests * Add cluster model instrumentation * Fix tests for various versions of sklearn * Fix ensemble tests with changes from tree PR * [Mega-Linter] Apply linters fixes * Fix some cluster model tests * Fix tests after ensemble PR merge * Add transform to tests * Remove accidental commits * Modify cluster tests to be more readable * Break up instrumentation models * Remove duplicate ensemble module defs * Modify VotingRegressor test Co-authored-by: Hannah Stepanek Co-authored-by: lrafeei --- newrelic/config.py | 114 +++++++++++ newrelic/hooks/mlmodel_sklearn.py | 37 ++++ tests/mlmodel_sklearn/test_cluster_models.py | 186 ++++++++++++++++++ tests/mlmodel_sklearn/test_ensemble_models.py | 2 +- 4 files changed, 338 insertions(+), 1 deletion(-) create mode 100644 tests/mlmodel_sklearn/test_cluster_models.py diff --git a/newrelic/config.py b/newrelic/config.py index 985443151..4e4406727 100644 --- a/newrelic/config.py +++ b/newrelic/config.py @@ -2903,6 +2903,120 @@ def _process_module_builtin_defaults(): "instrument_sklearn_ensemble_hist_models", ) + _process_module_definition( + "sklearn.cluster._affinity_propagation", + "newrelic.hooks.mlmodel_sklearn", + "instrument_sklearn_cluster_models", + ) + + _process_module_definition( + "sklearn.cluster.affinity_propagation_", + "newrelic.hooks.mlmodel_sklearn", + "instrument_sklearn_cluster_models", + ) + + _process_module_definition( + "sklearn.cluster._agglomerative", 
"newrelic.hooks.mlmodel_sklearn", + "instrument_sklearn_cluster_agglomerative_models", + ) + + _process_module_definition( + "sklearn.cluster.hierarchical", + "newrelic.hooks.mlmodel_sklearn", + "instrument_sklearn_cluster_agglomerative_models", + ) + + _process_module_definition( + "sklearn.cluster._birch", + "newrelic.hooks.mlmodel_sklearn", + "instrument_sklearn_cluster_models", + ) + + _process_module_definition( + "sklearn.cluster.birch", + "newrelic.hooks.mlmodel_sklearn", + "instrument_sklearn_cluster_models", + ) + + _process_module_definition( + "sklearn.cluster._bisect_k_means", + "newrelic.hooks.mlmodel_sklearn", + "instrument_sklearn_cluster_kmeans_models", + ) + + _process_module_definition( + "sklearn.cluster._dbscan", + "newrelic.hooks.mlmodel_sklearn", + "instrument_sklearn_cluster_models", + ) + + _process_module_definition( + "sklearn.cluster.dbscan_", + "newrelic.hooks.mlmodel_sklearn", + "instrument_sklearn_cluster_models", + ) + + _process_module_definition( + "sklearn.cluster._feature_agglomeration", + "newrelic.hooks.mlmodel_sklearn", + "instrument_sklearn_cluster_models", + ) + + _process_module_definition( + "sklearn.cluster._kmeans", + "newrelic.hooks.mlmodel_sklearn", + "instrument_sklearn_cluster_kmeans_models", + ) + + _process_module_definition( + "sklearn.cluster.k_means_", + "newrelic.hooks.mlmodel_sklearn", + "instrument_sklearn_cluster_kmeans_models", + ) + + _process_module_definition( + "sklearn.cluster._mean_shift", + "newrelic.hooks.mlmodel_sklearn", + "instrument_sklearn_cluster_models", + ) + + _process_module_definition( + "sklearn.cluster.mean_shift_", + "newrelic.hooks.mlmodel_sklearn", + "instrument_sklearn_cluster_models", + ) + + _process_module_definition( + "sklearn.cluster._optics", + "newrelic.hooks.mlmodel_sklearn", + "instrument_sklearn_cluster_models", + ) + + _process_module_definition( + "sklearn.cluster._spectral", + "newrelic.hooks.mlmodel_sklearn", + "instrument_sklearn_cluster_clustering_models", + ) + + 
_process_module_definition( + "sklearn.cluster.spectral", + "newrelic.hooks.mlmodel_sklearn", + "instrument_sklearn_cluster_clustering_models", + ) + + _process_module_definition( + "sklearn.cluster._bicluster", + "newrelic.hooks.mlmodel_sklearn", + "instrument_sklearn_cluster_clustering_models", + ) + + _process_module_definition( + "sklearn.cluster.bicluster", + "newrelic.hooks.mlmodel_sklearn", + "instrument_sklearn_cluster_clustering_models", + ) + _process_module_definition( "rest_framework.views", "newrelic.hooks.component_djangorestframework", diff --git a/newrelic/hooks/mlmodel_sklearn.py b/newrelic/hooks/mlmodel_sklearn.py index 2dc67ad66..fa4ad2b43 100644 --- a/newrelic/hooks/mlmodel_sklearn.py +++ b/newrelic/hooks/mlmodel_sklearn.py @@ -260,6 +260,43 @@ def instrument_sklearn_ensemble_hist_models(module): _instrument_sklearn_models(module, model_classes) +def instrument_sklearn_cluster_models(module): + model_classes = ( + "AffinityPropagation", + "Birch", + "DBSCAN", + "MeanShift", + "OPTICS", + ) + _instrument_sklearn_models(module, model_classes) + + +def instrument_sklearn_cluster_agglomerative_models(module): + model_classes = ( + "AgglomerativeClustering", + "FeatureAgglomeration", + ) + _instrument_sklearn_models(module, model_classes) + + +def instrument_sklearn_cluster_clustering_models(module): + model_classes = ( + "SpectralBiclustering", + "SpectralCoclustering", + "SpectralClustering", + ) + _instrument_sklearn_models(module, model_classes) + + +def instrument_sklearn_cluster_kmeans_models(module): + model_classes = ( + "BisectingKMeans", + "KMeans", + "MiniBatchKMeans", + ) + _instrument_sklearn_models(module, model_classes) + + def instrument_sklearn_metrics(module): for scorer in METRIC_SCORERS: if hasattr(module, scorer): diff --git a/tests/mlmodel_sklearn/test_cluster_models.py b/tests/mlmodel_sklearn/test_cluster_models.py new file mode 100644 index 000000000..906995c22 --- /dev/null +++ b/tests/mlmodel_sklearn/test_cluster_models.py 
@@ -0,0 +1,186 @@ +# Copyright 2010 New Relic, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest +from sklearn import __version__ # noqa: this is needed for get_package_version +from testing_support.validators.validate_transaction_metrics import ( + validate_transaction_metrics, +) + +from newrelic.api.background_task import background_task +from newrelic.common.package_version_utils import get_package_version +from newrelic.packages import six + +SKLEARN_VERSION = tuple(map(int, get_package_version("sklearn").split("."))) + + +@pytest.mark.parametrize( + "cluster_model_name", + [ + "AffinityPropagation", + "AgglomerativeClustering", + "Birch", + "DBSCAN", + "FeatureAgglomeration", + "KMeans", + "MeanShift", + "MiniBatchKMeans", + "SpectralBiclustering", + "SpectralCoclustering", + "SpectralClustering", + ], +) +def test_below_v1_1_model_methods_wrapped_in_function_trace(cluster_model_name, run_cluster_model): + expected_scoped_metrics = { + "AffinityPropagation": [ + ("Function/MLModel/Sklearn/Named/AffinityPropagation.fit", 2), + ("Function/MLModel/Sklearn/Named/AffinityPropagation.predict", 1), + ("Function/MLModel/Sklearn/Named/AffinityPropagation.fit_predict", 1), + ], + "AgglomerativeClustering": [ + ("Function/MLModel/Sklearn/Named/AgglomerativeClustering.fit", 2), + ("Function/MLModel/Sklearn/Named/AgglomerativeClustering.fit_predict", 1), + ], + "Birch": [ + ("Function/MLModel/Sklearn/Named/Birch.fit", 2), + ( + 
"Function/MLModel/Sklearn/Named/Birch.predict", + 1 if SKLEARN_VERSION >= (1, 0, 0) else 3, + ), + ("Function/MLModel/Sklearn/Named/Birch.fit_predict", 1), + ("Function/MLModel/Sklearn/Named/Birch.transform", 1), + ], + "DBSCAN": [ + ("Function/MLModel/Sklearn/Named/DBSCAN.fit", 2), + ("Function/MLModel/Sklearn/Named/DBSCAN.fit_predict", 1), + ], + "FeatureAgglomeration": [ + ("Function/MLModel/Sklearn/Named/FeatureAgglomeration.fit", 1), + ("Function/MLModel/Sklearn/Named/FeatureAgglomeration.transform", 1), + ], + "KMeans": [ + ("Function/MLModel/Sklearn/Named/KMeans.fit", 2), + ("Function/MLModel/Sklearn/Named/KMeans.predict", 1), + ("Function/MLModel/Sklearn/Named/KMeans.fit_predict", 1), + ("Function/MLModel/Sklearn/Named/KMeans.transform", 1), + ], + "MeanShift": [ + ("Function/MLModel/Sklearn/Named/MeanShift.fit", 2), + ("Function/MLModel/Sklearn/Named/MeanShift.predict", 1), + ("Function/MLModel/Sklearn/Named/MeanShift.fit_predict", 1), + ], + "MiniBatchKMeans": [ + ("Function/MLModel/Sklearn/Named/MiniBatchKMeans.fit", 2), + ("Function/MLModel/Sklearn/Named/MiniBatchKMeans.predict", 1), + ("Function/MLModel/Sklearn/Named/MiniBatchKMeans.fit_predict", 1), + ], + "SpectralBiclustering": [ + ("Function/MLModel/Sklearn/Named/SpectralBiclustering.fit", 1), + ], + "SpectralCoclustering": [ + ("Function/MLModel/Sklearn/Named/SpectralCoclustering.fit", 1), + ], + "SpectralClustering": [ + ("Function/MLModel/Sklearn/Named/SpectralClustering.fit", 2), + ("Function/MLModel/Sklearn/Named/SpectralClustering.fit_predict", 1), + ], + } + expected_transaction_name = "test_cluster_models:_test" + if six.PY3: + expected_transaction_name = ( + "test_cluster_models:test_below_v1_1_model_methods_wrapped_in_function_trace.._test" + ) + + @validate_transaction_metrics( + expected_transaction_name, + scoped_metrics=expected_scoped_metrics[cluster_model_name], + rollup_metrics=expected_scoped_metrics[cluster_model_name], + background_task=True, + ) + @background_task() + def 
_test(): + run_cluster_model(cluster_model_name) + + _test() + + +@pytest.mark.skipif(SKLEARN_VERSION < (1, 1, 0), reason="Requires sklearn > 1.1") +@pytest.mark.parametrize( + "cluster_model_name", + [ + "BisectingKMeans", + "OPTICS", + ], +) +def test_above_v1_1_model_methods_wrapped_in_function_trace(cluster_model_name, run_cluster_model): + expected_scoped_metrics = { + "BisectingKMeans": [ + ("Function/MLModel/Sklearn/Named/BisectingKMeans.fit", 2), + ("Function/MLModel/Sklearn/Named/BisectingKMeans.predict", 1), + ("Function/MLModel/Sklearn/Named/BisectingKMeans.fit_predict", 1), + ], + "OPTICS": [ + ("Function/MLModel/Sklearn/Named/OPTICS.fit", 2), + ("Function/MLModel/Sklearn/Named/OPTICS.fit_predict", 1), + ], + } + expected_transaction_name = "test_cluster_models:_test" + if six.PY3: + expected_transaction_name = ( + "test_cluster_models:test_above_v1_1_model_methods_wrapped_in_function_trace.._test" + ) + + @validate_transaction_metrics( + expected_transaction_name, + scoped_metrics=expected_scoped_metrics[cluster_model_name], + rollup_metrics=expected_scoped_metrics[cluster_model_name], + background_task=True, + ) + @background_task() + def _test(): + run_cluster_model(cluster_model_name) + + _test() + + +@pytest.fixture +def run_cluster_model(): + def _run(cluster_model_name): + import sklearn.cluster + from sklearn.datasets import load_iris + from sklearn.model_selection import train_test_split + + X, y = load_iris(return_X_y=True) + x_train, x_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=0) + + clf = getattr(sklearn.cluster, cluster_model_name)() + + model = clf.fit(x_train, y_train) + + if hasattr(model, "predict"): + model.predict(x_test) + if hasattr(model, "score"): + model.score(x_test, y_test) + if hasattr(model, "fit_predict"): + model.fit_predict(x_test) + if hasattr(model, "predict_log_proba"): + model.predict_log_proba(x_test) + if hasattr(model, "predict_proba"): + model.predict_proba(x_test) + if hasattr(model, 
"transform"): + model.transform(x_test) + + return model + + return _run diff --git a/tests/mlmodel_sklearn/test_ensemble_models.py b/tests/mlmodel_sklearn/test_ensemble_models.py index 29ade4cee..9daabdb5c 100644 --- a/tests/mlmodel_sklearn/test_ensemble_models.py +++ b/tests/mlmodel_sklearn/test_ensemble_models.py @@ -279,7 +279,7 @@ def _run(ensemble_model_name): "voting": "soft", } elif ensemble_model_name == "VotingRegressor": - kwargs = {"estimators": [("rf", RandomForestRegressor()), ("lr", LinearRegression())]} + kwargs = {"estimators": [("lr", LinearRegression())]} elif ensemble_model_name == "StackingRegressor": kwargs = {"estimators": [("rf", RandomForestRegressor())]} clf = getattr(sklearn.ensemble, ensemble_model_name)(**kwargs)