From 3ca2d283c8b1a73b77f7bba9cfa72dd59059bc50 Mon Sep 17 00:00:00 2001 From: Jaycee Li <102714969+jaycee-li@users.noreply.github.com> Date: Mon, 26 Sep 2022 09:00:06 -0700 Subject: [PATCH 01/14] Experiments complex metrics (#8) * feat: new class and API for metrics * update system test * update high level log method * fix system test * update example * change from system schema to google schema --- google/cloud/aiplatform/__init__.py | 4 + .../metadata/experiment_run_resource.py | 156 ++++++++++++++++++ google/cloud/aiplatform/metadata/metadata.py | 59 ++++++- samples/model-builder/conftest.py | 6 + .../log_classification_metrics_sample.py | 47 ++++++ .../log_classification_metrics_sample_test.py | 38 +++++ samples/model-builder/test_constants.py | 9 +- tests/system/aiplatform/test_experiments.py | 31 ++++ tests/unit/aiplatform/test_metadata.py | 72 ++++++++ 9 files changed, 419 insertions(+), 3 deletions(-) create mode 100644 samples/model-builder/experiment_tracking/log_classification_metrics_sample.py create mode 100644 samples/model-builder/experiment_tracking/log_classification_metrics_sample_test.py diff --git a/google/cloud/aiplatform/__init__.py b/google/cloud/aiplatform/__init__.py index 8107756229..bc1043ef03 100644 --- a/google/cloud/aiplatform/__init__.py +++ b/google/cloud/aiplatform/__init__.py @@ -86,6 +86,9 @@ log_params = metadata.metadata._experiment_tracker.log_params log_metrics = metadata.metadata._experiment_tracker.log_metrics +log_classification_metrics = ( + metadata.metadata._experiment_tracker.log_classification_metrics +) get_experiment_df = metadata.metadata._experiment_tracker.get_experiment_df start_run = metadata.metadata._experiment_tracker.start_run start_execution = metadata.metadata._experiment_tracker.start_execution @@ -110,6 +113,7 @@ "log", "log_params", "log_metrics", + "log_classification_metrics", "log_time_series_metrics", "get_experiment_df", "get_pipeline_df", diff --git a/google/cloud/aiplatform/metadata/experiment_run_resource.py b/google/cloud/aiplatform/metadata/experiment_run_resource.py index d61b62b7b2..31026fb23b 100644 --- a/google/cloud/aiplatform/metadata/experiment_run_resource.py +++ b/google/cloud/aiplatform/metadata/experiment_run_resource.py @@ -39,6 +39,7 @@ from google.cloud.aiplatform.metadata import metadata from google.cloud.aiplatform.metadata import resource from google.cloud.aiplatform.metadata import utils as metadata_utils +from google.cloud.aiplatform.metadata import schema from google.cloud.aiplatform.tensorboard import tensorboard_resource from google.cloud.aiplatform.utils import rest_utils @@ -990,6 +991,103 @@ def log_metrics(self, metrics: Dict[str, Union[float, int, str]]): # TODO: query the latest metrics artifact resource before logging. self._metadata_node.update(metadata={constants._METRIC_KEY: metrics}) + def log_classification_metrics( + self, + *, + labels: Optional[List[str]] = None, + matrix: Optional[List[List[int]]] = None, + fpr: Optional[List[float]] = None, + tpr: Optional[List[float]] = None, + threshold: Optional[List[float]] = None, + display_name: Optional[str] = None, + ): + """Create an artifact for classification metrics and log to ExperimentRun. Currently support confusion matrix and ROC curve. 
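+        Example usage: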
+ + ``` + my_run = aiplatform.ExperimentRun('my-run', experiment='my-experiment') + my_run.log_classification_metrics( + display_name='my-classification-metrics', + labels=['cat', 'dog'], + matrix=[[9, 1], [1, 9]], + fpr=[0.1, 0.5, 0.9], + tpr=[0.1, 0.7, 0.9], + threshold=[0.9, 0.5, 0.1], + ) + ``` + + Args: + labels (List[str]): + Optional. List of label names for the confusion matrix. Must be set if 'matrix' is set. + matrix (List[List[int]): + Optional. Values for the confusion matrix. Must be set if 'labels' is set. + fpr (List[float]): + Optional. List of false positive rates for the ROC curve. Must be set if 'tpr' or 'thresholds' is set. + tpr (List[float]): + Optional. List of true positive rates for the ROC curve. Must be set if 'fpr' or 'thresholds' is set. + threshold (List[float]): + Optional. List of thresholds for the ROC curve. Must be set if 'fpr' or 'tpr' is set. + display_name (str): + Optional. The user-defined name for the classification metric artifact. + + Raises: + ValueError: if 'labels' and 'matrix' are not set together + or if 'labels' and 'matrix' are not in the same length + or if 'fpr' and 'tpr' and 'threshold' are not set together + or if 'fpr' and 'tpr' and 'threshold' are not in the same length + """ + if (labels or matrix) and not (labels and matrix): + raise ValueError("labels and matrix must be set together.") + + if (fpr or tpr or threshold) and not (fpr and tpr and threshold): + raise ValueError("fpr, tpr, and thresholds must be set together.") + + metadata = {} + if labels and matrix: + if len(matrix) != len(labels): + raise ValueError( + "Length of labels and matrix must be the same. " + "Got lengths {} and {} respectively.".format( + len(labels), len(matrix) + ) + ) + + confusion_matrix = { + "annotationSpecs": [{"displayName": label} for label in labels], + "rows": matrix, + } + metadata["confusionMatrix"] = confusion_matrix + + if fpr and tpr and threshold: + if ( + len(fpr) != len(tpr) + or len(fpr) != len(threshold) + or len(tpr) != len(threshold) + ): + raise ValueError( + "Length of fpr, tpr and threshold must be the same. " + "Got lengths {}, {} and {} respectively.".format( + len(fpr), len(tpr), len(threshold) + ) + ) + + metadata["confidenceMetrics"] = [ + { + "confidenceThreshold": threshold[i], + "recall": tpr[i], + "falsePositiveRate": fpr[i], + } + for i in range(len(fpr)) + ] + + classification_metrics = schema.google.artifact_schema.ClassificationMetrics( + display_name=display_name, + metadata=metadata, + ) + classfication_metrics = classification_metrics.create() + self._metadata_node.add_artifacts_and_executions( + artifact_resource_names=[classfication_metrics.resource_name] + ) + @_v1_not_supported def get_time_series_data_frame(self) -> "pd.DataFrame": # noqa: F821 """Returns all time series in this Run as a DataFrame. @@ -1149,6 +1247,64 @@ def get_metrics(self) -> Dict[str, Union[float, int, str]]: else: return self._metadata_node.metadata[constants._METRIC_KEY] + def get_classification_metrics(self) -> List[Dict[str, Union[str, List]]]: + """Get all the classification metrics logged to this run. 
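+        Example usage: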
+ + ``` + my_run = aiplatform.ExperimentRun('my-run', experiment='my-experiment') + metric = my_run.get_classification_metrics()[0] + print(metric) + ## print result: + { + "id": "e6c893a4-222e-4c60-a028-6a3b95dfc109", + "display_name": "my-classification-metrics", + "labels": ["cat", "dog"], + "matrix": [[9,1], [1,9]], + "fpr": [0.1, 0.5, 0.9], + "tpr": [0.1, 0.7, 0.9], + "thresholds": [0.9, 0.5, 0.1] + } + ``` + + Returns: + List of classification metrics logged to this experiment run. + """ + + artifact_list = artifact.Artifact.list( + filter=metadata_utils._make_filter_string( + in_context=[self.resource_name], + schema_title="google.ClassificationMetrics", + ), + project=self.project, + location=self.location, + credentials=self.credentials, + ) + + metrics = [] + for metric_artifact in artifact_list: + metric = {} + metric["id"] = metric_artifact.name + metric["display_name"] = metric_artifact.display_name + metadata = metric_artifact.metadata + if "confusionMatrix" in metadata: + metric["labels"] = [ + d["displayName"] + for d in metadata["confusionMatrix"]["annotationSpecs"] + ] + metric["matrix"] = metadata["confusionMatrix"]["rows"] + + if "confidenceMetrics" in metadata: + metric["fpr"] = [ + d["falsePositiveRate"] for d in metadata["confidenceMetrics"] + ] + metric["tpr"] = [d["recall"] for d in metadata["confidenceMetrics"]] + metric["threshold"] = [ + d["confidenceThreshold"] for d in metadata["confidenceMetrics"] + ] + metrics.append(metric) + + return metrics + @_v1_not_supported def associate_execution(self, execution: execution.Execution): """Associate an execution to this experiment run. diff --git a/google/cloud/aiplatform/metadata/metadata.py b/google/cloud/aiplatform/metadata/metadata.py index 6f67a6ddf6..d103a79733 100644 --- a/google/cloud/aiplatform/metadata/metadata.py +++ b/google/cloud/aiplatform/metadata/metadata.py @@ -15,8 +15,7 @@ # limitations under the License. # - -from typing import Dict, Union, Optional, Any +from typing import Dict, Union, Optional, Any, List from google.api_core import exceptions from google.auth import credentials as auth_credentials @@ -371,6 +370,62 @@ def log_metrics(self, metrics: Dict[str, Union[float, int, str]]): # query the latest metrics artifact resource before logging. self._experiment_run.log_metrics(metrics=metrics) + def log_classification_metrics( + self, + *, + labels: Optional[List[str]] = None, + matrix: Optional[List[List[int]]] = None, + fpr: Optional[List[float]] = None, + tpr: Optional[List[float]] = None, + threshold: Optional[List[float]] = None, + display_name: Optional[str] = None, + ): + """Create an artifact for classification metrics and log to ExperimentRun. Currently support confusion matrix and ROC curve. + + ``` + my_run = aiplatform.ExperimentRun('my-run', experiment='my-experiment') + my_run.log_classification_metrics( + display_name='my-classification-metrics', + labels=['cat', 'dog'], + matrix=[[9, 1], [1, 9]], + fpr=[0.1, 0.5, 0.9], + tpr=[0.1, 0.7, 0.9], + threshold=[0.9, 0.5, 0.1], + ) + ``` + + Args: + labels (List[str]): + Optional. List of label names for the confusion matrix. Must be set if 'matrix' is set. + matrix (List[List[int]): + Optional. Values for the confusion matrix. Must be set if 'labels' is set. + fpr (List[float]): + Optional. List of false positive rates for the ROC curve. Must be set if 'tpr' or 'thresholds' is set. + tpr (List[float]): + Optional. List of true positive rates for the ROC curve. Must be set if 'fpr' or 'thresholds' is set. 
+ threshold (List[float]): + Optional. List of thresholds for the ROC curve. Must be set if 'fpr' or 'tpr' is set. + display_name (str): + Optional. The user-defined name for the classification metric artifact. + + Raises: + ValueError: if 'labels' and 'matrix' are not set together + or if 'labels' and 'matrix' are not in the same length + or if 'fpr' and 'tpr' and 'threshold' are not set together + or if 'fpr' and 'tpr' and 'threshold' are not in the same length + """ + + self._validate_experiment_and_run(method_name="log_classification_metrics") + # query the latest metrics artifact resource before logging. + self._experiment_run.log_classification_metrics( + display_name=display_name, + labels=labels, + matrix=matrix, + fpr=fpr, + tpr=tpr, + threshold=threshold, + ) + def _validate_experiment_and_run(self, method_name: str): """Validates Experiment and Run are set and raises informative error message. diff --git a/samples/model-builder/conftest.py b/samples/model-builder/conftest.py index 0f984aa7ee..a94ba6fe15 100644 --- a/samples/model-builder/conftest.py +++ b/samples/model-builder/conftest.py @@ -824,6 +824,12 @@ def mock_log_params(): mock_log_params.return_value = None yield mock_log_params +@pytest.fixture +def mock_log_classification_metrics(): + with patch.object(aiplatform, "log_classification_metrics") as mock_log_metrics: + mock_log_metrics.return_value = None + yield mock_log_metrics + @pytest.fixture def mock_log_pipeline_job(): diff --git a/samples/model-builder/experiment_tracking/log_classification_metrics_sample.py b/samples/model-builder/experiment_tracking/log_classification_metrics_sample.py new file mode 100644 index 0000000000..7b908e5b13 --- /dev/null +++ b/samples/model-builder/experiment_tracking/log_classification_metrics_sample.py @@ -0,0 +1,47 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
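+# Sample: logs a confusion matrix and ROC curve values to a Vertex AI experiment run.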
+ +from typing import List, Optional + +from google.cloud import aiplatform + + +# [START aiplatform_sdk_log_classification_metrics_sample] +def log_classification_metrics_sample( + experiment_name: str, + run_name: str, + project: str, + location: str, + labels: Optional[List[str]] = None, + matrix: Optional[List[List[int]]] = None, + fpr: Optional[List[float]] = None, + tpr: Optional[List[float]] = None, + threshold: Optional[List[float]] = None, + display_name: Optional[str] = None, +): + aiplatform.init(experiment=experiment_name, project=project, location=location) + + aiplatform.start_run(run=run_name, resume=True) + + aiplatform.log_classification_metrics( + labels=labels, + matrix=matrix, + fpr=fpr, + tpr=tpr, + threshold=threshold, + display_name=display_name, + ) + + +# [END aiplatform_sdk_log_params_sample] diff --git a/samples/model-builder/experiment_tracking/log_classification_metrics_sample_test.py b/samples/model-builder/experiment_tracking/log_classification_metrics_sample_test.py new file mode 100644 index 0000000000..794fcc413b --- /dev/null +++ b/samples/model-builder/experiment_tracking/log_classification_metrics_sample_test.py @@ -0,0 +1,38 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import log_classification_metrics_sample + +import pytest + +import test_constants as constants + + +@pytest.mark.usefixtures("mock_sdk_init", "mock_start_run") +def test_log_metrics_sample(mock_log_classification_metrics): + + log_classification_metrics_sample.log_classification_metrics_sample( + experiment_name=constants.EXPERIMENT_NAME, + run_name=constants.EXPERIMENT_RUN_NAME, + project=constants.PROJECT, + location=constants.LOCATION, + labels=constants.CLASSIFICATION_METRICS["labels"], + matrix=constants.CLASSIFICATION_METRICS["matrix"], + fpr=constants.CLASSIFICATION_METRICS["fpr"], + tpr=constants.CLASSIFICATION_METRICS["tpr"], + threshold=constants.CLASSIFICATION_METRICS["threshold"], + display_name=constants.CLASSIFICATION_METRICS["display_name"], + ) + + mock_log_classification_metrics.assert_called_with(constants.CLASSIFICATION_METRICS) diff --git a/samples/model-builder/test_constants.py b/samples/model-builder/test_constants.py index 1ff2b1d96e..76f8d7673b 100644 --- a/samples/model-builder/test_constants.py +++ b/samples/model-builder/test_constants.py @@ -272,7 +272,14 @@ METRICS = {"accuracy": 0.1} PARAMS = {"learning_rate": 0.1} - +CLASSIFICATION_METRICS = { + "display_name": "my-classification-metrics", + "labels": ["cat", "dog"], + "matrix": [[9, 1], [1, 9]], + "fpr": [0.1, 0.5, 0.9], + "tpr": [0.1, 0.7, 0.9], + "threshold": [0.9, 0.5, 0.1], +} TEMPLATE_PATH = "pipeline.json" STEP = 1 diff --git a/tests/system/aiplatform/test_experiments.py b/tests/system/aiplatform/test_experiments.py index ada7c68f82..83d96d945e 100644 --- a/tests/system/aiplatform/test_experiments.py +++ b/tests/system/aiplatform/test_experiments.py @@ -37,6 +37,15 @@ _TIME_SERIES_METRIC_KEY = "accuracy" +_CLASSIFICATION_METRICS = { + "display_name": 
"my-classification-metrics", + "labels": ["cat", "dog"], + "matrix": [[9, 1], [1, 9]], + "fpr": [0.1, 0.5, 0.9], + "tpr": [0.1, 0.7, 0.9], + "threshold": [0.9, 0.5, 0.1], +} + @pytest.mark.usefixtures( "prepare_staging_bucket", "delete_staging_bucket", "tear_down_resources" @@ -145,6 +154,28 @@ def test_log_time_series_metrics(self): _TIME_SERIES_METRIC_KEY: [float(value) for value in range(5)], } + def test_log_classification_metrics(self, shared_state): + aiplatform.init( + project=e2e_base._PROJECT, + location=e2e_base._LOCATION, + experiment=self._experiment_name, + ) + aiplatform.start_run(_RUN, resume=True) + aiplatform.log_classification_metrics( + display_name=_CLASSIFICATION_METRICS["display_name"], + labels=_CLASSIFICATION_METRICS["labels"], + matrix=_CLASSIFICATION_METRICS["matrix"], + fpr=_CLASSIFICATION_METRICS["fpr"], + tpr=_CLASSIFICATION_METRICS["tpr"], + threshold=_CLASSIFICATION_METRICS["threshold"], + ) + + run = aiplatform.ExperimentRun(run_name=_RUN, experiment=self._experiment_name) + metrics = run.get_classification_metrics()[0] + metric_artifact = aiplatform.Artifact(metrics.pop("id")) + assert metrics == _CLASSIFICATION_METRICS + metric_artifact.delete() + def test_create_artifact(self, shared_state): ds = aiplatform.Artifact.create( schema_title="system.Dataset", diff --git a/tests/unit/aiplatform/test_metadata.py b/tests/unit/aiplatform/test_metadata.py index ba5a527683..3dacac2547 100644 --- a/tests/unit/aiplatform/test_metadata.py +++ b/tests/unit/aiplatform/test_metadata.py @@ -123,6 +123,16 @@ _TEST_METRICS = {_TEST_METRIC_KEY_1: 222, _TEST_METRIC_KEY_2: 1} _TEST_OTHER_METRICS = {_TEST_METRIC_KEY_2: 0.9} +# classification_metrics +_TEST_CLASSIFICATION_METRICS = { + "display_name": "my-classification-metrics", + "labels": ["cat", "dog"], + "matrix": [[9, 1], [1, 9]], + "fpr": [0.1, 0.5, 0.9], + "tpr": [0.1, 0.7, 0.9], + "threshold": [0.9, 0.5, 0.1], +} + # schema _TEST_WRONG_SCHEMA_TITLE = "system.WrongSchema" @@ -408,6 +418,34 @@ def query_execution_inputs_and_outputs_mock(): yield query_execution_inputs_and_outputs_mock +_TEST_CLASSIFICATION_METRICS_METADATA = { + "confusionMatrix": { + "annotationSpecs": [{"displayName": "cat"}, {"displayName": "dog"}], + "rows": [{"row": [9, 1]}, {"row": [1, 9]}], + }, + "confidenceMetrics": [ + {"confidenceThreshold": 0.9, "recall": 0.1, "falsePositiveRate": 0.1}, + {"confidenceThreshold": 0.5, "recall": 0.5, "falsePositiveRate": 0.7}, + {"confidenceThreshold": 0.1, "recall": 0.9, "falsePositiveRate": 0.9}, + ], +} + +_TEST_CLASSIFICATION_METRICS_ARTIFACT = GapicArtifact( + name=_TEST_ARTIFACT_NAME, + display_name=_TEST_CLASSIFICATION_METRICS["display_name"], + schema_title=constants.GOOGLE_CLASSIFICATION_METRICS, + schema_version=constants._DEFAULT_SCHEMA_VERSION, + metadata=_TEST_CLASSIFICATION_METRICS_METADATA, +) + + +@pytest.fixture +def create_artifact_mock(): + with patch.object(MetadataServiceClient, "create_artifact") as create_artifact_mock: + create_artifact_mock.return_value = _TEST_CLASSIFICATION_METRICS_ARTIFACT + yield create_artifact_mock + + @pytest.fixture def get_artifact_mock(): with patch.object(MetadataServiceClient, "get_artifact") as get_artifact_mock: @@ -1131,6 +1169,40 @@ def test_log_metrics(self, update_context_mock): update_context_mock.assert_called_once_with(context=_TRUE_CONTEXT) + @pytest.mark.usefixtures( + "get_metadata_store_mock", + "get_experiment_mock", + "create_experiment_run_context_mock", + "add_context_children_mock", + ) + def test_log_classification_metrics( + self, + 
create_artifact_mock, + add_context_artifacts_and_executions_mock, + ): + aiplatform.init( + project=_TEST_PROJECT, + location=_TEST_LOCATION, + experiment=_TEST_EXPERIMENT, + ) + aiplatform.start_run(_TEST_RUN) + aiplatform.log_classification_metrics( + display_name=_TEST_CLASSIFICATION_METRICS["display_name"], + labels=_TEST_CLASSIFICATION_METRICS["labels"], + matrix=_TEST_CLASSIFICATION_METRICS["matrix"], + fpr=_TEST_CLASSIFICATION_METRICS["fpr"], + tpr=_TEST_CLASSIFICATION_METRICS["tpr"], + threshold=_TEST_CLASSIFICATION_METRICS["threshold"], + ) + + create_artifact_mock.assert_called_once_with( + metadata=_TEST_CLASSIFICATION_METRICS_METADATA + ) + + add_context_artifacts_and_executions_mock.assert_called_once_with( + artifact_resource_names=[_TEST_ARTIFACT_NAME] + ) + @pytest.mark.usefixtures( "get_metadata_store_mock", "get_experiment_mock", From ee36af93f08f860813b5fc863c635a766e5e497e Mon Sep 17 00:00:00 2001 From: jaycee-li Date: Mon, 26 Sep 2022 09:15:36 -0700 Subject: [PATCH 02/14] fix: import error --- google/cloud/aiplatform/metadata/experiment_run_resource.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/google/cloud/aiplatform/metadata/experiment_run_resource.py b/google/cloud/aiplatform/metadata/experiment_run_resource.py index 31026fb23b..3aedaee44d 100644 --- a/google/cloud/aiplatform/metadata/experiment_run_resource.py +++ b/google/cloud/aiplatform/metadata/experiment_run_resource.py @@ -39,7 +39,9 @@ from google.cloud.aiplatform.metadata import metadata from google.cloud.aiplatform.metadata import resource from google.cloud.aiplatform.metadata import utils as metadata_utils -from google.cloud.aiplatform.metadata import schema +from google.cloud.aiplatform.metadata.schema.google import ( + artifact_schema as google_artifact_schema, +) from google.cloud.aiplatform.tensorboard import tensorboard_resource from google.cloud.aiplatform.utils import rest_utils @@ -1079,7 +1081,7 @@ def log_classification_metrics( for i in range(len(fpr)) ] - classification_metrics = schema.google.artifact_schema.ClassificationMetrics( + classification_metrics = google_artifact_schema.ClassificationMetrics( display_name=display_name, metadata=metadata, ) From c1aa713f71e8c23ea83c72ab087a4e1176b0c4c6 Mon Sep 17 00:00:00 2001 From: jaycee-li Date: Mon, 26 Sep 2022 09:26:24 -0700 Subject: [PATCH 03/14] Update log_classification_metrics_sample.py --- .../experiment_tracking/log_classification_metrics_sample.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/model-builder/experiment_tracking/log_classification_metrics_sample.py b/samples/model-builder/experiment_tracking/log_classification_metrics_sample.py index 7b908e5b13..0f93ab35bf 100644 --- a/samples/model-builder/experiment_tracking/log_classification_metrics_sample.py +++ b/samples/model-builder/experiment_tracking/log_classification_metrics_sample.py @@ -44,4 +44,4 @@ def log_classification_metrics_sample( ) -# [END aiplatform_sdk_log_params_sample] +# [END aiplatform_sdk_log_classification_metrics_sample] From 0ea90bf1090e6edd88ce52ef17e6549ccbfc8f2f Mon Sep 17 00:00:00 2001 From: Jaycee Li <102714969+jaycee-li@users.noreply.github.com> Date: Mon, 26 Sep 2022 09:43:09 -0700 Subject: [PATCH 04/14] Update samples/model-builder/experiment_tracking/log_classification_metrics_sample.py Co-authored-by: Dan Lee <71398022+dandhlee@users.noreply.github.com> --- .../experiment_tracking/log_classification_metrics_sample.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/samples/model-builder/experiment_tracking/log_classification_metrics_sample.py b/samples/model-builder/experiment_tracking/log_classification_metrics_sample.py index 0f93ab35bf..e178356c9c 100644 --- a/samples/model-builder/experiment_tracking/log_classification_metrics_sample.py +++ b/samples/model-builder/experiment_tracking/log_classification_metrics_sample.py @@ -29,7 +29,7 @@ def log_classification_metrics_sample( tpr: Optional[List[float]] = None, threshold: Optional[List[float]] = None, display_name: Optional[str] = None, -): +) -> None: aiplatform.init(experiment=experiment_name, project=project, location=location) aiplatform.start_run(run=run_name, resume=True) From 33b03ff7b31c5d8a6ee57a22f8e67d8f57329ca9 Mon Sep 17 00:00:00 2001 From: jaycee-li Date: Mon, 26 Sep 2022 09:56:33 -0700 Subject: [PATCH 05/14] Update log_classification_metrics_sample_test.py --- .../log_classification_metrics_sample_test.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/samples/model-builder/experiment_tracking/log_classification_metrics_sample_test.py b/samples/model-builder/experiment_tracking/log_classification_metrics_sample_test.py index 794fcc413b..c15fd0b123 100644 --- a/samples/model-builder/experiment_tracking/log_classification_metrics_sample_test.py +++ b/samples/model-builder/experiment_tracking/log_classification_metrics_sample_test.py @@ -35,4 +35,11 @@ def test_log_metrics_sample(mock_log_classification_metrics): display_name=constants.CLASSIFICATION_METRICS["display_name"], ) - mock_log_classification_metrics.assert_called_with(constants.CLASSIFICATION_METRICS) + mock_log_classification_metrics.assert_called_with( + labels=constants.CLASSIFICATION_METRICS["labels"], + matrix=constants.CLASSIFICATION_METRICS["matrix"], + fpr=constants.CLASSIFICATION_METRICS["fpr"], + tpr=constants.CLASSIFICATION_METRICS["tpr"], + threshold=constants.CLASSIFICATION_METRICS["threshold"], + display_name=constants.CLASSIFICATION_METRICS["display_name"], + ) From 5bf0d13f146cfd0705be97987ff784bf0d331c72 Mon Sep 17 00:00:00 2001 From: Jaycee Li <102714969+jaycee-li@users.noreply.github.com> Date: Mon, 26 Sep 2022 10:59:45 -0700 Subject: [PATCH 06/14] Update samples/model-builder/conftest.py Co-authored-by: Dan Lee <71398022+dandhlee@users.noreply.github.com> --- samples/model-builder/conftest.py | 1 + 1 file changed, 1 insertion(+) diff --git a/samples/model-builder/conftest.py b/samples/model-builder/conftest.py index a94ba6fe15..a303a3be6d 100644 --- a/samples/model-builder/conftest.py +++ b/samples/model-builder/conftest.py @@ -824,6 +824,7 @@ def mock_log_params(): mock_log_params.return_value = None yield mock_log_params + @pytest.fixture def mock_log_classification_metrics(): with patch.object(aiplatform, "log_classification_metrics") as mock_log_metrics: From df5c5d1b3f21f36289b32228048a634b34ea44ab Mon Sep 17 00:00:00 2001 From: jaycee-li Date: Mon, 26 Sep 2022 13:20:47 -0700 Subject: [PATCH 07/14] fix: unit test --- tests/unit/aiplatform/test_metadata.py | 41 ++++++++++++++++++-------- 1 file changed, 29 insertions(+), 12 deletions(-) diff --git a/tests/unit/aiplatform/test_metadata.py b/tests/unit/aiplatform/test_metadata.py index 3dacac2547..aa0907def1 100644 --- a/tests/unit/aiplatform/test_metadata.py +++ b/tests/unit/aiplatform/test_metadata.py @@ -54,8 +54,12 @@ from google.cloud.aiplatform.metadata import constants from google.cloud.aiplatform.metadata import experiment_run_resource from google.cloud.aiplatform.metadata import metadata +from 
google.cloud.aiplatform.metadata import artifact +from google.cloud.aiplatform.metadata import context from google.cloud.aiplatform.metadata import metadata_store from google.cloud.aiplatform.metadata import utils as metadata_utils +from google.cloud.aiplatform.metadata.schema import base_artifact + from google.cloud.aiplatform import utils from test_pipeline_jobs import mock_pipeline_service_get # noqa: F401 @@ -430,7 +434,7 @@ def query_execution_inputs_and_outputs_mock(): ], } -_TEST_CLASSIFICATION_METRICS_ARTIFACT = GapicArtifact( +_TEST_CLASSIFICATION_METRICS_ARTIFACT_RESOURCE = GapicArtifact( name=_TEST_ARTIFACT_NAME, display_name=_TEST_CLASSIFICATION_METRICS["display_name"], schema_title=constants.GOOGLE_CLASSIFICATION_METRICS, @@ -438,12 +442,29 @@ def query_execution_inputs_and_outputs_mock(): metadata=_TEST_CLASSIFICATION_METRICS_METADATA, ) +_TEST_CLASSIFICATION_METRICS_ARTIFACT = artifact.Artifact._empty_constructor() +_TEST_CLASSIFICATION_METRICS_ARTIFACT._gca_resource = ( + _TEST_CLASSIFICATION_METRICS_ARTIFACT_RESOURCE +) + @pytest.fixture -def create_artifact_mock(): - with patch.object(MetadataServiceClient, "create_artifact") as create_artifact_mock: - create_artifact_mock.return_value = _TEST_CLASSIFICATION_METRICS_ARTIFACT - yield create_artifact_mock +def create_classification_metrics_artifact_mock(): + with patch.object( + base_artifact.BaseArtifactSchema, "create" + ) as create_classification_metrics_artifact_mock: + create_classification_metrics_artifact_mock.return_value = ( + _TEST_CLASSIFICATION_METRICS_ARTIFACT + ) + yield create_classification_metrics_artifact_mock + + +@pytest.fixture +def context_add_artifacts_and_executions_mock(): + with patch.object( + context.Context, "add_artifacts_and_executions" + ) as context_add_artifacts_and_executions_mock: + yield context_add_artifacts_and_executions_mock @pytest.fixture @@ -1174,11 +1195,11 @@ def test_log_metrics(self, update_context_mock): "get_experiment_mock", "create_experiment_run_context_mock", "add_context_children_mock", + "create_classification_metrics_artifact_mock", ) def test_log_classification_metrics( self, - create_artifact_mock, - add_context_artifacts_and_executions_mock, + context_add_artifacts_and_executions_mock, ): aiplatform.init( project=_TEST_PROJECT, @@ -1195,11 +1216,7 @@ def test_log_classification_metrics( threshold=_TEST_CLASSIFICATION_METRICS["threshold"], ) - create_artifact_mock.assert_called_once_with( - metadata=_TEST_CLASSIFICATION_METRICS_METADATA - ) - - add_context_artifacts_and_executions_mock.assert_called_once_with( + context_add_artifacts_and_executions_mock.assert_called_once_with( artifact_resource_names=[_TEST_ARTIFACT_NAME] ) From a82194a6c5d6a0dd9f467009d9074b8e2bb1ca51 Mon Sep 17 00:00:00 2001 From: jaycee-li Date: Wed, 28 Sep 2022 12:50:35 -0700 Subject: [PATCH 08/14] fix comments --- .../aiplatform/metadata/experiment_run_resource.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/google/cloud/aiplatform/metadata/experiment_run_resource.py b/google/cloud/aiplatform/metadata/experiment_run_resource.py index 3aedaee44d..a17d616b86 100644 --- a/google/cloud/aiplatform/metadata/experiment_run_resource.py +++ b/google/cloud/aiplatform/metadata/experiment_run_resource.py @@ -993,6 +993,7 @@ def log_metrics(self, metrics: Dict[str, Union[float, int, str]]): # TODO: query the latest metrics artifact resource before logging. 
self._metadata_node.update(metadata={constants._METRIC_KEY: metrics}) + @_v1_not_supported def log_classification_metrics( self, *, @@ -1003,7 +1004,7 @@ def log_classification_metrics( threshold: Optional[List[float]] = None, display_name: Optional[str] = None, ): - """Create an artifact for classification metrics and log to ExperimentRun. Currently support confusion matrix and ROC curve. + """Create an artifact for classification metrics and log to ExperimentRun. Currently supports confusion matrix and ROC curve. ``` my_run = aiplatform.ExperimentRun('my-run', experiment='my-experiment') @@ -1074,11 +1075,13 @@ def log_classification_metrics( metadata["confidenceMetrics"] = [ { - "confidenceThreshold": threshold[i], - "recall": tpr[i], - "falsePositiveRate": fpr[i], + "confidenceThreshold": confidenceThreshold, + "recall": recall, + "falsePositiveRate": falsePositiveRate, } - for i in range(len(fpr)) + for falsePositiveRate, recall, confidenceThreshold in zip( + fpr, tpr, threshold + ) ] classification_metrics = google_artifact_schema.ClassificationMetrics( @@ -1249,6 +1252,7 @@ def get_metrics(self) -> Dict[str, Union[float, int, str]]: else: return self._metadata_node.metadata[constants._METRIC_KEY] + @_v1_not_supported def get_classification_metrics(self) -> List[Dict[str, Union[str, List]]]: """Get all the classification metrics logged to this run. From 9c04a50a485a88dc074531a00888c1eb5b6be7b3 Mon Sep 17 00:00:00 2001 From: jaycee-li Date: Wed, 28 Sep 2022 18:33:20 -0700 Subject: [PATCH 09/14] fix comments and update google.ClassificationMetrics --- tests/unit/aiplatform/test_metadata_schema.py | 55 ++++++++++++++++--- 1 file changed, 48 insertions(+), 7 deletions(-) diff --git a/tests/unit/aiplatform/test_metadata_schema.py b/tests/unit/aiplatform/test_metadata_schema.py index 0003838968..2a1705feeb 100644 --- a/tests/unit/aiplatform/test_metadata_schema.py +++ b/tests/unit/aiplatform/test_metadata_schema.py @@ -64,7 +64,6 @@ _TEST_DESCRIPTION = "test description" _TEST_METADATA = {"test-param1": 1, "test-param2": "test-value", "test-param3": True} _TEST_UPDATED_METADATA = { - "test-param1": 2, "test-param2": "test-value-1", "test-param3": False, } @@ -748,14 +747,46 @@ def test_classification_metrics_title_is_set_correctly(self): assert artifact.schema_title == "google.ClassificationMetrics" def test_classification_metrics_constructor_parameters_are_set_correctly(self): + aggregation_type = "MACRO_AVERAGE" + aggregation_threshold = 0.5 + recall = 0.5 + precision = 0.5 + f1_score = 0.5 + accuracy = 0.5 au_prc = 1.0 au_roc = 2.0 log_loss = 0.5 + confusion_matrix = utils.ConfusionMatrix( + matrix=[[9.0, 1.0], [1.0, 9.0]], + annotation_specs=[ + utils.AnnotationSpec(display_name="cat"), + utils.AnnotationSpec(display_name="dog"), + ], + ) + confidence_metrics = [ + utils.ConfidenceMetric( + confidence_threshold=0.9, recall=0.1, false_positive_rate=0.1 + ), + utils.ConfidenceMetric( + confidence_threshold=0.5, recall=0.5, false_positive_rate=0.7 + ), + utils.ConfidenceMetric( + confidence_threshold=0.1, recall=0.9, false_positive_rate=0.9 + ), + ] artifact = google_artifact_schema.ClassificationMetrics( + aggregation_type=aggregation_type, + aggregation_threshold=aggregation_threshold, + recall=recall, + precision=precision, + f1_score=f1_score, + accuracy=accuracy, au_prc=au_prc, au_roc=au_roc, log_loss=log_loss, + confusion_matrix=confusion_matrix, + confidence_metrics=confidence_metrics, artifact_id=_TEST_ARTIFACT_ID, uri=_TEST_URI, display_name=_TEST_DISPLAY_NAME, @@ -764,12 +795,22 
@@ def test_classification_metrics_constructor_parameters_are_set_correctly(self): metadata=_TEST_UPDATED_METADATA, ) expected_metadata = { - "test-param1": 2.0, - "test-param2": "test-value-1", - "test-param3": False, - "auPrc": 1.0, - "auRoc": 2.0, - "logLoss": 0.5, + "test-param1": _TEST_UPDATED_METADATA["test-param1"], + "test-param2": _TEST_UPDATED_METADATA["test-param2"], + "test-param3": _TEST_UPDATED_METADATA["test-param3"], + "aggregationType": aggregation_type, + "aggregationThreshold": aggregation_threshold, + "recall": recall, + "precision": precision, + "f1Score": f1_score, + "accuracy": accuracy, + "auPrc": au_prc, + "auRoc": au_roc, + "logLoss": log_loss, + "confusionMatrix": confusion_matrix.to_dict(), + "confidenceMetrics": [ + confidence_metric.to_dict() for confidence_metric in confidence_metrics + ], } assert artifact.artifact_id == _TEST_ARTIFACT_ID From 796f196804935a5b5eda0ee969bb73142632f2e6 Mon Sep 17 00:00:00 2001 From: jaycee-li Date: Wed, 28 Sep 2022 18:36:30 -0700 Subject: [PATCH 10/14] fix comments and update ClassificationMetrics class --- .../metadata/experiment_run_resource.py | 37 +++-- .../metadata/schema/google/artifact_schema.py | 58 ++++++- .../cloud/aiplatform/metadata/schema/utils.py | 148 ++++++++++++++++++ tests/unit/aiplatform/test_metadata.py | 56 ++++--- tests/unit/aiplatform/test_metadata_schema.py | 1 + 5 files changed, 262 insertions(+), 38 deletions(-) diff --git a/google/cloud/aiplatform/metadata/experiment_run_resource.py b/google/cloud/aiplatform/metadata/experiment_run_resource.py index a17d616b86..326948dcec 100644 --- a/google/cloud/aiplatform/metadata/experiment_run_resource.py +++ b/google/cloud/aiplatform/metadata/experiment_run_resource.py @@ -39,6 +39,7 @@ from google.cloud.aiplatform.metadata import metadata from google.cloud.aiplatform.metadata import resource from google.cloud.aiplatform.metadata import utils as metadata_utils +from google.cloud.aiplatform.metadata.schema import utils as schema_utils from google.cloud.aiplatform.metadata.schema.google import ( artifact_schema as google_artifact_schema, ) @@ -1044,7 +1045,6 @@ def log_classification_metrics( if (fpr or tpr or threshold) and not (fpr and tpr and threshold): raise ValueError("fpr, tpr, and thresholds must be set together.") - metadata = {} if labels and matrix: if len(matrix) != len(labels): raise ValueError( @@ -1053,12 +1053,13 @@ def log_classification_metrics( len(labels), len(matrix) ) ) - - confusion_matrix = { - "annotationSpecs": [{"displayName": label} for label in labels], - "rows": matrix, - } - metadata["confusionMatrix"] = confusion_matrix + annotation_specs = [ + schema_utils.AnnotationSpec(display_name=label) for label in labels + ] + confusion_matrix = schema_utils.ConfusionMatrix( + annotation_specs=annotation_specs, + matrix=matrix, + ) if fpr and tpr and threshold: if ( @@ -1073,21 +1074,23 @@ def log_classification_metrics( ) ) - metadata["confidenceMetrics"] = [ - { - "confidenceThreshold": confidenceThreshold, - "recall": recall, - "falsePositiveRate": falsePositiveRate, - } - for falsePositiveRate, recall, confidenceThreshold in zip( - fpr, tpr, threshold + confidence_metrics = [ + schema_utils.ConfidenceMetric( + confidence_threshold=confidence_threshold, + false_positive_rate=false_positive_rate, + recall=recall, + ) + for confidence_threshold, false_positive_rate, recall in zip( + threshold, fpr, tpr ) ] classification_metrics = google_artifact_schema.ClassificationMetrics( display_name=display_name, - metadata=metadata, + 
confusion_matrix=confusion_matrix, + confidence_metrics=confidence_metrics, ) + classfication_metrics = classification_metrics.create() self._metadata_node.add_artifacts_and_executions( artifact_resource_names=[classfication_metrics.resource_name] @@ -1279,7 +1282,7 @@ def get_classification_metrics(self) -> List[Dict[str, Union[str, List]]]: artifact_list = artifact.Artifact.list( filter=metadata_utils._make_filter_string( in_context=[self.resource_name], - schema_title="google.ClassificationMetrics", + schema_title=google_artifact_schema.ClassificationMetrics.schema_title, ), project=self.project, location=self.location, diff --git a/google/cloud/aiplatform/metadata/schema/google/artifact_schema.py b/google/cloud/aiplatform/metadata/schema/google/artifact_schema.py index e52f2f98b5..e1b25f048d 100644 --- a/google/cloud/aiplatform/metadata/schema/google/artifact_schema.py +++ b/google/cloud/aiplatform/metadata/schema/google/artifact_schema.py @@ -15,7 +15,7 @@ # limitations under the License. import copy -from typing import Optional, Dict +from typing import Optional, Dict, List from google.cloud.aiplatform.compat.types import artifact as gca_artifact from google.cloud.aiplatform.metadata.schema import base_artifact @@ -24,6 +24,12 @@ # The artifact property key for the resource_name _ARTIFACT_PROPERTY_KEY_RESOURCE_NAME = "resourceName" +_CLASSIFICATION_METRICS_AGGREGATION_TYPE = [ + "AGGREGATION_TYPE_UNSPECIFIED", + "MACRO_AVERAGE", + "MICRO_AVERAGE", +] + class VertexDataset(base_artifact.BaseArtifactSchema): """An artifact representing a Vertex Dataset.""" @@ -278,9 +284,17 @@ class ClassificationMetrics(base_artifact.BaseArtifactSchema): def __init__( self, *, + aggregation_type: Optional[str] = None, + aggregation_threshold: Optional[float] = None, + recall: Optional[float] = None, + precision: Optional[float] = None, + f1_score: Optional[float] = None, + accuracy: Optional[float] = None, au_prc: Optional[float] = None, au_roc: Optional[float] = None, log_loss: Optional[float] = None, + confusion_matrix: Optional[utils.ConfusionMatrix] = None, + confidence_metrics: Optional[List[utils.ConfidenceMetric]] = None, artifact_id: Optional[str] = None, uri: Optional[str] = None, display_name: Optional[str] = None, @@ -290,6 +304,22 @@ def __init__( state: Optional[gca_artifact.Artifact.State] = gca_artifact.Artifact.State.LIVE, ): """Args: + aggregation_type (str): + Optional. The way to generate the aggregated metrics. Choose from the following options: + "AGGREGATION_TYPE_UNSPECIFIED": Indicating unset, used for per-class sliced metrics + "MACRO_AVERAGE": The unweighted average, default behavior + "MICRO_AVERAGE": The weighted average + aggregation_threshold (float): + Optional. The threshold used to generate aggregated metrics, default 0 for multi-class classification, 0.5 for binary classification. + recall (float): + Optional. Recall (True Positive Rate) for the given confidence threshold. + precision (float): + Optional. Precision for the given confidence threshold. + f1_score (float): + Optional. The harmonic mean of recall and precision. + accuracy (float): + Optional. Accuracy is the fraction of predictions given the correct label. + For multiclass this is a micro-average metric. au_prc (float): Optional. The Area Under Precision-Recall Curve metric. Micro-averaged for the overall evaluation. @@ -298,6 +328,10 @@ def __init__( Micro-averaged for the overall evaluation. log_loss (float): Optional. The Log Loss metric. + confusion_matrix (utils.ConfusionMatrix): + Optional. 
Aggregated confusion matrix. + confidence_metrics (List[utils.ConfidenceMetric]): + Optional. List of metrics for different confidence thresholds. artifact_id (str): Optional. The portion of the Artifact name with the format. This is globally unique in a metadataStore: @@ -323,12 +357,34 @@ def __init__( check the validity of state transitions. """ extended_metadata = copy.deepcopy(metadata) if metadata else {} + if aggregation_type: + if aggregation_type not in _CLASSIFICATION_METRICS_AGGREGATION_TYPE: + raise ValueError( + "aggregation_type can only be 'AGGREGATION_TYPE_UNSPECIFIED', 'MACRO_AVERAGE', or 'MICRO_AVERAGE'." + ) + extended_metadata["aggregationType"] = aggregation_type + if aggregation_threshold: + extended_metadata["aggregationThreshold"] = aggregation_threshold + if recall: + extended_metadata["recall"] = recall + if precision: + extended_metadata["precision"] = precision + if f1_score: + extended_metadata["f1Score"] = f1_score + if accuracy: + extended_metadata["accuracy"] = accuracy if au_prc: extended_metadata["auPrc"] = au_prc if au_roc: extended_metadata["auRoc"] = au_roc if log_loss: extended_metadata["logLoss"] = log_loss + if confusion_matrix: + extended_metadata["confusionMatrix"] = confusion_matrix.to_dict() + if confidence_metrics: + extended_metadata["confidenceMetrics"] = [ + confidence_metric.to_dict() for confidence_metric in confidence_metrics + ] super(ClassificationMetrics, self).__init__( uri=uri, diff --git a/google/cloud/aiplatform/metadata/schema/utils.py b/google/cloud/aiplatform/metadata/schema/utils.py index 1b4a5e4f6c..fbda44d6ae 100644 --- a/google/cloud/aiplatform/metadata/schema/utils.py +++ b/google/cloud/aiplatform/metadata/schema/utils.py @@ -143,6 +143,154 @@ def to_dict(self): return results +@dataclass +class AnnotationSpec: + """A class that represents the annotation spec of a Confusion Matrix. + Args: + display_name (str): + Optional. Display name for a column of a confusion matrix. + id (List[str]): + Optional. Id for a column of a confusion matrix. + """ + + display_name: Optional[str] = None + id: Optional[str] = None + + def to_dict(self): + """ML metadata schema dictionary representation of this DataClass""" + results = {} + if self.display_name: + results["displayName"] = self.display_name + if self.id: + results["id"] = self.id + + return results + + +@dataclass +class ConfusionMatrix: + """A class that represents a Confusion Matrix. + Args: + matrix (List[List[int]]): + Required. A 2D array of integers that represets the values for the confusion matrix. + annotation_specs: (List(AnnotationSpec)): + Optional. List of column annotation specs which contains display_name (str) and id (str) + """ + + matrix: List[List[int]] + annotation_specs: Optional[List[AnnotationSpec]] = None + + def to_dict(self): + """ML metadata schema dictionary representation of this DataClass""" + results = {} + if self.annotation_specs: + results["annotationSpecs"] = [ + annotation_spec.to_dict() for annotation_spec in self.annotation_specs + ] + if self.matrix: + results["rows"] = self.matrix + + return results + + +@dataclass +class ConfidenceMetric: + """A class that represents a Confidence Metric. + Args: + confidence_threshold (float): + Required. Metrics are computed with an assumption that the Model never returns predictions with a score lower than this value. + For binary classification this is the positive class threshold. For multi-class classification this is the confidence threshold. + recall (float): + Optional. 
Recall (True Positive Rate) for the given confidence threshold. + precision (float): + Optional. Precision for the given confidence threshold. + f1_score (float): + Optional. The harmonic mean of recall and precision. + max_predictions (int): + Optional. Metrics are computed with an assumption that the Model always returns at most this many predictions (ordered by their score, descendingly). + But they all still need to meet the `confidence_threshold`. + false_positive_rate (float): + Optional. False Positive Rate for the given confidence threshold. + accuracy (float): + Optional. Accuracy is the fraction of predictions given the correct label. For multiclass this is a micro-average metric. + true_positive_count (int): + Optional. The number of Model created labels that match a ground truth label. + false_positive_count (int): + Optional. The number of Model created labels that do not match a ground truth label. + false_negative_count (int): + Optional. The number of ground truth labels that are not matched by a Model created label. + true_negative_count (int): + Optional. The number of labels that were not created by the Model, but if they would, they would not match a ground truth label. + recall_at_1 (float): + Optional. The Recall (True Positive Rate) when only considering the label that has the highest prediction score + and not below the confidence threshold for each DataItem. + precision_at_1 (float): + Optional. The precision when only considering the label that has the highest prediction score + and not below the confidence threshold for each DataItem. + false_positive_rate_at_1 (float): + Optional. The False Positive Rate when only considering the label that has the highest prediction score + and not below the confidence threshold for each DataItem. + f1_score_at_1 (float): + Optional. The harmonic mean of recallAt1 and precisionAt1. + confusion_matrix (ConfusionMatrix): + Optional. Confusion matrix for the given confidence threshold. 
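+        Example (illustrative):
+            metric = ConfidenceMetric(confidence_threshold=0.5, recall=0.7, false_positive_rate=0.5)
+            metric.to_dict()
+            # {'confidenceThreshold': 0.5, 'recall': 0.7, 'falsePositiveRate': 0.5}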
+ """ + + confidence_threshold: float + recall: Optional[float] = None + precision: Optional[float] = None + f1_score: Optional[float] = None + max_predictions: Optional[int] = None + false_positive_rate: Optional[float] = None + accuracy: Optional[float] = None + true_positive_count: Optional[int] = None + false_positive_count: Optional[int] = None + false_negative_count: Optional[int] = None + true_negative_count: Optional[int] = None + recall_at_1: Optional[float] = None + precision_at_1: Optional[float] = None + false_positive_rate_at_1: Optional[float] = None + f1_score_at_1: Optional[float] = None + confusion_matrix: Optional[ConfusionMatrix] = None + + def to_dict(self): + """ML metadata schema dictionary representation of this DataClass""" + results = {} + results["confidenceThreshold"] = self.confidence_threshold + if self.recall: + results["recall"] = self.recall + if self.precision: + results["precision"] = self.precision + if self.f1_score: + results["f1Score"] = self.f1_score + if self.max_predictions: + results["maxPredictions"] = self.max_predictions + if self.false_positive_rate: + results["falsePositiveRate"] = self.false_positive_rate + if self.accuracy: + results["accuracy"] = self.accuracy + if self.true_positive_count: + results["truePositiveCount"] = self.true_positive_count + if self.false_positive_count: + results["falsePositiveCount"] = self.false_positive_count + if self.false_negative_count: + results["falseNegativeCount"] = self.false_negative_count + if self.true_negative_count: + results["trueNegativeCount"] = self.true_negative_count + if self.recall_at_1: + results["recallAt1"] = self.recall_at_1 + if self.precision_at_1: + results["precisionAt1"] = self.precision_at_1 + if self.false_positive_rate_at_1: + results["falsePositiveRateAt1"] = self.false_positive_rate_at_1 + if self.f1_score_at_1: + results["f1ScoreAt1"] = self.f1_score_at_1 + if self.confusion_matrix: + results["confusionMatrix"] = self.confusion_matrix.to_dict() + + return results + + def create_uri_from_resource_name(resource_name: str) -> str: """Construct the service URI for a given resource_name. 
Args: diff --git a/tests/unit/aiplatform/test_metadata.py b/tests/unit/aiplatform/test_metadata.py index aa0907def1..a8a73b899e 100644 --- a/tests/unit/aiplatform/test_metadata.py +++ b/tests/unit/aiplatform/test_metadata.py @@ -54,11 +54,8 @@ from google.cloud.aiplatform.metadata import constants from google.cloud.aiplatform.metadata import experiment_run_resource from google.cloud.aiplatform.metadata import metadata -from google.cloud.aiplatform.metadata import artifact -from google.cloud.aiplatform.metadata import context from google.cloud.aiplatform.metadata import metadata_store from google.cloud.aiplatform.metadata import utils as metadata_utils -from google.cloud.aiplatform.metadata.schema import base_artifact from google.cloud.aiplatform import utils @@ -425,33 +422,29 @@ def query_execution_inputs_and_outputs_mock(): _TEST_CLASSIFICATION_METRICS_METADATA = { "confusionMatrix": { "annotationSpecs": [{"displayName": "cat"}, {"displayName": "dog"}], - "rows": [{"row": [9, 1]}, {"row": [1, 9]}], + "rows": [[9, 1], [1, 9]], }, "confidenceMetrics": [ {"confidenceThreshold": 0.9, "recall": 0.1, "falsePositiveRate": 0.1}, - {"confidenceThreshold": 0.5, "recall": 0.5, "falsePositiveRate": 0.7}, + {"confidenceThreshold": 0.5, "recall": 0.7, "falsePositiveRate": 0.5}, {"confidenceThreshold": 0.1, "recall": 0.9, "falsePositiveRate": 0.9}, ], } -_TEST_CLASSIFICATION_METRICS_ARTIFACT_RESOURCE = GapicArtifact( +_TEST_CLASSIFICATION_METRICS_ARTIFACT = GapicArtifact( name=_TEST_ARTIFACT_NAME, display_name=_TEST_CLASSIFICATION_METRICS["display_name"], schema_title=constants.GOOGLE_CLASSIFICATION_METRICS, schema_version=constants._DEFAULT_SCHEMA_VERSION, metadata=_TEST_CLASSIFICATION_METRICS_METADATA, -) - -_TEST_CLASSIFICATION_METRICS_ARTIFACT = artifact.Artifact._empty_constructor() -_TEST_CLASSIFICATION_METRICS_ARTIFACT._gca_resource = ( - _TEST_CLASSIFICATION_METRICS_ARTIFACT_RESOURCE + state=GapicArtifact.State.LIVE, ) @pytest.fixture def create_classification_metrics_artifact_mock(): with patch.object( - base_artifact.BaseArtifactSchema, "create" + MetadataServiceClient, "create_artifact" ) as create_classification_metrics_artifact_mock: create_classification_metrics_artifact_mock.return_value = ( _TEST_CLASSIFICATION_METRICS_ARTIFACT @@ -460,11 +453,14 @@ def create_classification_metrics_artifact_mock(): @pytest.fixture -def context_add_artifacts_and_executions_mock(): +def get_classification_metrics_artifact_mock(): with patch.object( - context.Context, "add_artifacts_and_executions" - ) as context_add_artifacts_and_executions_mock: - yield context_add_artifacts_and_executions_mock + MetadataServiceClient, "get_artifact" + ) as get_classification_metrics_artifact_mock: + get_classification_metrics_artifact_mock.return_value = ( + _TEST_CLASSIFICATION_METRICS_ARTIFACT + ) + yield get_classification_metrics_artifact_mock @pytest.fixture @@ -1195,11 +1191,12 @@ def test_log_metrics(self, update_context_mock): "get_experiment_mock", "create_experiment_run_context_mock", "add_context_children_mock", - "create_classification_metrics_artifact_mock", ) def test_log_classification_metrics( self, - context_add_artifacts_and_executions_mock, + create_classification_metrics_artifact_mock, + get_classification_metrics_artifact_mock, + add_context_artifacts_and_executions_mock, ): aiplatform.init( project=_TEST_PROJECT, @@ -1216,8 +1213,27 @@ def test_log_classification_metrics( threshold=_TEST_CLASSIFICATION_METRICS["threshold"], ) - context_add_artifacts_and_executions_mock.assert_called_once_with( - 
artifact_resource_names=[_TEST_ARTIFACT_NAME] + expected_artifact = GapicArtifact( + display_name=_TEST_CLASSIFICATION_METRICS["display_name"], + schema_title=constants.GOOGLE_CLASSIFICATION_METRICS, + schema_version=constants._DEFAULT_SCHEMA_VERSION, + metadata=_TEST_CLASSIFICATION_METRICS_METADATA, + state=GapicArtifact.State.LIVE, + ) + create_classification_metrics_artifact_mock.assert_called_once_with( + parent=_TEST_PARENT, + artifact=expected_artifact, + artifact_id=None, + ) + + get_classification_metrics_artifact_mock.assert_called_once_with( + name=_TEST_ARTIFACT_NAME, retry=base._DEFAULT_RETRY + ) + + add_context_artifacts_and_executions_mock.assert_called_once_with( + context=_TEST_EXPERIMENT_RUN_CONTEXT_NAME, + artifacts=[_TEST_ARTIFACT_NAME], + executions=None, ) @pytest.mark.usefixtures( diff --git a/tests/unit/aiplatform/test_metadata_schema.py b/tests/unit/aiplatform/test_metadata_schema.py index 2a1705feeb..8af4d351da 100644 --- a/tests/unit/aiplatform/test_metadata_schema.py +++ b/tests/unit/aiplatform/test_metadata_schema.py @@ -64,6 +64,7 @@ _TEST_DESCRIPTION = "test description" _TEST_METADATA = {"test-param1": 1, "test-param2": "test-value", "test-param3": True} _TEST_UPDATED_METADATA = { + "test-param1": 2.0, "test-param2": "test-value-1", "test-param3": False, } From fbc98ab4d67b576c1a0d20f92b6d6ec7257e75c3 Mon Sep 17 00:00:00 2001 From: jaycee-li Date: Thu, 29 Sep 2022 09:24:43 -0700 Subject: [PATCH 11/14] fix: ClassificationMetrics doesn't catch params with value=0 --- .../metadata/schema/google/artifact_schema.py | 16 +++++----- .../cloud/aiplatform/metadata/schema/utils.py | 30 +++++++++---------- 2 files changed, 23 insertions(+), 23 deletions(-) diff --git a/google/cloud/aiplatform/metadata/schema/google/artifact_schema.py b/google/cloud/aiplatform/metadata/schema/google/artifact_schema.py index e1b25f048d..44a5670339 100644 --- a/google/cloud/aiplatform/metadata/schema/google/artifact_schema.py +++ b/google/cloud/aiplatform/metadata/schema/google/artifact_schema.py @@ -363,21 +363,21 @@ def __init__( "aggregation_type can only be 'AGGREGATION_TYPE_UNSPECIFIED', 'MACRO_AVERAGE', or 'MICRO_AVERAGE'." ) extended_metadata["aggregationType"] = aggregation_type - if aggregation_threshold: + if aggregation_threshold is not None: extended_metadata["aggregationThreshold"] = aggregation_threshold - if recall: + if recall is not None: extended_metadata["recall"] = recall - if precision: + if precision is not None: extended_metadata["precision"] = precision - if f1_score: + if f1_score is not None: extended_metadata["f1Score"] = f1_score - if accuracy: + if accuracy is not None: extended_metadata["accuracy"] = accuracy - if au_prc: + if au_prc is not None: extended_metadata["auPrc"] = au_prc - if au_roc: + if au_roc is not None: extended_metadata["auRoc"] = au_roc - if log_loss: + if log_loss is not None: extended_metadata["logLoss"] = log_loss if confusion_matrix: extended_metadata["confusionMatrix"] = confusion_matrix.to_dict() diff --git a/google/cloud/aiplatform/metadata/schema/utils.py b/google/cloud/aiplatform/metadata/schema/utils.py index fbda44d6ae..336699065b 100644 --- a/google/cloud/aiplatform/metadata/schema/utils.py +++ b/google/cloud/aiplatform/metadata/schema/utils.py @@ -149,7 +149,7 @@ class AnnotationSpec: Args: display_name (str): Optional. Display name for a column of a confusion matrix. - id (List[str]): + id (str): Optional. Id for a column of a confusion matrix. 
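+        Example (illustrative):
+            AnnotationSpec(display_name="cat").to_dict()  # {'displayName': 'cat'}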
""" @@ -257,33 +257,33 @@ def to_dict(self): """ML metadata schema dictionary representation of this DataClass""" results = {} results["confidenceThreshold"] = self.confidence_threshold - if self.recall: + if self.recall is not None: results["recall"] = self.recall - if self.precision: + if self.precision is not None: results["precision"] = self.precision - if self.f1_score: + if self.f1_score is not None: results["f1Score"] = self.f1_score - if self.max_predictions: + if self.max_predictions is not None: results["maxPredictions"] = self.max_predictions - if self.false_positive_rate: + if self.false_positive_rate is not None: results["falsePositiveRate"] = self.false_positive_rate - if self.accuracy: + if self.accuracy is not None: results["accuracy"] = self.accuracy - if self.true_positive_count: + if self.true_positive_count is not None: results["truePositiveCount"] = self.true_positive_count - if self.false_positive_count: + if self.false_positive_count is not None: results["falsePositiveCount"] = self.false_positive_count - if self.false_negative_count: + if self.false_negative_count is not None: results["falseNegativeCount"] = self.false_negative_count - if self.true_negative_count: + if self.true_negative_count is not None: results["trueNegativeCount"] = self.true_negative_count - if self.recall_at_1: + if self.recall_at_1 is not None: results["recallAt1"] = self.recall_at_1 - if self.precision_at_1: + if self.precision_at_1 is not None: results["precisionAt1"] = self.precision_at_1 - if self.false_positive_rate_at_1: + if self.false_positive_rate_at_1 is not None: results["falsePositiveRateAt1"] = self.false_positive_rate_at_1 - if self.f1_score_at_1: + if self.f1_score_at_1 is not None: results["f1ScoreAt1"] = self.f1_score_at_1 if self.confusion_matrix: results["confusionMatrix"] = self.confusion_matrix.to_dict() From 6fb76ba776d48d11a6d95c38aaf44066045308ef Mon Sep 17 00:00:00 2001 From: jaycee-li Date: Thu, 29 Sep 2022 12:05:26 -0700 Subject: [PATCH 12/14] add sample for get_classification_metrics --- samples/model-builder/conftest.py | 12 ++++++- ...iment_run_classification_metrics_sample.py | 34 +++++++++++++++++++ ..._run_classification_metrics_sample_test.py | 34 +++++++++++++++++++ 3 files changed, 79 insertions(+), 1 deletion(-) create mode 100644 samples/model-builder/experiment_tracking/get_experiment_run_classification_metrics_sample.py create mode 100644 samples/model-builder/experiment_tracking/get_experiment_run_classification_metrics_sample_test.py diff --git a/samples/model-builder/conftest.py b/samples/model-builder/conftest.py index a303a3be6d..6a4fde868a 100644 --- a/samples/model-builder/conftest.py +++ b/samples/model-builder/conftest.py @@ -1,4 +1,4 @@ -# Copyright 2021 Google LLC +# Copyright 2022 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -642,6 +642,11 @@ def mock_time_series_metrics(): mock = MagicMock() yield mock +@pytest.fixture +def mock_classification_metrics(): + mock = MagicMock() + yield mock + @pytest.fixture def mock_get_execution(mock_execution): @@ -889,6 +894,11 @@ def mock_get_time_series_metrics(mock_time_series_metrics, mock_experiment_run): mock_get_time_series_metrics.return_value = mock_time_series_metrics yield mock_get_time_series_metrics +@pytest.fixture +def mock_get_classification_metrics(mock_classification_metrics, mock_experiment_run): + with patch.object(mock_experiment_run, "get_classification_metrics") as mock_get_classification_metrics: + mock_get_classification_metrics.return_value = mock_classification_metrics + yield mock_get_classification_metrics """ ---------------------------------------------------------------------------- diff --git a/samples/model-builder/experiment_tracking/get_experiment_run_classification_metrics_sample.py b/samples/model-builder/experiment_tracking/get_experiment_run_classification_metrics_sample.py new file mode 100644 index 0000000000..e2676094c5 --- /dev/null +++ b/samples/model-builder/experiment_tracking/get_experiment_run_classification_metrics_sample.py @@ -0,0 +1,34 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Dict, Union, List + +from google.cloud import aiplatform + + +# [START aiplatform_sdk_get_experiment_run_classification_metrics_sample] +def get_experiment_run_classification_metrics_sample( + run_name: str, + experiment: Union[str, aiplatform.Experiment], + project: str, + location: str, +) -> List[Dict[str, Union[str, List]]]: + experiment_run = aiplatform.ExperimentRun( + run_name=run_name, experiment=experiment, project=project, location=location + ) + + return experiment_run.get_classification_metrics() + + +# [END aiplatform_sdk_get_experiment_run_classification_metrics_sample] diff --git a/samples/model-builder/experiment_tracking/get_experiment_run_classification_metrics_sample_test.py b/samples/model-builder/experiment_tracking/get_experiment_run_classification_metrics_sample_test.py new file mode 100644 index 0000000000..438cdf9199 --- /dev/null +++ b/samples/model-builder/experiment_tracking/get_experiment_run_classification_metrics_sample_test.py @@ -0,0 +1,34 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import get_experiment_run_params_sample + +import pytest + +import test_constants as constants + + +@pytest.mark.usefixtures("mock_get_run") +def test_get_experiment_run_classification_metrics_sample(mock_get_classification_metrics, mock_classification_metrics): + + classification_metrics = get_experiment_run_params_sample.get_experiment_run_classification_metrics_sample( + run_name=constants.EXPERIMENT_RUN_NAME, + experiment=constants.EXPERIMENT_NAME, + project=constants.PROJECT, + location=constants.LOCATION, + ) + + mock_get_classification_metrics.assert_called_with() + + assert classification_metrics is mock_classification_metrics From 96ef2f388227a4299f96efcbccfd919e23a1943d Mon Sep 17 00:00:00 2001 From: jaycee-li Date: Thu, 29 Sep 2022 12:22:34 -0700 Subject: [PATCH 13/14] fix linting --- samples/model-builder/conftest.py | 3 +++ .../get_experiment_run_classification_metrics_sample.py | 2 +- .../get_experiment_run_classification_metrics_sample_test.py | 4 ++-- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/samples/model-builder/conftest.py b/samples/model-builder/conftest.py index 6a4fde868a..b5bec845ba 100644 --- a/samples/model-builder/conftest.py +++ b/samples/model-builder/conftest.py @@ -642,6 +642,7 @@ def mock_time_series_metrics(): mock = MagicMock() yield mock + @pytest.fixture def mock_classification_metrics(): mock = MagicMock() @@ -894,12 +895,14 @@ def mock_get_time_series_metrics(mock_time_series_metrics, mock_experiment_run): mock_get_time_series_metrics.return_value = mock_time_series_metrics yield mock_get_time_series_metrics + @pytest.fixture def mock_get_classification_metrics(mock_classification_metrics, mock_experiment_run): with patch.object(mock_experiment_run, "get_classification_metrics") as mock_get_classification_metrics: mock_get_classification_metrics.return_value = mock_classification_metrics yield mock_get_classification_metrics + """ ---------------------------------------------------------------------------- Model Versioning Fixtures diff --git a/samples/model-builder/experiment_tracking/get_experiment_run_classification_metrics_sample.py b/samples/model-builder/experiment_tracking/get_experiment_run_classification_metrics_sample.py index e2676094c5..284ed9f968 100644 --- a/samples/model-builder/experiment_tracking/get_experiment_run_classification_metrics_sample.py +++ b/samples/model-builder/experiment_tracking/get_experiment_run_classification_metrics_sample.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Dict, Union, List +from typing import Dict, List, Union from google.cloud import aiplatform diff --git a/samples/model-builder/experiment_tracking/get_experiment_run_classification_metrics_sample_test.py b/samples/model-builder/experiment_tracking/get_experiment_run_classification_metrics_sample_test.py index 438cdf9199..3f6deb80bf 100644 --- a/samples/model-builder/experiment_tracking/get_experiment_run_classification_metrics_sample_test.py +++ b/samples/model-builder/experiment_tracking/get_experiment_run_classification_metrics_sample_test.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import get_experiment_run_params_sample +import get_experiment_run_classification_metrics_sample import pytest @@ -22,7 +22,7 @@ @pytest.mark.usefixtures("mock_get_run") def test_get_experiment_run_classification_metrics_sample(mock_get_classification_metrics, mock_classification_metrics): - classification_metrics = get_experiment_run_params_sample.get_experiment_run_classification_metrics_sample( + classification_metrics = get_experiment_run_classification_metrics_sample.get_experiment_run_classification_metrics_sample( run_name=constants.EXPERIMENT_RUN_NAME, experiment=constants.EXPERIMENT_NAME, project=constants.PROJECT, From 98cd8051bff0511c3e139bac59244c7c3015762b Mon Sep 17 00:00:00 2001 From: jaycee-li Date: Thu, 29 Sep 2022 16:54:00 -0700 Subject: [PATCH 14/14] add todos --- .../cloud/aiplatform/metadata/schema/google/artifact_schema.py | 1 + google/cloud/aiplatform/metadata/schema/utils.py | 1 + 2 files changed, 2 insertions(+) diff --git a/google/cloud/aiplatform/metadata/schema/google/artifact_schema.py b/google/cloud/aiplatform/metadata/schema/google/artifact_schema.py index 44a5670339..4941e42480 100644 --- a/google/cloud/aiplatform/metadata/schema/google/artifact_schema.py +++ b/google/cloud/aiplatform/metadata/schema/google/artifact_schema.py @@ -359,6 +359,7 @@ def __init__( extended_metadata = copy.deepcopy(metadata) if metadata else {} if aggregation_type: if aggregation_type not in _CLASSIFICATION_METRICS_AGGREGATION_TYPE: + ## Todo: add negative test case for this raise ValueError( "aggregation_type can only be 'AGGREGATION_TYPE_UNSPECIFIED', 'MACRO_AVERAGE', or 'MICRO_AVERAGE'." ) diff --git a/google/cloud/aiplatform/metadata/schema/utils.py b/google/cloud/aiplatform/metadata/schema/utils.py index 336699065b..c6e23735b6 100644 --- a/google/cloud/aiplatform/metadata/schema/utils.py +++ b/google/cloud/aiplatform/metadata/schema/utils.py @@ -181,6 +181,7 @@ class ConfusionMatrix: annotation_specs: Optional[List[AnnotationSpec]] = None def to_dict(self): + ## Todo: add a validation to check 'matrix' and 'annotation_specs' have the same length """ML metadata schema dictionary representation of this DataClass""" results = {} if self.annotation_specs:
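
---

Note on the value=0 fix in patch 11: Python truthiness treats 0, 0.0, and empty containers as falsy, so guarding optional metric fields with `if value:` silently drops legitimate zero-valued metrics (for example recall=0.0 or truePositiveCount=0), which is exactly what the switch to `is not None` addresses. A minimal standalone sketch of the difference, using a plain function rather than the SDK classes:

```python
# Minimal sketch (not SDK code): why `if value:` drops zero-valued metrics
# while `if value is not None:` keeps them.


def to_dict_truthy(recall=None, true_positive_count=None):
    results = {}
    if recall:                      # buggy: 0.0 is falsy, so it is skipped
        results["recall"] = recall
    if true_positive_count:         # buggy: 0 is falsy, so it is skipped
        results["truePositiveCount"] = true_positive_count
    return results


def to_dict_explicit(recall=None, true_positive_count=None):
    results = {}
    if recall is not None:          # fixed: only values that were never set are skipped
        results["recall"] = recall
    if true_positive_count is not None:
        results["truePositiveCount"] = true_positive_count
    return results


print(to_dict_truthy(recall=0.0, true_positive_count=0))    # {} -> metrics lost
print(to_dict_explicit(recall=0.0, true_positive_count=0))  # {'recall': 0.0, 'truePositiveCount': 0}
```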
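Note on the TODO added in patch 14 to `ConfusionMatrix.to_dict`: it asks for a length check between `matrix` and `annotation_specs`. One possible shape for that validation, sketched outside the SDK with plain lists — the names and the extra square-matrix check here are illustrative assumptions, not the committed implementation:

```python
# Illustrative sketch of the validation the TODO asks for: one row per
# annotation spec, and each row as wide as the number of rows.
from typing import List, Optional


def validate_confusion_matrix(
    matrix: List[List[int]],
    annotation_spec_names: Optional[List[str]] = None,
) -> None:
    if annotation_spec_names is not None and len(matrix) != len(annotation_spec_names):
        raise ValueError(
            f"Length of matrix ({len(matrix)}) must match the number of "
            f"annotation specs ({len(annotation_spec_names)})."
        )
    for row in matrix:
        if len(row) != len(matrix):
            raise ValueError("Each confusion matrix row must have one entry per annotation spec.")


validate_confusion_matrix([[9, 1], [1, 9]], ["cat", "dog"])  # passes
```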