From 94b2f29f040829bbc97d29385f7652c377d9b36b Mon Sep 17 00:00:00 2001
From: Jaycee Li
Date: Wed, 11 Jan 2023 22:40:00 -0800
Subject: [PATCH] feat: add MLMD schema class ExperimentModel

PiperOrigin-RevId: 501468901
---
 .../metadata/schema/base_artifact.py          |  24 ++-
 .../metadata/schema/google/artifact_schema.py | 157 ++++++++++++++-
 .../cloud/aiplatform/metadata/schema/utils.py |  48 ++++-
 tests/unit/aiplatform/test_metadata_schema.py | 187 ++++++++++++++++++
 4 files changed, 408 insertions(+), 8 deletions(-)

diff --git a/google/cloud/aiplatform/metadata/schema/base_artifact.py b/google/cloud/aiplatform/metadata/schema/base_artifact.py
index ad8d751d77..58a8229328 100644
--- a/google/cloud/aiplatform/metadata/schema/base_artifact.py
+++ b/google/cloud/aiplatform/metadata/schema/base_artifact.py
@@ -107,6 +107,10 @@ def _init_with_resource_name(
         self,
         *,
         artifact_name: str,
+        metadata_store_id: str = "default",
+        project: Optional[str] = None,
+        location: Optional[str] = None,
+        credentials: Optional[auth_credentials.Credentials] = None,
     ):
         """Initializes the Artifact instance using an existing resource.
 
@@ -115,13 +119,31 @@ def _init_with_resource_name(
             artifact_name (str):
                 Artifact name with the following format, this is globally unique in a metadataStore:
                 projects/123/locations/us-central1/metadataStores//artifacts/.
+            metadata_store_id (str):
+                Optional. MetadataStore to retrieve Artifact from. If not set, metadata_store_id is set to "default".
+                If artifact_name is a fully-qualified resource, its metadata_store_id overrides this one.
+            project (str):
+                Optional. Project to retrieve the artifact from. If not set, project
+                set in aiplatform.init will be used.
+            location (str):
+                Optional. Location to retrieve the Artifact from. If not set, location
+                set in aiplatform.init will be used.
+            credentials (auth_credentials.Credentials):
+                Optional. Custom credentials to use to retrieve this Artifact. Overrides
+                credentials set in aiplatform.init.
         """
         # Add User Agent Header for metrics tracking if one is not specified
         # If one is already specified this call was initiated by a sub class.
         if not base_constants.USER_AGENT_SDK_COMMAND:
             base_constants.USER_AGENT_SDK_COMMAND = "aiplatform.metadata.schema.base_artifact.BaseArtifactSchema._init_with_resource_name"
 
-        super(BaseArtifactSchema, self).__init__(artifact_name=artifact_name)
+        super(BaseArtifactSchema, self).__init__(
+            artifact_name=artifact_name,
+            metadata_store_id=metadata_store_id,
+            project=project,
+            location=location,
+            credentials=credentials,
+        )
 
     def create(
         self,
diff --git a/google/cloud/aiplatform/metadata/schema/google/artifact_schema.py b/google/cloud/aiplatform/metadata/schema/google/artifact_schema.py
index 264eff9168..eae4e44ace 100644
--- a/google/cloud/aiplatform/metadata/schema/google/artifact_schema.py
+++ b/google/cloud/aiplatform/metadata/schema/google/artifact_schema.py
@@ -17,6 +17,7 @@
 import copy
 from typing import Optional, Dict, List
 
+from google.auth import credentials as auth_credentials
 from google.cloud.aiplatform.compat.types import artifact as gca_artifact
 from google.cloud.aiplatform.metadata.schema import base_artifact
 from google.cloud.aiplatform.metadata.schema import utils
@@ -359,7 +360,6 @@ def __init__(
         extended_metadata = copy.deepcopy(metadata) if metadata else {}
         if aggregation_type:
             if aggregation_type not in _CLASSIFICATION_METRICS_AGGREGATION_TYPE:
-                ## Todo: add negative test case for this
                 raise ValueError(
                     "aggregation_type can only be 'AGGREGATION_TYPE_UNSPECIFIED', 'MACRO_AVERAGE', or 'MICRO_AVERAGE'."
                 )
@@ -583,3 +583,158 @@ def __init__(
             metadata=extended_metadata,
             state=state,
         )
+
+
+class ExperimentModel(base_artifact.BaseArtifactSchema):
+    """An artifact representing a Vertex Experiment Model."""
+
+    schema_title = "google.ExperimentModel"
+
+    RESERVED_METADATA_KEYS = [
+        "frameworkName",
+        "frameworkVersion",
+        "modelFile",
+        "modelClass",
+        "predictSchemata",
+    ]
+
+    def __init__(
+        self,
+        *,
+        framework_name: str,
+        framework_version: str,
+        model_file: str,
+        uri: str,
+        model_class: Optional[str] = None,
+        predict_schemata: Optional[utils.PredictSchemata] = None,
+        artifact_id: Optional[str] = None,
+        display_name: Optional[str] = None,
+        schema_version: Optional[str] = None,
+        description: Optional[str] = None,
+        metadata: Optional[Dict] = None,
+        state: Optional[gca_artifact.Artifact.State] = gca_artifact.Artifact.State.LIVE,
+    ):
+        """Args:
+        framework_name (str):
+            Required. The name of the model's framework. E.g., 'sklearn'
+        framework_version (str):
+            Required. The version of the model's framework. E.g., '1.1.0'
+        model_file (str):
+            Required. The file name of the model. E.g., 'model.pkl'
+        uri (str):
+            Required. The uniform resource identifier of the model artifact directory.
+        model_class (str):
+            Optional. The class name of the model. E.g., 'sklearn.linear_model._base.LinearRegression'
+        predict_schemata (PredictSchemata):
+            Optional. An instance of PredictSchemata which holds instance, parameter and prediction schema uris.
+        artifact_id (str):
+            Optional. The portion of the Artifact name with
+            the format. This is globally unique in a metadataStore:
+            projects/123/locations/us-central1/metadataStores//artifacts/.
+        display_name (str):
+            Optional. The user-defined name of the Artifact.
+        schema_version (str):
+            Optional. schema_version specifies the version used by the Artifact.
+            If not set, defaults to use the latest version.
+        description (str):
+            Optional. Describes the purpose of the Artifact to be created.
+        metadata (Dict):
+            Optional. Contains the metadata information that will be stored in the Artifact.
+        state (google.cloud.gapic.types.Artifact.State):
+            Optional. The state of this Artifact. This is a
+            property of the Artifact, and does not imply or
+            capture any ongoing process. This property is
+            managed by clients (such as Vertex AI
+            Pipelines), and the system does not prescribe or
+            check the validity of state transitions.
+        """
+        if metadata:
+            for k in metadata:
+                if k in self.RESERVED_METADATA_KEYS:
+                    raise ValueError(f"'{k}' is a system reserved key in metadata.")
+            extended_metadata = copy.deepcopy(metadata)
+        else:
+            extended_metadata = {}
+        extended_metadata["frameworkName"] = framework_name
+        extended_metadata["frameworkVersion"] = framework_version
+        extended_metadata["modelFile"] = model_file
+        if model_class is not None:
+            extended_metadata["modelClass"] = model_class
+        if predict_schemata is not None:
+            extended_metadata["predictSchemata"] = predict_schemata.to_dict()
+
+        super().__init__(
+            uri=uri,
+            artifact_id=artifact_id,
+            display_name=display_name,
+            schema_version=schema_version,
+            description=description,
+            metadata=extended_metadata,
+            state=state,
+        )
+
+    @classmethod
+    def get(
+        cls,
+        artifact_id: str,
+        *,
+        metadata_store_id: str = "default",
+        project: Optional[str] = None,
+        location: Optional[str] = None,
+        credentials: Optional[auth_credentials.Credentials] = None,
+    ) -> "ExperimentModel":
+        """Retrieves an existing ExperimentModel artifact given an artifact id.
+
+        Args:
+            artifact_id (str):
+                Required. An artifact id of the ExperimentModel artifact.
+            metadata_store_id (str):
+                Optional. MetadataStore to retrieve Artifact from. If not set, metadata_store_id is set to "default".
+                If artifact_id is a fully-qualified resource name, its metadata_store_id overrides this one.
+            project (str):
+                Optional. Project to retrieve the artifact from. If not set, project
+                set in aiplatform.init will be used.
+            location (str):
+                Optional. Location to retrieve the Artifact from. If not set, location
+                set in aiplatform.init will be used.
+            credentials (auth_credentials.Credentials):
+                Optional. Custom credentials to use to retrieve this Artifact. Overrides
+                credentials set in aiplatform.init.
+
+        Returns:
+            An ExperimentModel instance that represents the retrieved Artifact resource.
+
+        Raises:
+            ValueError: if the artifact's schema title is not 'google.ExperimentModel'.
+        """
+        experiment_model = ExperimentModel(
+            framework_name="",
+            framework_version="",
+            model_file="",
+            uri="",
+        )
+        experiment_model._init_with_resource_name(
+            artifact_name=artifact_id,
+            metadata_store_id=metadata_store_id,
+            project=project,
+            location=location,
+            credentials=credentials,
+        )
+        if experiment_model.schema_title != cls.schema_title:
+            raise ValueError(
+                f"The schema title of the artifact must be {cls.schema_title}. "
+                f"Got {experiment_model.schema_title}."
+            )
+        return experiment_model
+
+    @property
+    def framework_name(self) -> Optional[str]:
+        return self.metadata.get("frameworkName")
+
+    @property
+    def framework_version(self) -> Optional[str]:
+        return self.metadata.get("frameworkVersion")
+
+    @property
+    def model_class(self) -> Optional[str]:
+        return self.metadata.get("modelClass")
diff --git a/google/cloud/aiplatform/metadata/schema/utils.py b/google/cloud/aiplatform/metadata/schema/utils.py
index c6e23735b6..3016fd4d56 100644
--- a/google/cloud/aiplatform/metadata/schema/utils.py
+++ b/google/cloud/aiplatform/metadata/schema/utils.py
@@ -50,7 +50,12 @@ class PredictSchemata:
     prediction_schema_uri: str
 
     def to_dict(self):
-        """ML metadata schema dictionary representation of this DataClass"""
+        """ML metadata schema dictionary representation of this DataClass.
+
+
+        Returns:
+            A dictionary that represents the PredictSchemata class.
+        """
         results = {}
         results["instanceSchemaUri"] = self.instance_schema_uri
         results["parametersSchemaUri"] = self.parameters_schema_uri
@@ -62,6 +67,7 @@ def to_dict(self):
 @dataclass
 class ContainerSpec:
     """Container configuration for the model.
+
     Args:
         image_uri (str):
             Required. URI of the Docker image to be used as the custom
@@ -124,7 +130,12 @@ class ContainerSpec:
     health_route: Optional[str] = None
 
     def to_dict(self):
-        """ML metadata schema dictionary representation of this DataClass"""
+        """ML metadata schema dictionary representation of this DataClass.
+
+
+        Returns:
+            A dictionary that represents the ContainerSpec class.
+        """
         results = {}
         results["imageUri"] = self.image_uri
         if self.command:
@@ -146,6 +157,7 @@ def to_dict(self):
 @dataclass
 class AnnotationSpec:
     """A class that represents the annotation spec of a Confusion Matrix.
+
     Args:
         display_name (str):
            Optional. Display name for a column of a confusion matrix.
@@ -157,7 +169,12 @@ class AnnotationSpec:
     id: Optional[str] = None
 
     def to_dict(self):
-        """ML metadata schema dictionary representation of this DataClass"""
+        """ML metadata schema dictionary representation of this DataClass.
+
+
+        Returns:
+            A dictionary that represents the AnnotationSpec class.
+        """
         results = {}
         if self.display_name:
             results["displayName"] = self.display_name
@@ -170,6 +187,7 @@ def to_dict(self):
 @dataclass
 class ConfusionMatrix:
     """A class that represents a Confusion Matrix.
+
     Args:
         matrix (List[List[int]]):
             Required. A 2D array of integers that represents the values for the confusion matrix.
@@ -181,10 +199,23 @@ class ConfusionMatrix:
     annotation_specs: Optional[List[AnnotationSpec]] = None
 
     def to_dict(self):
-        ## Todo: add a validation to check 'matrix' and 'annotation_specs' have the same length
-        """ML metadata schema dictionary representation of this DataClass"""
+        """ML metadata schema dictionary representation of this DataClass.
+
+        Returns:
+            A dictionary that represents the ConfusionMatrix class.
+
+        Raises:
+            ValueError: if annotation_specs and matrix have different lengths.
+        """
         results = {}
         if self.annotation_specs:
+            if len(self.annotation_specs) != len(self.matrix):
+                raise ValueError(
+                    "Length of annotation_specs and matrix must be the same. "
+                    "Got lengths {} and {} respectively.".format(
+                        len(self.annotation_specs), len(self.matrix)
+                    )
+                )
             results["annotationSpecs"] = [
                 annotation_spec.to_dict() for annotation_spec in self.annotation_specs
             ]
@@ -255,7 +286,12 @@ class ConfidenceMetric:
     confusion_matrix: Optional[ConfusionMatrix] = None
 
     def to_dict(self):
-        """ML metadata schema dictionary representation of this DataClass"""
+        """ML metadata schema dictionary representation of this DataClass.
+
+
+        Returns:
+            A dictionary that represents the ConfidenceMetric class.
+        """
         results = {}
         results["confidenceThreshold"] = self.confidence_threshold
         if self.recall is not None:
diff --git a/tests/unit/aiplatform/test_metadata_schema.py b/tests/unit/aiplatform/test_metadata_schema.py
index 826a99b942..5c32157cd2 100644
--- a/tests/unit/aiplatform/test_metadata_schema.py
+++ b/tests/unit/aiplatform/test_metadata_schema.py
@@ -954,6 +954,16 @@ def test_classification_metrics_constructor_parameters_are_set_correctly(self):
         )
         assert artifact.schema_version == _TEST_SCHEMA_VERSION
 
+    def test_classification_metrics_wrong_aggregation_type(self):
+        with pytest.raises(ValueError) as exception:
+            google_artifact_schema.ClassificationMetrics(
+                aggregation_type="unspecified_type"
+            )
+        assert (
+            str(exception.value)
+            == "aggregation_type can only be 'AGGREGATION_TYPE_UNSPECIFIED', 'MACRO_AVERAGE', or 'MICRO_AVERAGE'."
+        )
+
     def test_regression_metrics_title_is_set_correctly(self):
         artifact = google_artifact_schema.RegressionMetrics()
         assert artifact.schema_title == "google.RegressionMetrics"
@@ -1051,6 +1061,75 @@ def test_forecasting_metrics_constructor_parameters_are_set_correctly(self):
         )
         assert artifact.schema_version == _TEST_SCHEMA_VERSION
 
+    def test_experiment_model_title_is_set_correctly(self):
+        artifact = google_artifact_schema.ExperimentModel(
+            framework_name="sklearn",
+            framework_version="1.0.0",
+            model_file="model.pkl",
+            uri=_TEST_URI,
+        )
+        assert artifact.schema_title == "google.ExperimentModel"
+        assert artifact.framework_name == "sklearn"
+        assert artifact.framework_version == "1.0.0"
+        assert artifact.uri == _TEST_URI
+
+    def test_experiment_model_wrong_metadata_key(self):
+        with pytest.raises(ValueError) as exception:
+            google_artifact_schema.ExperimentModel(
+                framework_name="sklearn",
+                framework_version="1.0.0",
+                model_file="model.pkl",
+                uri=_TEST_URI,
+                metadata={"modelFile": "abc"},
+            )
+        assert (
+            str(exception.value) == "'modelFile' is a system reserved key in metadata."
+        )
+
+    def test_experiment_model_constructor_parameters_are_set_correctly(self):
+        predict_schemata = utils.PredictSchemata(
+            instance_schema_uri="instance_uri",
+            prediction_schema_uri="prediction_uri",
+            parameters_schema_uri="parameters_uri",
+        )
+
+        artifact = google_artifact_schema.ExperimentModel(
+            framework_name="sklearn",
+            framework_version="1.0.0",
+            model_file="model.pkl",
+            model_class="sklearn.linear_model._base.LinearRegression",
+            predict_schemata=predict_schemata,
+            artifact_id=_TEST_ARTIFACT_ID,
+            uri=_TEST_URI,
+            display_name=_TEST_DISPLAY_NAME,
+            schema_version=_TEST_SCHEMA_VERSION,
+            description=_TEST_DESCRIPTION,
+            metadata=_TEST_UPDATED_METADATA,
+        )
+        expected_metadata = {
+            "test-param1": 2.0,
+            "test-param2": "test-value-1",
+            "test-param3": False,
+            "frameworkName": "sklearn",
+            "frameworkVersion": "1.0.0",
+            "modelFile": "model.pkl",
+            "modelClass": "sklearn.linear_model._base.LinearRegression",
+            "predictSchemata": {
+                "instanceSchemaUri": "instance_uri",
+                "parametersSchemaUri": "parameters_uri",
+                "predictionSchemaUri": "prediction_uri",
+            },
+        }
+
+        assert artifact.artifact_id == _TEST_ARTIFACT_ID
+        assert artifact.uri == _TEST_URI
+        assert artifact.display_name == _TEST_DISPLAY_NAME
+        assert artifact.description == _TEST_DESCRIPTION
+        assert json.dumps(artifact.metadata, sort_keys=True) == json.dumps(
+            expected_metadata, sort_keys=True
+        )
+        assert artifact.schema_version == _TEST_SCHEMA_VERSION
+
 
 @pytest.mark.usefixtures("google_auth_mock")
 class TestMetadataSystemArtifactSchema:
@@ -1309,6 +1388,114 @@ def test_container_spec_to_dict_method_returns_correct_schema(self):
 
         assert json.dumps(container_spec.to_dict()) == json.dumps(expected_results)
 
+    def test_annotation_spec_to_dict_method_returns_correct_schema(self):
+        annotation_spec = utils.AnnotationSpec(
+            display_name="test_display_name",
+            id="test_annotation_id",
+        )
+
+        expected_results = {
+            "displayName": "test_display_name",
+            "id": "test_annotation_id",
+        }
+
+        assert json.dumps(annotation_spec.to_dict(), sort_keys=True) == json.dumps(
+            expected_results, sort_keys=True
+        )
+
+    def test_confusion_matrix_to_dict_method_returns_correct_schema(self):
+        confusion_matrix = utils.ConfusionMatrix(
+            matrix=[[9, 1], [1, 9]],
+            annotation_specs=[
+                utils.AnnotationSpec(display_name="cat"),
+                utils.AnnotationSpec(display_name="dog"),
+            ],
+        )
+
+        expected_results = {
+            "rows": [[9, 1], [1, 9]],
+            "annotationSpecs": [
+                {"displayName": "cat"},
+                {"displayName": "dog"},
+            ],
+        }
+
+        assert json.dumps(confusion_matrix.to_dict(), sort_keys=True) == json.dumps(
+            expected_results, sort_keys=True
+        )
+
+    def test_confusion_matrix_to_dict_method_length_error(self):
+        confusion_matrix = utils.ConfusionMatrix(
+            matrix=[[9, 1], [1, 9]],
+            annotation_specs=[
+                utils.AnnotationSpec(display_name="cat"),
+                utils.AnnotationSpec(display_name="dog"),
+                utils.AnnotationSpec(display_name="bird"),
+            ],
+        )
+
+        with pytest.raises(ValueError) as exception:
+            confusion_matrix.to_dict()
+        assert (
+            str(exception.value)
+            == "Length of annotation_specs and matrix must be the same. Got lengths 3 and 2 respectively."
+        )
+
+    def test_confidence_metric_to_dict_method_returns_correct_schema(self):
+        confidence_metric = utils.ConfidenceMetric(
+            confidence_threshold=0.5,
+            recall=0.5,
+            precision=0.5,
+            f1_score=0.5,
+            max_predictions=1,
+            false_positive_rate=0.5,
+            accuracy=0.5,
+            true_positive_count=1,
+            false_positive_count=1,
+            false_negative_count=1,
+            true_negative_count=1,
+            recall_at_1=0.5,
+            precision_at_1=0.5,
+            false_positive_rate_at_1=0.5,
+            f1_score_at_1=0.5,
+            confusion_matrix=utils.ConfusionMatrix(
+                matrix=[[9, 1], [1, 9]],
+                annotation_specs=[
+                    utils.AnnotationSpec(display_name="cat"),
+                    utils.AnnotationSpec(display_name="dog"),
+                ],
+            ),
+        )
+
+        expected_results = {
+            "confidenceThreshold": 0.5,
+            "recall": 0.5,
+            "precision": 0.5,
+            "f1Score": 0.5,
+            "maxPredictions": 1,
+            "falsePositiveRate": 0.5,
+            "accuracy": 0.5,
+            "truePositiveCount": 1,
+            "falsePositiveCount": 1,
+            "falseNegativeCount": 1,
+            "trueNegativeCount": 1,
+            "recallAt1": 0.5,
+            "precisionAt1": 0.5,
+            "falsePositiveRateAt1": 0.5,
+            "f1ScoreAt1": 0.5,
+            "confusionMatrix": {
+                "rows": [[9, 1], [1, 9]],
+                "annotationSpecs": [
+                    {"displayName": "cat"},
+                    {"displayName": "dog"},
+                ],
+            },
+        }
+
+        assert json.dumps(confidence_metric.to_dict(), sort_keys=True) == json.dumps(
+            expected_results, sort_keys=True
+        )
+
     @pytest.mark.usefixtures("create_execution_mock", "get_execution_mock")
     def test_start_execution_method_calls_gapic_library_with_correct_parameters(
         self, create_execution_mock
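
For reference, a minimal usage sketch of the google.ExperimentModel artifact added by this patch (not part of the patch itself). It assumes a Vertex AI environment initialized via aiplatform.init; the project, location, gs:// URIs, and sklearn values below are placeholders, and create() and name come from the existing BaseArtifactSchema/Artifact surface shown as context above.

from google.cloud import aiplatform
from google.cloud.aiplatform.metadata.schema import utils
from google.cloud.aiplatform.metadata.schema.google import artifact_schema

# Placeholder project/location; any initialized Vertex AI environment works.
aiplatform.init(project="my-project", location="us-central1")

# Log a trained model as a google.ExperimentModel artifact.
model_artifact = artifact_schema.ExperimentModel(
    framework_name="sklearn",
    framework_version="1.0.0",
    model_file="model.pkl",
    uri="gs://my-bucket/my-model-dir",  # placeholder artifact directory
    model_class="sklearn.linear_model._base.LinearRegression",
    predict_schemata=utils.PredictSchemata(
        instance_schema_uri="gs://my-bucket/instance.yaml",
        parameters_schema_uri="gs://my-bucket/parameters.yaml",
        prediction_schema_uri="gs://my-bucket/prediction.yaml",
    ),
)
created = model_artifact.create()  # create() is inherited from BaseArtifactSchema

# Retrieve it later by artifact id; get() raises ValueError if the stored
# artifact's schema_title is not "google.ExperimentModel".
fetched = artifact_schema.ExperimentModel.get(artifact_id=created.name)
print(fetched.framework_name, fetched.framework_version, fetched.model_class)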