diff --git a/google/cloud/aiplatform/__init__.py b/google/cloud/aiplatform/__init__.py index 8107756229..bc1043ef03 100644 --- a/google/cloud/aiplatform/__init__.py +++ b/google/cloud/aiplatform/__init__.py @@ -86,6 +86,9 @@ log_params = metadata.metadata._experiment_tracker.log_params log_metrics = metadata.metadata._experiment_tracker.log_metrics +log_classification_metrics = ( + metadata.metadata._experiment_tracker.log_classification_metrics +) get_experiment_df = metadata.metadata._experiment_tracker.get_experiment_df start_run = metadata.metadata._experiment_tracker.start_run start_execution = metadata.metadata._experiment_tracker.start_execution @@ -110,6 +113,7 @@ "log", "log_params", "log_metrics", + "log_classification_metrics", "log_time_series_metrics", "get_experiment_df", "get_pipeline_df", diff --git a/google/cloud/aiplatform/metadata/experiment_run_resource.py b/google/cloud/aiplatform/metadata/experiment_run_resource.py index d61b62b7b2..326948dcec 100644 --- a/google/cloud/aiplatform/metadata/experiment_run_resource.py +++ b/google/cloud/aiplatform/metadata/experiment_run_resource.py @@ -39,6 +39,10 @@ from google.cloud.aiplatform.metadata import metadata from google.cloud.aiplatform.metadata import resource from google.cloud.aiplatform.metadata import utils as metadata_utils +from google.cloud.aiplatform.metadata.schema import utils as schema_utils +from google.cloud.aiplatform.metadata.schema.google import ( + artifact_schema as google_artifact_schema, +) from google.cloud.aiplatform.tensorboard import tensorboard_resource from google.cloud.aiplatform.utils import rest_utils @@ -990,6 +994,108 @@ def log_metrics(self, metrics: Dict[str, Union[float, int, str]]): # TODO: query the latest metrics artifact resource before logging. self._metadata_node.update(metadata={constants._METRIC_KEY: metrics}) + @_v1_not_supported + def log_classification_metrics( + self, + *, + labels: Optional[List[str]] = None, + matrix: Optional[List[List[int]]] = None, + fpr: Optional[List[float]] = None, + tpr: Optional[List[float]] = None, + threshold: Optional[List[float]] = None, + display_name: Optional[str] = None, + ): + """Create an artifact for classification metrics and log to ExperimentRun. Currently supports confusion matrix and ROC curve. + + ``` + my_run = aiplatform.ExperimentRun('my-run', experiment='my-experiment') + my_run.log_classification_metrics( + display_name='my-classification-metrics', + labels=['cat', 'dog'], + matrix=[[9, 1], [1, 9]], + fpr=[0.1, 0.5, 0.9], + tpr=[0.1, 0.7, 0.9], + threshold=[0.9, 0.5, 0.1], + ) + ``` + + Args: + labels (List[str]): + Optional. List of label names for the confusion matrix. Must be set if 'matrix' is set. + matrix (List[List[int]): + Optional. Values for the confusion matrix. Must be set if 'labels' is set. + fpr (List[float]): + Optional. List of false positive rates for the ROC curve. Must be set if 'tpr' or 'thresholds' is set. + tpr (List[float]): + Optional. List of true positive rates for the ROC curve. Must be set if 'fpr' or 'thresholds' is set. + threshold (List[float]): + Optional. List of thresholds for the ROC curve. Must be set if 'fpr' or 'tpr' is set. + display_name (str): + Optional. The user-defined name for the classification metric artifact. 
+ + Raises: + ValueError: if 'labels' and 'matrix' are not set together + or if 'labels' and 'matrix' are not in the same length + or if 'fpr' and 'tpr' and 'threshold' are not set together + or if 'fpr' and 'tpr' and 'threshold' are not in the same length + """ + if (labels or matrix) and not (labels and matrix): + raise ValueError("labels and matrix must be set together.") + + if (fpr or tpr or threshold) and not (fpr and tpr and threshold): + raise ValueError("fpr, tpr, and thresholds must be set together.") + + if labels and matrix: + if len(matrix) != len(labels): + raise ValueError( + "Length of labels and matrix must be the same. " + "Got lengths {} and {} respectively.".format( + len(labels), len(matrix) + ) + ) + annotation_specs = [ + schema_utils.AnnotationSpec(display_name=label) for label in labels + ] + confusion_matrix = schema_utils.ConfusionMatrix( + annotation_specs=annotation_specs, + matrix=matrix, + ) + + if fpr and tpr and threshold: + if ( + len(fpr) != len(tpr) + or len(fpr) != len(threshold) + or len(tpr) != len(threshold) + ): + raise ValueError( + "Length of fpr, tpr and threshold must be the same. " + "Got lengths {}, {} and {} respectively.".format( + len(fpr), len(tpr), len(threshold) + ) + ) + + confidence_metrics = [ + schema_utils.ConfidenceMetric( + confidence_threshold=confidence_threshold, + false_positive_rate=false_positive_rate, + recall=recall, + ) + for confidence_threshold, false_positive_rate, recall in zip( + threshold, fpr, tpr + ) + ] + + classification_metrics = google_artifact_schema.ClassificationMetrics( + display_name=display_name, + confusion_matrix=confusion_matrix, + confidence_metrics=confidence_metrics, + ) + + classfication_metrics = classification_metrics.create() + self._metadata_node.add_artifacts_and_executions( + artifact_resource_names=[classfication_metrics.resource_name] + ) + @_v1_not_supported def get_time_series_data_frame(self) -> "pd.DataFrame": # noqa: F821 """Returns all time series in this Run as a DataFrame. @@ -1149,6 +1255,65 @@ def get_metrics(self) -> Dict[str, Union[float, int, str]]: else: return self._metadata_node.metadata[constants._METRIC_KEY] + @_v1_not_supported + def get_classification_metrics(self) -> List[Dict[str, Union[str, List]]]: + """Get all the classification metrics logged to this run. + + ``` + my_run = aiplatform.ExperimentRun('my-run', experiment='my-experiment') + metric = my_run.get_classification_metrics()[0] + print(metric) + ## print result: + { + "id": "e6c893a4-222e-4c60-a028-6a3b95dfc109", + "display_name": "my-classification-metrics", + "labels": ["cat", "dog"], + "matrix": [[9,1], [1,9]], + "fpr": [0.1, 0.5, 0.9], + "tpr": [0.1, 0.7, 0.9], + "thresholds": [0.9, 0.5, 0.1] + } + ``` + + Returns: + List of classification metrics logged to this experiment run. 
+ """ + + artifact_list = artifact.Artifact.list( + filter=metadata_utils._make_filter_string( + in_context=[self.resource_name], + schema_title=google_artifact_schema.ClassificationMetrics.schema_title, + ), + project=self.project, + location=self.location, + credentials=self.credentials, + ) + + metrics = [] + for metric_artifact in artifact_list: + metric = {} + metric["id"] = metric_artifact.name + metric["display_name"] = metric_artifact.display_name + metadata = metric_artifact.metadata + if "confusionMatrix" in metadata: + metric["labels"] = [ + d["displayName"] + for d in metadata["confusionMatrix"]["annotationSpecs"] + ] + metric["matrix"] = metadata["confusionMatrix"]["rows"] + + if "confidenceMetrics" in metadata: + metric["fpr"] = [ + d["falsePositiveRate"] for d in metadata["confidenceMetrics"] + ] + metric["tpr"] = [d["recall"] for d in metadata["confidenceMetrics"]] + metric["threshold"] = [ + d["confidenceThreshold"] for d in metadata["confidenceMetrics"] + ] + metrics.append(metric) + + return metrics + @_v1_not_supported def associate_execution(self, execution: execution.Execution): """Associate an execution to this experiment run. diff --git a/google/cloud/aiplatform/metadata/metadata.py b/google/cloud/aiplatform/metadata/metadata.py index 6f67a6ddf6..d103a79733 100644 --- a/google/cloud/aiplatform/metadata/metadata.py +++ b/google/cloud/aiplatform/metadata/metadata.py @@ -15,8 +15,7 @@ # limitations under the License. # - -from typing import Dict, Union, Optional, Any +from typing import Dict, Union, Optional, Any, List from google.api_core import exceptions from google.auth import credentials as auth_credentials @@ -371,6 +370,62 @@ def log_metrics(self, metrics: Dict[str, Union[float, int, str]]): # query the latest metrics artifact resource before logging. self._experiment_run.log_metrics(metrics=metrics) + def log_classification_metrics( + self, + *, + labels: Optional[List[str]] = None, + matrix: Optional[List[List[int]]] = None, + fpr: Optional[List[float]] = None, + tpr: Optional[List[float]] = None, + threshold: Optional[List[float]] = None, + display_name: Optional[str] = None, + ): + """Create an artifact for classification metrics and log to ExperimentRun. Currently support confusion matrix and ROC curve. + + ``` + my_run = aiplatform.ExperimentRun('my-run', experiment='my-experiment') + my_run.log_classification_metrics( + display_name='my-classification-metrics', + labels=['cat', 'dog'], + matrix=[[9, 1], [1, 9]], + fpr=[0.1, 0.5, 0.9], + tpr=[0.1, 0.7, 0.9], + threshold=[0.9, 0.5, 0.1], + ) + ``` + + Args: + labels (List[str]): + Optional. List of label names for the confusion matrix. Must be set if 'matrix' is set. + matrix (List[List[int]): + Optional. Values for the confusion matrix. Must be set if 'labels' is set. + fpr (List[float]): + Optional. List of false positive rates for the ROC curve. Must be set if 'tpr' or 'thresholds' is set. + tpr (List[float]): + Optional. List of true positive rates for the ROC curve. Must be set if 'fpr' or 'thresholds' is set. + threshold (List[float]): + Optional. List of thresholds for the ROC curve. Must be set if 'fpr' or 'tpr' is set. + display_name (str): + Optional. The user-defined name for the classification metric artifact. 
+ + Raises: + ValueError: if 'labels' and 'matrix' are not set together + or if 'labels' and 'matrix' are not in the same length + or if 'fpr' and 'tpr' and 'threshold' are not set together + or if 'fpr' and 'tpr' and 'threshold' are not in the same length + """ + + self._validate_experiment_and_run(method_name="log_classification_metrics") + # query the latest metrics artifact resource before logging. + self._experiment_run.log_classification_metrics( + display_name=display_name, + labels=labels, + matrix=matrix, + fpr=fpr, + tpr=tpr, + threshold=threshold, + ) + def _validate_experiment_and_run(self, method_name: str): """Validates Experiment and Run are set and raises informative error message. diff --git a/google/cloud/aiplatform/metadata/schema/google/artifact_schema.py b/google/cloud/aiplatform/metadata/schema/google/artifact_schema.py index e52f2f98b5..4941e42480 100644 --- a/google/cloud/aiplatform/metadata/schema/google/artifact_schema.py +++ b/google/cloud/aiplatform/metadata/schema/google/artifact_schema.py @@ -15,7 +15,7 @@ # limitations under the License. import copy -from typing import Optional, Dict +from typing import Optional, Dict, List from google.cloud.aiplatform.compat.types import artifact as gca_artifact from google.cloud.aiplatform.metadata.schema import base_artifact @@ -24,6 +24,12 @@ # The artifact property key for the resource_name _ARTIFACT_PROPERTY_KEY_RESOURCE_NAME = "resourceName" +_CLASSIFICATION_METRICS_AGGREGATION_TYPE = [ + "AGGREGATION_TYPE_UNSPECIFIED", + "MACRO_AVERAGE", + "MICRO_AVERAGE", +] + class VertexDataset(base_artifact.BaseArtifactSchema): """An artifact representing a Vertex Dataset.""" @@ -278,9 +284,17 @@ class ClassificationMetrics(base_artifact.BaseArtifactSchema): def __init__( self, *, + aggregation_type: Optional[str] = None, + aggregation_threshold: Optional[float] = None, + recall: Optional[float] = None, + precision: Optional[float] = None, + f1_score: Optional[float] = None, + accuracy: Optional[float] = None, au_prc: Optional[float] = None, au_roc: Optional[float] = None, log_loss: Optional[float] = None, + confusion_matrix: Optional[utils.ConfusionMatrix] = None, + confidence_metrics: Optional[List[utils.ConfidenceMetric]] = None, artifact_id: Optional[str] = None, uri: Optional[str] = None, display_name: Optional[str] = None, @@ -290,6 +304,22 @@ def __init__( state: Optional[gca_artifact.Artifact.State] = gca_artifact.Artifact.State.LIVE, ): """Args: + aggregation_type (str): + Optional. The way to generate the aggregated metrics. Choose from the following options: + "AGGREGATION_TYPE_UNSPECIFIED": Indicating unset, used for per-class sliced metrics + "MACRO_AVERAGE": The unweighted average, default behavior + "MICRO_AVERAGE": The weighted average + aggregation_threshold (float): + Optional. The threshold used to generate aggregated metrics, default 0 for multi-class classification, 0.5 for binary classification. + recall (float): + Optional. Recall (True Positive Rate) for the given confidence threshold. + precision (float): + Optional. Precision for the given confidence threshold. + f1_score (float): + Optional. The harmonic mean of recall and precision. + accuracy (float): + Optional. Accuracy is the fraction of predictions given the correct label. + For multiclass this is a micro-average metric. au_prc (float): Optional. The Area Under Precision-Recall Curve metric. Micro-averaged for the overall evaluation. @@ -298,6 +328,10 @@ def __init__( Micro-averaged for the overall evaluation. log_loss (float): Optional. 
The Log Loss metric. + confusion_matrix (utils.ConfusionMatrix): + Optional. Aggregated confusion matrix. + confidence_metrics (List[utils.ConfidenceMetric]): + Optional. List of metrics for different confidence thresholds. artifact_id (str): Optional. The portion of the Artifact name with the format. This is globally unique in a metadataStore: @@ -323,12 +357,35 @@ def __init__( check the validity of state transitions. """ extended_metadata = copy.deepcopy(metadata) if metadata else {} - if au_prc: + if aggregation_type: + if aggregation_type not in _CLASSIFICATION_METRICS_AGGREGATION_TYPE: + ## Todo: add negative test case for this + raise ValueError( + "aggregation_type can only be 'AGGREGATION_TYPE_UNSPECIFIED', 'MACRO_AVERAGE', or 'MICRO_AVERAGE'." + ) + extended_metadata["aggregationType"] = aggregation_type + if aggregation_threshold is not None: + extended_metadata["aggregationThreshold"] = aggregation_threshold + if recall is not None: + extended_metadata["recall"] = recall + if precision is not None: + extended_metadata["precision"] = precision + if f1_score is not None: + extended_metadata["f1Score"] = f1_score + if accuracy is not None: + extended_metadata["accuracy"] = accuracy + if au_prc is not None: extended_metadata["auPrc"] = au_prc - if au_roc: + if au_roc is not None: extended_metadata["auRoc"] = au_roc - if log_loss: + if log_loss is not None: extended_metadata["logLoss"] = log_loss + if confusion_matrix: + extended_metadata["confusionMatrix"] = confusion_matrix.to_dict() + if confidence_metrics: + extended_metadata["confidenceMetrics"] = [ + confidence_metric.to_dict() for confidence_metric in confidence_metrics + ] super(ClassificationMetrics, self).__init__( uri=uri, diff --git a/google/cloud/aiplatform/metadata/schema/utils.py b/google/cloud/aiplatform/metadata/schema/utils.py index 1b4a5e4f6c..c6e23735b6 100644 --- a/google/cloud/aiplatform/metadata/schema/utils.py +++ b/google/cloud/aiplatform/metadata/schema/utils.py @@ -143,6 +143,155 @@ def to_dict(self): return results +@dataclass +class AnnotationSpec: + """A class that represents the annotation spec of a Confusion Matrix. + Args: + display_name (str): + Optional. Display name for a column of a confusion matrix. + id (str): + Optional. Id for a column of a confusion matrix. + """ + + display_name: Optional[str] = None + id: Optional[str] = None + + def to_dict(self): + """ML metadata schema dictionary representation of this DataClass""" + results = {} + if self.display_name: + results["displayName"] = self.display_name + if self.id: + results["id"] = self.id + + return results + + +@dataclass +class ConfusionMatrix: + """A class that represents a Confusion Matrix. + Args: + matrix (List[List[int]]): + Required. A 2D array of integers that represets the values for the confusion matrix. + annotation_specs: (List(AnnotationSpec)): + Optional. List of column annotation specs which contains display_name (str) and id (str) + """ + + matrix: List[List[int]] + annotation_specs: Optional[List[AnnotationSpec]] = None + + def to_dict(self): + ## Todo: add a validation to check 'matrix' and 'annotation_specs' have the same length + """ML metadata schema dictionary representation of this DataClass""" + results = {} + if self.annotation_specs: + results["annotationSpecs"] = [ + annotation_spec.to_dict() for annotation_spec in self.annotation_specs + ] + if self.matrix: + results["rows"] = self.matrix + + return results + + +@dataclass +class ConfidenceMetric: + """A class that represents a Confidence Metric. 
+ Args: + confidence_threshold (float): + Required. Metrics are computed with an assumption that the Model never returns predictions with a score lower than this value. + For binary classification this is the positive class threshold. For multi-class classification this is the confidence threshold. + recall (float): + Optional. Recall (True Positive Rate) for the given confidence threshold. + precision (float): + Optional. Precision for the given confidence threshold. + f1_score (float): + Optional. The harmonic mean of recall and precision. + max_predictions (int): + Optional. Metrics are computed with an assumption that the Model always returns at most this many predictions (ordered by their score, descendingly). + But they all still need to meet the `confidence_threshold`. + false_positive_rate (float): + Optional. False Positive Rate for the given confidence threshold. + accuracy (float): + Optional. Accuracy is the fraction of predictions given the correct label. For multiclass this is a micro-average metric. + true_positive_count (int): + Optional. The number of Model created labels that match a ground truth label. + false_positive_count (int): + Optional. The number of Model created labels that do not match a ground truth label. + false_negative_count (int): + Optional. The number of ground truth labels that are not matched by a Model created label. + true_negative_count (int): + Optional. The number of labels that were not created by the Model, but if they would, they would not match a ground truth label. + recall_at_1 (float): + Optional. The Recall (True Positive Rate) when only considering the label that has the highest prediction score + and not below the confidence threshold for each DataItem. + precision_at_1 (float): + Optional. The precision when only considering the label that has the highest prediction score + and not below the confidence threshold for each DataItem. + false_positive_rate_at_1 (float): + Optional. The False Positive Rate when only considering the label that has the highest prediction score + and not below the confidence threshold for each DataItem. + f1_score_at_1 (float): + Optional. The harmonic mean of recallAt1 and precisionAt1. + confusion_matrix (ConfusionMatrix): + Optional. Confusion matrix for the given confidence threshold. 
+ """ + + confidence_threshold: float + recall: Optional[float] = None + precision: Optional[float] = None + f1_score: Optional[float] = None + max_predictions: Optional[int] = None + false_positive_rate: Optional[float] = None + accuracy: Optional[float] = None + true_positive_count: Optional[int] = None + false_positive_count: Optional[int] = None + false_negative_count: Optional[int] = None + true_negative_count: Optional[int] = None + recall_at_1: Optional[float] = None + precision_at_1: Optional[float] = None + false_positive_rate_at_1: Optional[float] = None + f1_score_at_1: Optional[float] = None + confusion_matrix: Optional[ConfusionMatrix] = None + + def to_dict(self): + """ML metadata schema dictionary representation of this DataClass""" + results = {} + results["confidenceThreshold"] = self.confidence_threshold + if self.recall is not None: + results["recall"] = self.recall + if self.precision is not None: + results["precision"] = self.precision + if self.f1_score is not None: + results["f1Score"] = self.f1_score + if self.max_predictions is not None: + results["maxPredictions"] = self.max_predictions + if self.false_positive_rate is not None: + results["falsePositiveRate"] = self.false_positive_rate + if self.accuracy is not None: + results["accuracy"] = self.accuracy + if self.true_positive_count is not None: + results["truePositiveCount"] = self.true_positive_count + if self.false_positive_count is not None: + results["falsePositiveCount"] = self.false_positive_count + if self.false_negative_count is not None: + results["falseNegativeCount"] = self.false_negative_count + if self.true_negative_count is not None: + results["trueNegativeCount"] = self.true_negative_count + if self.recall_at_1 is not None: + results["recallAt1"] = self.recall_at_1 + if self.precision_at_1 is not None: + results["precisionAt1"] = self.precision_at_1 + if self.false_positive_rate_at_1 is not None: + results["falsePositiveRateAt1"] = self.false_positive_rate_at_1 + if self.f1_score_at_1 is not None: + results["f1ScoreAt1"] = self.f1_score_at_1 + if self.confusion_matrix: + results["confusionMatrix"] = self.confusion_matrix.to_dict() + + return results + + def create_uri_from_resource_name(resource_name: str) -> str: """Construct the service URI for a given resource_name. Args: diff --git a/samples/model-builder/conftest.py b/samples/model-builder/conftest.py index 0f984aa7ee..b5bec845ba 100644 --- a/samples/model-builder/conftest.py +++ b/samples/model-builder/conftest.py @@ -1,4 +1,4 @@ -# Copyright 2021 Google LLC +# Copyright 2022 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -643,6 +643,12 @@ def mock_time_series_metrics(): yield mock +@pytest.fixture +def mock_classification_metrics(): + mock = MagicMock() + yield mock + + @pytest.fixture def mock_get_execution(mock_execution): with patch.object(aiplatform, "Execution") as mock_get_execution: @@ -825,6 +831,13 @@ def mock_log_params(): yield mock_log_params +@pytest.fixture +def mock_log_classification_metrics(): + with patch.object(aiplatform, "log_classification_metrics") as mock_log_metrics: + mock_log_metrics.return_value = None + yield mock_log_metrics + + @pytest.fixture def mock_log_pipeline_job(): with patch.object(aiplatform, "log") as mock_log_pipeline_job: @@ -883,6 +896,13 @@ def mock_get_time_series_metrics(mock_time_series_metrics, mock_experiment_run): yield mock_get_time_series_metrics +@pytest.fixture +def mock_get_classification_metrics(mock_classification_metrics, mock_experiment_run): + with patch.object(mock_experiment_run, "get_classification_metrics") as mock_get_classification_metrics: + mock_get_classification_metrics.return_value = mock_classification_metrics + yield mock_get_classification_metrics + + """ ---------------------------------------------------------------------------- Model Versioning Fixtures diff --git a/samples/model-builder/experiment_tracking/get_experiment_run_classification_metrics_sample.py b/samples/model-builder/experiment_tracking/get_experiment_run_classification_metrics_sample.py new file mode 100644 index 0000000000..284ed9f968 --- /dev/null +++ b/samples/model-builder/experiment_tracking/get_experiment_run_classification_metrics_sample.py @@ -0,0 +1,34 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Dict, List, Union + +from google.cloud import aiplatform + + +# [START aiplatform_sdk_get_experiment_run_classification_metrics_sample] +def get_experiment_run_classification_metrics_sample( + run_name: str, + experiment: Union[str, aiplatform.Experiment], + project: str, + location: str, +) -> List[Dict[str, Union[str, List]]]: + experiment_run = aiplatform.ExperimentRun( + run_name=run_name, experiment=experiment, project=project, location=location + ) + + return experiment_run.get_classification_metrics() + + +# [END aiplatform_sdk_get_experiment_run_classification_metrics_sample] diff --git a/samples/model-builder/experiment_tracking/get_experiment_run_classification_metrics_sample_test.py b/samples/model-builder/experiment_tracking/get_experiment_run_classification_metrics_sample_test.py new file mode 100644 index 0000000000..3f6deb80bf --- /dev/null +++ b/samples/model-builder/experiment_tracking/get_experiment_run_classification_metrics_sample_test.py @@ -0,0 +1,34 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import get_experiment_run_classification_metrics_sample + +import pytest + +import test_constants as constants + + +@pytest.mark.usefixtures("mock_get_run") +def test_get_experiment_run_classification_metrics_sample(mock_get_classification_metrics, mock_classification_metrics): + + classification_metrics = get_experiment_run_classification_metrics_sample.get_experiment_run_classification_metrics_sample( + run_name=constants.EXPERIMENT_RUN_NAME, + experiment=constants.EXPERIMENT_NAME, + project=constants.PROJECT, + location=constants.LOCATION, + ) + + mock_get_classification_metrics.assert_called_with() + + assert classification_metrics is mock_classification_metrics diff --git a/samples/model-builder/experiment_tracking/log_classification_metrics_sample.py b/samples/model-builder/experiment_tracking/log_classification_metrics_sample.py new file mode 100644 index 0000000000..e178356c9c --- /dev/null +++ b/samples/model-builder/experiment_tracking/log_classification_metrics_sample.py @@ -0,0 +1,47 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import List, Optional + +from google.cloud import aiplatform + + +# [START aiplatform_sdk_log_classification_metrics_sample] +def log_classification_metrics_sample( + experiment_name: str, + run_name: str, + project: str, + location: str, + labels: Optional[List[str]] = None, + matrix: Optional[List[List[int]]] = None, + fpr: Optional[List[float]] = None, + tpr: Optional[List[float]] = None, + threshold: Optional[List[float]] = None, + display_name: Optional[str] = None, +) -> None: + aiplatform.init(experiment=experiment_name, project=project, location=location) + + aiplatform.start_run(run=run_name, resume=True) + + aiplatform.log_classification_metrics( + labels=labels, + matrix=matrix, + fpr=fpr, + tpr=tpr, + threshold=threshold, + display_name=display_name, + ) + + +# [END aiplatform_sdk_log_classification_metrics_sample] diff --git a/samples/model-builder/experiment_tracking/log_classification_metrics_sample_test.py b/samples/model-builder/experiment_tracking/log_classification_metrics_sample_test.py new file mode 100644 index 0000000000..c15fd0b123 --- /dev/null +++ b/samples/model-builder/experiment_tracking/log_classification_metrics_sample_test.py @@ -0,0 +1,45 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import log_classification_metrics_sample + +import pytest + +import test_constants as constants + + +@pytest.mark.usefixtures("mock_sdk_init", "mock_start_run") +def test_log_metrics_sample(mock_log_classification_metrics): + + log_classification_metrics_sample.log_classification_metrics_sample( + experiment_name=constants.EXPERIMENT_NAME, + run_name=constants.EXPERIMENT_RUN_NAME, + project=constants.PROJECT, + location=constants.LOCATION, + labels=constants.CLASSIFICATION_METRICS["labels"], + matrix=constants.CLASSIFICATION_METRICS["matrix"], + fpr=constants.CLASSIFICATION_METRICS["fpr"], + tpr=constants.CLASSIFICATION_METRICS["tpr"], + threshold=constants.CLASSIFICATION_METRICS["threshold"], + display_name=constants.CLASSIFICATION_METRICS["display_name"], + ) + + mock_log_classification_metrics.assert_called_with( + labels=constants.CLASSIFICATION_METRICS["labels"], + matrix=constants.CLASSIFICATION_METRICS["matrix"], + fpr=constants.CLASSIFICATION_METRICS["fpr"], + tpr=constants.CLASSIFICATION_METRICS["tpr"], + threshold=constants.CLASSIFICATION_METRICS["threshold"], + display_name=constants.CLASSIFICATION_METRICS["display_name"], + ) diff --git a/samples/model-builder/test_constants.py b/samples/model-builder/test_constants.py index 1ff2b1d96e..76f8d7673b 100644 --- a/samples/model-builder/test_constants.py +++ b/samples/model-builder/test_constants.py @@ -272,7 +272,14 @@ METRICS = {"accuracy": 0.1} PARAMS = {"learning_rate": 0.1} - +CLASSIFICATION_METRICS = { + "display_name": "my-classification-metrics", + "labels": ["cat", "dog"], + "matrix": [[9, 1], [1, 9]], + "fpr": [0.1, 0.5, 0.9], + "tpr": [0.1, 0.7, 0.9], + "threshold": [0.9, 0.5, 0.1], +} TEMPLATE_PATH = "pipeline.json" STEP = 1 diff --git a/tests/system/aiplatform/test_experiments.py b/tests/system/aiplatform/test_experiments.py index ada7c68f82..83d96d945e 100644 --- a/tests/system/aiplatform/test_experiments.py +++ b/tests/system/aiplatform/test_experiments.py @@ -37,6 +37,15 @@ _TIME_SERIES_METRIC_KEY = "accuracy" +_CLASSIFICATION_METRICS = { + "display_name": "my-classification-metrics", + "labels": ["cat", "dog"], + "matrix": [[9, 1], [1, 9]], + "fpr": [0.1, 0.5, 0.9], + "tpr": [0.1, 0.7, 0.9], + "threshold": [0.9, 0.5, 0.1], +} + @pytest.mark.usefixtures( "prepare_staging_bucket", "delete_staging_bucket", "tear_down_resources" @@ -145,6 +154,28 @@ def test_log_time_series_metrics(self): _TIME_SERIES_METRIC_KEY: [float(value) for value in range(5)], } + def test_log_classification_metrics(self, shared_state): + aiplatform.init( + project=e2e_base._PROJECT, + location=e2e_base._LOCATION, + experiment=self._experiment_name, + ) + aiplatform.start_run(_RUN, resume=True) + aiplatform.log_classification_metrics( + display_name=_CLASSIFICATION_METRICS["display_name"], + labels=_CLASSIFICATION_METRICS["labels"], + matrix=_CLASSIFICATION_METRICS["matrix"], + fpr=_CLASSIFICATION_METRICS["fpr"], + tpr=_CLASSIFICATION_METRICS["tpr"], + threshold=_CLASSIFICATION_METRICS["threshold"], + ) + + run = aiplatform.ExperimentRun(run_name=_RUN, experiment=self._experiment_name) + metrics = run.get_classification_metrics()[0] + 
metric_artifact = aiplatform.Artifact(metrics.pop("id")) + assert metrics == _CLASSIFICATION_METRICS + metric_artifact.delete() + def test_create_artifact(self, shared_state): ds = aiplatform.Artifact.create( schema_title="system.Dataset", diff --git a/tests/unit/aiplatform/test_metadata.py b/tests/unit/aiplatform/test_metadata.py index ba5a527683..a8a73b899e 100644 --- a/tests/unit/aiplatform/test_metadata.py +++ b/tests/unit/aiplatform/test_metadata.py @@ -56,6 +56,7 @@ from google.cloud.aiplatform.metadata import metadata from google.cloud.aiplatform.metadata import metadata_store from google.cloud.aiplatform.metadata import utils as metadata_utils + from google.cloud.aiplatform import utils from test_pipeline_jobs import mock_pipeline_service_get # noqa: F401 @@ -123,6 +124,16 @@ _TEST_METRICS = {_TEST_METRIC_KEY_1: 222, _TEST_METRIC_KEY_2: 1} _TEST_OTHER_METRICS = {_TEST_METRIC_KEY_2: 0.9} +# classification_metrics +_TEST_CLASSIFICATION_METRICS = { + "display_name": "my-classification-metrics", + "labels": ["cat", "dog"], + "matrix": [[9, 1], [1, 9]], + "fpr": [0.1, 0.5, 0.9], + "tpr": [0.1, 0.7, 0.9], + "threshold": [0.9, 0.5, 0.1], +} + # schema _TEST_WRONG_SCHEMA_TITLE = "system.WrongSchema" @@ -408,6 +419,50 @@ def query_execution_inputs_and_outputs_mock(): yield query_execution_inputs_and_outputs_mock +_TEST_CLASSIFICATION_METRICS_METADATA = { + "confusionMatrix": { + "annotationSpecs": [{"displayName": "cat"}, {"displayName": "dog"}], + "rows": [[9, 1], [1, 9]], + }, + "confidenceMetrics": [ + {"confidenceThreshold": 0.9, "recall": 0.1, "falsePositiveRate": 0.1}, + {"confidenceThreshold": 0.5, "recall": 0.7, "falsePositiveRate": 0.5}, + {"confidenceThreshold": 0.1, "recall": 0.9, "falsePositiveRate": 0.9}, + ], +} + +_TEST_CLASSIFICATION_METRICS_ARTIFACT = GapicArtifact( + name=_TEST_ARTIFACT_NAME, + display_name=_TEST_CLASSIFICATION_METRICS["display_name"], + schema_title=constants.GOOGLE_CLASSIFICATION_METRICS, + schema_version=constants._DEFAULT_SCHEMA_VERSION, + metadata=_TEST_CLASSIFICATION_METRICS_METADATA, + state=GapicArtifact.State.LIVE, +) + + +@pytest.fixture +def create_classification_metrics_artifact_mock(): + with patch.object( + MetadataServiceClient, "create_artifact" + ) as create_classification_metrics_artifact_mock: + create_classification_metrics_artifact_mock.return_value = ( + _TEST_CLASSIFICATION_METRICS_ARTIFACT + ) + yield create_classification_metrics_artifact_mock + + +@pytest.fixture +def get_classification_metrics_artifact_mock(): + with patch.object( + MetadataServiceClient, "get_artifact" + ) as get_classification_metrics_artifact_mock: + get_classification_metrics_artifact_mock.return_value = ( + _TEST_CLASSIFICATION_METRICS_ARTIFACT + ) + yield get_classification_metrics_artifact_mock + + @pytest.fixture def get_artifact_mock(): with patch.object(MetadataServiceClient, "get_artifact") as get_artifact_mock: @@ -1131,6 +1186,56 @@ def test_log_metrics(self, update_context_mock): update_context_mock.assert_called_once_with(context=_TRUE_CONTEXT) + @pytest.mark.usefixtures( + "get_metadata_store_mock", + "get_experiment_mock", + "create_experiment_run_context_mock", + "add_context_children_mock", + ) + def test_log_classification_metrics( + self, + create_classification_metrics_artifact_mock, + get_classification_metrics_artifact_mock, + add_context_artifacts_and_executions_mock, + ): + aiplatform.init( + project=_TEST_PROJECT, + location=_TEST_LOCATION, + experiment=_TEST_EXPERIMENT, + ) + aiplatform.start_run(_TEST_RUN) + 
aiplatform.log_classification_metrics( + display_name=_TEST_CLASSIFICATION_METRICS["display_name"], + labels=_TEST_CLASSIFICATION_METRICS["labels"], + matrix=_TEST_CLASSIFICATION_METRICS["matrix"], + fpr=_TEST_CLASSIFICATION_METRICS["fpr"], + tpr=_TEST_CLASSIFICATION_METRICS["tpr"], + threshold=_TEST_CLASSIFICATION_METRICS["threshold"], + ) + + expected_artifact = GapicArtifact( + display_name=_TEST_CLASSIFICATION_METRICS["display_name"], + schema_title=constants.GOOGLE_CLASSIFICATION_METRICS, + schema_version=constants._DEFAULT_SCHEMA_VERSION, + metadata=_TEST_CLASSIFICATION_METRICS_METADATA, + state=GapicArtifact.State.LIVE, + ) + create_classification_metrics_artifact_mock.assert_called_once_with( + parent=_TEST_PARENT, + artifact=expected_artifact, + artifact_id=None, + ) + + get_classification_metrics_artifact_mock.assert_called_once_with( + name=_TEST_ARTIFACT_NAME, retry=base._DEFAULT_RETRY + ) + + add_context_artifacts_and_executions_mock.assert_called_once_with( + context=_TEST_EXPERIMENT_RUN_CONTEXT_NAME, + artifacts=[_TEST_ARTIFACT_NAME], + executions=None, + ) + @pytest.mark.usefixtures( "get_metadata_store_mock", "get_experiment_mock", diff --git a/tests/unit/aiplatform/test_metadata_schema.py b/tests/unit/aiplatform/test_metadata_schema.py index 0003838968..8af4d351da 100644 --- a/tests/unit/aiplatform/test_metadata_schema.py +++ b/tests/unit/aiplatform/test_metadata_schema.py @@ -64,7 +64,7 @@ _TEST_DESCRIPTION = "test description" _TEST_METADATA = {"test-param1": 1, "test-param2": "test-value", "test-param3": True} _TEST_UPDATED_METADATA = { - "test-param1": 2, + "test-param1": 2.0, "test-param2": "test-value-1", "test-param3": False, } @@ -748,14 +748,46 @@ def test_classification_metrics_title_is_set_correctly(self): assert artifact.schema_title == "google.ClassificationMetrics" def test_classification_metrics_constructor_parameters_are_set_correctly(self): + aggregation_type = "MACRO_AVERAGE" + aggregation_threshold = 0.5 + recall = 0.5 + precision = 0.5 + f1_score = 0.5 + accuracy = 0.5 au_prc = 1.0 au_roc = 2.0 log_loss = 0.5 + confusion_matrix = utils.ConfusionMatrix( + matrix=[[9.0, 1.0], [1.0, 9.0]], + annotation_specs=[ + utils.AnnotationSpec(display_name="cat"), + utils.AnnotationSpec(display_name="dog"), + ], + ) + confidence_metrics = [ + utils.ConfidenceMetric( + confidence_threshold=0.9, recall=0.1, false_positive_rate=0.1 + ), + utils.ConfidenceMetric( + confidence_threshold=0.5, recall=0.5, false_positive_rate=0.7 + ), + utils.ConfidenceMetric( + confidence_threshold=0.1, recall=0.9, false_positive_rate=0.9 + ), + ] artifact = google_artifact_schema.ClassificationMetrics( + aggregation_type=aggregation_type, + aggregation_threshold=aggregation_threshold, + recall=recall, + precision=precision, + f1_score=f1_score, + accuracy=accuracy, au_prc=au_prc, au_roc=au_roc, log_loss=log_loss, + confusion_matrix=confusion_matrix, + confidence_metrics=confidence_metrics, artifact_id=_TEST_ARTIFACT_ID, uri=_TEST_URI, display_name=_TEST_DISPLAY_NAME, @@ -764,12 +796,22 @@ def test_classification_metrics_constructor_parameters_are_set_correctly(self): metadata=_TEST_UPDATED_METADATA, ) expected_metadata = { - "test-param1": 2.0, - "test-param2": "test-value-1", - "test-param3": False, - "auPrc": 1.0, - "auRoc": 2.0, - "logLoss": 0.5, + "test-param1": _TEST_UPDATED_METADATA["test-param1"], + "test-param2": _TEST_UPDATED_METADATA["test-param2"], + "test-param3": _TEST_UPDATED_METADATA["test-param3"], + "aggregationType": aggregation_type, + "aggregationThreshold": 
aggregation_threshold, + "recall": recall, + "precision": precision, + "f1Score": f1_score, + "accuracy": accuracy, + "auPrc": au_prc, + "auRoc": au_roc, + "logLoss": log_loss, + "confusionMatrix": confusion_matrix.to_dict(), + "confidenceMetrics": [ + confidence_metric.to_dict() for confidence_metric in confidence_metrics + ], } assert artifact.artifact_id == _TEST_ARTIFACT_ID
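Taken together, the changes give the experiment-tracking surface a log/read round trip for classification metrics. A minimal end-to-end sketch follows; the project, location, experiment, and run names are placeholders, not values from this change:

```
from google.cloud import aiplatform

# Placeholder project/location/experiment/run names.
aiplatform.init(
    project="my-project", location="us-central1", experiment="my-experiment"
)
aiplatform.start_run("my-run")

# Log a confusion matrix and an ROC curve to the active run.
aiplatform.log_classification_metrics(
    display_name="my-classification-metrics",
    labels=["cat", "dog"],
    matrix=[[9, 1], [1, 9]],
    fpr=[0.1, 0.5, 0.9],
    tpr=[0.1, 0.7, 0.9],
    threshold=[0.9, 0.5, 0.1],
)
aiplatform.end_run()

# Read the metrics back: each entry is a dict with "id", "display_name",
# "labels", "matrix", "fpr", "tpr", and "threshold" keys.
my_run = aiplatform.ExperimentRun("my-run", experiment="my-experiment")
print(my_run.get_classification_metrics()[0])
```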