From a67e7b86dcca30f9c94a80a1bd0b3eff40a74bda Mon Sep 17 00:00:00 2001 From: taiseiak Date: Thu, 16 Dec 2021 17:38:32 -0800 Subject: [PATCH 1/7] feat: Add XAI SDK integration to TensorFlow models with LIT integration Add automatic addition of feature attribution for TensorFlow 2 models in the LIT integration on Vertex Notebooks. Detects for Vertex Notebooks by looking for the same enviornment variable to check for Vertex Notebooks as the LIT library does. --- google/cloud/aiplatform/explain/lit.py | 147 ++++++++++++++++------ setup.py | 7 +- tests/unit/aiplatform/test_explain_lit.py | 97 ++++++++++++-- 3 files changed, 204 insertions(+), 47 deletions(-) diff --git a/google/cloud/aiplatform/explain/lit.py b/google/cloud/aiplatform/explain/lit.py index 432dd9194a..3d9950f8af 100644 --- a/google/cloud/aiplatform/explain/lit.py +++ b/google/cloud/aiplatform/explain/lit.py @@ -15,11 +15,13 @@ # limitations under the License. from typing import Dict, List, Tuple, Union +import os try: from lit_nlp.api import dataset as lit_dataset from lit_nlp.api import model as lit_model from lit_nlp.api import types as lit_types + from lit_nlp.api import dtypes as lit_dtypes from lit_nlp import notebook except ImportError: raise ImportError( @@ -82,6 +84,7 @@ def __init__( model: str, input_types: "OrderedDict[str, lit_types.LitType]", # noqa: F821 output_types: "OrderedDict[str, lit_types.LitType]", # noqa: F821 + attribution_method: str = "sampled_shapley", ): """Construct a VertexLitModel. Args: @@ -94,39 +97,35 @@ def __init__( output_types: Required. An OrderedDict of string names matching the labels of the model as the key, and the associated LitType of the label. + attribution_method: + Optional. A string to choose what attribution configuration to + set up the explainer with. Valid options are 'sampled_shapley' + or 'integrated_gradients'. """ - self._loaded_model = tf.saved_model.load(model) - serving_default = self._loaded_model.signatures[ - tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY - ] - _, self._kwargs_signature = serving_default.structured_input_signature - self._output_signature = serving_default.structured_outputs - - if len(self._kwargs_signature) != 1: - raise ValueError("Please use a model with only one input tensor.") - - if len(self._output_signature) != 1: - raise ValueError("Please use a model with only one output tensor.") - + self._load_model(model) self._input_types = input_types self._output_types = output_types + self._input_tensor_name = next(iter(self._kwargs_signature)) + self._attribution_explainer = None + if os.environ.get("LIT_PROXY_URL"): + self._set_up_attribution_explainer(model, attribution_method) + + @property + def attribution_explainer( + self, + ) -> Union["AttributionExplainer", None]: # noqa: F821 + """Gets the attribution explainer property if set.""" + return self._attribution_explainer def predict_minibatch( self, inputs: List[lit_types.JsonDict] ) -> List[lit_types.JsonDict]: - """Returns predictions for a single batch of examples. 
- Args: - inputs: - sequence of inputs, following model.input_spec() - Returns: - list of outputs, following model.output_spec() - """ instances = [] for input in inputs: instance = [input[feature] for feature in self._input_types] instances.append(instance) prediction_input_dict = { - next(iter(self._kwargs_signature)): tf.convert_to_tensor(instances) + self._input_tensor_name: tf.convert_to_tensor(instances) } prediction_dict = self._loaded_model.signatures[ tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY @@ -140,6 +139,15 @@ def predict_minibatch( for label, value in zip(self._output_types.keys(), prediction) } ) + # Get feature attributions + if self.attribution_explainer: + attributions = self.attribution_explainer.explain( + [{self._input_tensor_name: i} for i in instances] + ) + for i, attribution in enumerate(attributions): + outputs[i]["feature_attribution"] = lit_dtypes.FeatureSalience( + attribution.feature_importance() + ) return outputs def input_spec(self) -> lit_types.Spec: @@ -148,7 +156,52 @@ def input_spec(self) -> lit_types.Spec: def output_spec(self) -> lit_types.Spec: """Return a spec describing model outputs.""" - return self._output_types + output_spec_dict = dict(self._output_types) + if self.attribution_explainer: + output_spec_dict["feature_attribution"] = lit_types.FeatureSalience( + signed=True + ) + return output_spec_dict + + def _load_model(self, model: str): + self._loaded_model = tf.saved_model.load(model) + serving_default = self._loaded_model.signatures[ + tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY + ] + _, self._kwargs_signature = serving_default.structured_input_signature + self._output_signature = serving_default.structured_outputs + + if len(self._kwargs_signature) != 1: + raise ValueError("Please use a model with only one input tensor.") + + if len(self._output_signature) != 1: + raise ValueError("Please use a model with only one output tensor.") + + def _set_up_attribution_explainer( + self, model: str, attribution_method: str = "integrated_gradients" + ): + try: + import explainable_ai_sdk + from explainable_ai_sdk.metadata.tf.v2 import SavedModelMetadataBuilder + except ImportError: + return + + builder = SavedModelMetadataBuilder(model) + builder.get_metadata() + builder.set_numeric_metadata( + self._input_tensor_name, + index_feature_mapping=list(self._input_types.keys()), + ) + builder.save_metadata(model) + if attribution_method == "integrated_gradients": + explainer_config = explainable_ai_sdk.IntegratedGradientsConfig() + else: + explainer_config = explainable_ai_sdk.SampledShapleyConfig() + + self._attribution_explainer = explainable_ai_sdk.load_model_from_local_path( + model, explainer_config + ) + self._load_model(model) def create_lit_dataset( @@ -172,22 +225,27 @@ def create_lit_model( model: str, input_types: "OrderedDict[str, lit_types.LitType]", # noqa: F821 output_types: "OrderedDict[str, lit_types.LitType]", # noqa: F821 + attribution_method: str = "sampled_shapley", ) -> lit_model.Model: """Creates a LIT Model object. Args: model: - Required. A string reference to a local TensorFlow saved model directory. - The model must have at most one input and one output tensor. + Required. A string reference to a local TensorFlow saved model directory. + The model must have at most one input and one output tensor. input_types: - Required. An OrderedDict of string names matching the features of the model - as the key, and the associated LitType of the feature. + Required. 
An OrderedDict of string names matching the features of the model + as the key, and the associated LitType of the feature. output_types: - Required. An OrderedDict of string names matching the labels of the model - as the key, and the associated LitType of the label. + Required. An OrderedDict of string names matching the labels of the model + as the key, and the associated LitType of the label. + attribution_method: + Optional. A string to choose what attribution configuration to + set up the explainer with. Valid options are 'sampled_shapley' + or 'integrated_gradients'. Returns: A LIT Model object that has the same functionality as the model provided. """ - return _VertexLitModel(model, input_types, output_types) + return _VertexLitModel(model, input_types, output_types, attribution_method) def open_lit( @@ -198,11 +256,11 @@ def open_lit( """Open LIT from the provided models and datasets. Args: models: - Required. A list of LIT models to open LIT with. + Required. A list of LIT models to open LIT with. input_types: - Required. A lit of LIT datasets to open LIT with. + Required. A lit of LIT datasets to open LIT with. open_in_new_tab: - Optional. A boolean to choose if LIT open in a new tab or not. + Optional. A boolean to choose if LIT open in a new tab or not. Raises: ImportError if LIT is not installed. """ @@ -216,24 +274,31 @@ def set_up_and_open_lit( model: Union[str, lit_model.Model], input_types: Union[List[str], Dict[str, lit_types.LitType]], output_types: Union[str, List[str], Dict[str, lit_types.LitType]], + attribution_method: str = "sampled_shapley", open_in_new_tab: bool = True, ) -> Tuple[lit_dataset.Dataset, lit_model.Model]: """Creates a LIT dataset and model and opens LIT. Args: - dataset: + dataset: Required. A Pandas DataFrame that includes feature column names and data. - column_types: + column_types: Required. An OrderedDict of string names matching the columns of the dataset as the key, and the associated LitType of the column. - model: + model: Required. A string reference to a TensorFlow saved model directory. The model must have at most one input and one output tensor. - input_types: + input_types: Required. An OrderedDict of string names matching the features of the model as the key, and the associated LitType of the feature. - output_types: + output_types: Required. An OrderedDict of string names matching the labels of the model as the key, and the associated LitType of the label. + attribution_method: + Optional. A string to choose what attribution configuration to + set up the explainer with. Valid options are 'sampled_shapley' + or 'integrated_gradients'. + open_in_new_tab: + Optional. A boolean to choose if LIT open in a new tab or not. Returns: A Tuple of the LIT dataset and model created. 
Raises: @@ -244,8 +309,12 @@ def set_up_and_open_lit( dataset = create_lit_dataset(dataset, column_types) if not isinstance(model, lit_model.Model): - model = create_lit_model(model, input_types, output_types) + model = create_lit_model( + model, input_types, output_types, attribution_method=attribution_method + ) - open_lit({"model": model}, {"dataset": dataset}, open_in_new_tab=open_in_new_tab) + open_lit( + {"model": model}, {"dataset": dataset}, open_in_new_tab=open_in_new_tab, + ) return dataset, model diff --git a/setup.py b/setup.py index c4ceaf9d40..2cf62fb3a7 100644 --- a/setup.py +++ b/setup.py @@ -36,7 +36,12 @@ tensorboard_extra_require = ["tensorflow >=2.3.0, <=2.5.0"] metadata_extra_require = ["pandas >= 1.0.0"] xai_extra_require = ["tensorflow >=2.3.0, <=2.5.0"] -lit_extra_require = ["tensorflow >= 2.3.0", "pandas >= 1.0.0", "lit-nlp >= 0.4.0"] +lit_extra_require = [ + "tensorflow >= 2.3.0", + "pandas >= 1.0.0", + "lit-nlp >= 0.4.0", + "explainable-ai-sdk >= 1.0.0", +] profiler_extra_require = [ "tensorboard-plugin-profile >= 2.4.0", "werkzeug >= 2.0.0", diff --git a/tests/unit/aiplatform/test_explain_lit.py b/tests/unit/aiplatform/test_explain_lit.py index 718f62b022..9b7f17da69 100644 --- a/tests/unit/aiplatform/test_explain_lit.py +++ b/tests/unit/aiplatform/test_explain_lit.py @@ -15,6 +15,7 @@ # limitations under the License. import collections +import os import pytest import tensorflow as tf import pandas as pd @@ -22,6 +23,7 @@ from lit_nlp.api import types as lit_types from lit_nlp import notebook from unittest import mock +import explainable_ai_sdk from google.cloud.aiplatform.explain.lit import ( create_lit_dataset, create_lit_model, @@ -36,6 +38,33 @@ def widget_render_mock(): yield render_mock +@pytest.fixture +def sampled_shapley_explainer_mock(): + with mock.patch.object( + explainable_ai_sdk, "SampledShapleyConfig", create=True + ) as config_mock: + yield config_mock + + +@pytest.fixture +def load_model_from_local_path_mock(): + with mock.patch.object( + explainable_ai_sdk, "load_model_from_local_path", autospec=True + ) as explainer_mock: + model_mock = mock.Mock() + explanation_mock = mock.Mock() + explanation_mock.feature_importance.return_value = { + "feature_1": 0.01, + "feature_2": 0.1, + } + model_mock.explain.return_value = [ + explanation_mock + # , explanation_mock + ] + explainer_mock.return_value = model_mock + yield explainer_mock + + @pytest.fixture def set_up_sequential(tmpdir): # Set up a sequential model @@ -55,7 +84,7 @@ def set_up_sequential(tmpdir): @pytest.fixture def set_up_pandas_dataframe_and_columns(): dataframe = pd.DataFrame.from_dict( - {"feature_1": [1.0, 2.0], "feature_2": [3.0, 4.0], "label": [1.0, 0.0]} + {"feature_1": [1.0], "feature_2": [3.0], "label": [1.0]} ) columns = collections.OrderedDict( [ @@ -74,7 +103,6 @@ def test_create_lit_dataset_from_pandas_returns_dataset( lit_dataset = create_lit_dataset(pd_dataset, lit_columns) expected_examples = [ {"feature_1": 1.0, "feature_2": 3.0, "label": 1.0}, - {"feature_1": 2.0, "feature_2": 4.0, "label": 0.0}, ] assert lit_dataset.spec() == dict(lit_columns) @@ -86,18 +114,39 @@ def test_create_lit_model_from_tensorflow_returns_model(set_up_sequential): lit_model = create_lit_model(saved_model_path, feature_types, label_types) test_inputs = [ {"feature_1": 1.0, "feature_2": 2.0}, - {"feature_1": 3.0, "feature_2": 4.0}, ] outputs = lit_model.predict_minibatch(test_inputs) assert lit_model.input_spec() == dict(feature_types) assert lit_model.output_spec() == dict(label_types) - assert 
len(outputs) == 2 + assert len(outputs) == 1 for item in outputs: assert item.keys() == {"label"} assert len(item.values()) == 1 +@mock.patch.dict(os.environ, {"LIT_PROXY_URL": "auto"}) +@pytest.mark.usefixtures( + "sampled_shapley_explainer_mock", "load_model_from_local_path_mock" +) +def test_create_lit_model_from_tensorflow_with_xai_returns_model(set_up_sequential): + feature_types, label_types, saved_model_path = set_up_sequential + lit_model = create_lit_model(saved_model_path, feature_types, label_types) + test_inputs = [ + {"feature_1": 1.0, "feature_2": 2.0}, + ] + outputs = lit_model.predict_minibatch(test_inputs) + + assert lit_model.input_spec() == dict(feature_types) + assert lit_model.output_spec() == dict( + {**label_types, "feature_attribution": lit_types.FeatureSalience(signed=True)} + ) + assert len(outputs) == 1 + for item in outputs: + assert item.keys() == {"label", "feature_attribution"} + assert len(item.values()) == 2 + + def test_open_lit( set_up_sequential, set_up_pandas_dataframe_and_columns, widget_render_mock ): @@ -121,11 +170,9 @@ def test_set_up_and_open_lit( expected_examples = [ {"feature_1": 1.0, "feature_2": 3.0, "label": 1.0}, - {"feature_1": 2.0, "feature_2": 4.0, "label": 0.0}, ] test_inputs = [ {"feature_1": 1.0, "feature_2": 2.0}, - {"feature_1": 3.0, "feature_2": 4.0}, ] outputs = lit_model.predict_minibatch(test_inputs) @@ -134,9 +181,45 @@ def test_set_up_and_open_lit( assert lit_model.input_spec() == dict(feature_types) assert lit_model.output_spec() == dict(label_types) - assert len(outputs) == 2 + assert len(outputs) == 1 for item in outputs: assert item.keys() == {"label"} assert len(item.values()) == 1 widget_render_mock.assert_called_once() + + +@mock.patch.dict(os.environ, {"LIT_PROXY_URL": "auto"}) +@pytest.mark.usefixtures( + "sampled_shapley_explainer_mock", "load_model_from_local_path_mock" +) +def test_set_up_and_open_lit_with_xai( + set_up_sequential, set_up_pandas_dataframe_and_columns, widget_render_mock +): + pd_dataset, lit_columns = set_up_pandas_dataframe_and_columns + feature_types, label_types, saved_model_path = set_up_sequential + lit_dataset, lit_model = set_up_and_open_lit( + pd_dataset, lit_columns, saved_model_path, feature_types, label_types + ) + + expected_examples = [ + {"feature_1": 1.0, "feature_2": 3.0, "label": 1.0}, + ] + test_inputs = [ + {"feature_1": 1.0, "feature_2": 2.0}, + ] + outputs = lit_model.predict_minibatch(test_inputs) + + assert lit_dataset.spec() == dict(lit_columns) + assert expected_examples == lit_dataset._examples + + assert lit_model.input_spec() == dict(feature_types) + assert lit_model.output_spec() == dict( + {**label_types, "feature_attribution": lit_types.FeatureSalience(signed=True)} + ) + assert len(outputs) == 1 + for item in outputs: + assert item.keys() == {"label", "feature_attribution"} + assert len(item.values()) == 2 + + widget_render_mock.assert_called_once() From cb3d243d8431d7b7ccf28370078bb92ad3b6dffa Mon Sep 17 00:00:00 2001 From: taiseiak Date: Mon, 3 Jan 2022 19:16:28 -0800 Subject: [PATCH 2/7] refactor: Addressing PR review comments. 
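For reviewers, a minimal usage sketch of the LIT + Explainable AI integration introduced in PATCH 1/7. The DataFrame contents, column names, and saved-model path below are illustrative placeholders matching the unit-test fixtures, not values taken from this change:

import collections
import pandas as pd
from lit_nlp.api import types as lit_types
from google.cloud.aiplatform.explain.lit import set_up_and_open_lit

# Toy tabular data mirroring the test fixtures in this series.
dataframe = pd.DataFrame.from_dict(
    {"feature_1": [1.0, 2.0], "feature_2": [3.0, 4.0], "label": [1.0, 0.0]}
)
column_types = collections.OrderedDict(
    [
        ("feature_1", lit_types.Scalar()),
        ("feature_2", lit_types.Scalar()),
        ("label", lit_types.RegressionScore()),
    ]
)
feature_types = collections.OrderedDict(
    [("feature_1", lit_types.Scalar()), ("feature_2", lit_types.Scalar())]
)
label_types = collections.OrderedDict([("label", lit_types.RegressionScore())])

# On Vertex Notebooks (detected through the LIT_PROXY_URL environment variable)
# and with the explainable-ai-sdk installed, feature attributions are added to
# the LIT output spec automatically; otherwise plain predictions are shown.
lit_dataset, lit_model = set_up_and_open_lit(
    dataframe,
    column_types,
    "/tmp/my_saved_model",  # hypothetical TF2 saved model with one input and one output tensor
    feature_types,
    label_types,
    attribution_method="integrated_gradients",  # default is "sampled_shapley"
)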
--- google/cloud/aiplatform/explain/lit.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/google/cloud/aiplatform/explain/lit.py b/google/cloud/aiplatform/explain/lit.py index 3d9950f8af..a58059aa89 100644 --- a/google/cloud/aiplatform/explain/lit.py +++ b/google/cloud/aiplatform/explain/lit.py @@ -14,14 +14,14 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Dict, List, Tuple, Union import os +from typing import Dict, List, Optional, Tuple, Union try: from lit_nlp.api import dataset as lit_dataset + from lit_nlp.api import dtypes as lit_dtypes from lit_nlp.api import model as lit_model from lit_nlp.api import types as lit_types - from lit_nlp.api import dtypes as lit_dtypes from lit_nlp import notebook except ImportError: raise ImportError( @@ -113,7 +113,7 @@ def __init__( @property def attribution_explainer( self, - ) -> Union["AttributionExplainer", None]: # noqa: F821 + ) -> Optional["AttributionExplainer"]: # noqa: F821 """Gets the attribution explainer property if set.""" return self._attribution_explainer @@ -164,6 +164,12 @@ def output_spec(self) -> lit_types.Spec: return output_spec_dict def _load_model(self, model: str): + """Loads a TensorFlow saved model and populates the input and output signature attributes of the class. + Args: + model: Required. A string reference to a TensorFlow saved model directory. + Raises: + ValueError if the model has more than one input tensor or more than one output tensor. + """ self._loaded_model = tf.saved_model.load(model) serving_default = self._loaded_model.signatures[ tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY @@ -180,10 +186,15 @@ def _load_model(self, model: str): def _set_up_attribution_explainer( self, model: str, attribution_method: str = "integrated_gradients" ): + """Populates the attribution explainer attribute of the class.""" try: import explainable_ai_sdk from explainable_ai_sdk.metadata.tf.v2 import SavedModelMetadataBuilder except ImportError: + print( + "Skipping explanations because the Explainable AI SDK is not installed." 
+ 'Please install the SDK using "pip install explainable-ai-sdk"' + ) return builder = SavedModelMetadataBuilder(model) From 93c08bade51b7ce47383db96b5f5ff361c64af03 Mon Sep 17 00:00:00 2001 From: taiseiak Date: Wed, 5 Jan 2022 17:15:18 -0800 Subject: [PATCH 3/7] fix: Fix lint error --- google/cloud/aiplatform/explain/lit.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/google/cloud/aiplatform/explain/lit.py b/google/cloud/aiplatform/explain/lit.py index a58059aa89..dafe9b9b1e 100644 --- a/google/cloud/aiplatform/explain/lit.py +++ b/google/cloud/aiplatform/explain/lit.py @@ -111,9 +111,7 @@ def __init__( self._set_up_attribution_explainer(model, attribution_method) @property - def attribution_explainer( - self, - ) -> Optional["AttributionExplainer"]: # noqa: F821 + def attribution_explainer(self,) -> Optional["AttributionExplainer"]: # noqa: F821 """Gets the attribution explainer property if set.""" return self._attribution_explainer From 237e4040dfb29d59a6fc644f1bb6965618485f88 Mon Sep 17 00:00:00 2001 From: taiseiak Date: Wed, 12 Jan 2022 11:36:35 -0800 Subject: [PATCH 4/7] style: Address review comments --- google/cloud/aiplatform/explain/lit.py | 13 +++++++++++-- tests/unit/aiplatform/test_explain_lit.py | 10 +++++----- 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/google/cloud/aiplatform/explain/lit.py b/google/cloud/aiplatform/explain/lit.py index dafe9b9b1e..f9b99d5b87 100644 --- a/google/cloud/aiplatform/explain/lit.py +++ b/google/cloud/aiplatform/explain/lit.py @@ -14,7 +14,9 @@ # See the License for the specific language governing permissions and # limitations under the License. +import logging import os + from typing import Dict, List, Optional, Tuple, Union try: @@ -184,12 +186,19 @@ def _load_model(self, model: str): def _set_up_attribution_explainer( self, model: str, attribution_method: str = "integrated_gradients" ): - """Populates the attribution explainer attribute of the class.""" + """Populates the attribution explainer attribute of the class. + Args: + model: Required. A string reference to a TensorFlow saved model directory. + attribution_method: + Optional. A string to choose what attribution configuration to + set up the explainer with. Valid options are 'sampled_shapley' + or 'integrated_gradients'. + """ try: import explainable_ai_sdk from explainable_ai_sdk.metadata.tf.v2 import SavedModelMetadataBuilder except ImportError: - print( + logging.info( "Skipping explanations because the Explainable AI SDK is not installed." 'Please install the SDK using "pip install explainable-ai-sdk"' ) diff --git a/tests/unit/aiplatform/test_explain_lit.py b/tests/unit/aiplatform/test_explain_lit.py index 9b7f17da69..8f10193c7b 100644 --- a/tests/unit/aiplatform/test_explain_lit.py +++ b/tests/unit/aiplatform/test_explain_lit.py @@ -15,21 +15,21 @@ # limitations under the License. 
import collections +import explainable_ai_sdk import os +import pandas as pd import pytest import tensorflow as tf -import pandas as pd -from lit_nlp.api import types as lit_types -from lit_nlp import notebook -from unittest import mock -import explainable_ai_sdk from google.cloud.aiplatform.explain.lit import ( create_lit_dataset, create_lit_model, open_lit, set_up_and_open_lit, ) +from lit_nlp.api import types as lit_types +from lit_nlp import notebook +from unittest import mock @pytest.fixture From 02e58fc1e8cd7796c4293ce92d58c8483f69737e Mon Sep 17 00:00:00 2001 From: taiseiak Date: Thu, 20 Jan 2022 12:22:58 -0800 Subject: [PATCH 5/7] feat: Open LIT with a deployed model Add the ability to open LIT with a deployed model. Has support for explainability, if the model has it enabled. --- google/cloud/aiplatform/explain/lit.py | 118 +++++- tests/unit/aiplatform/test_explain_lit.py | 455 ++++++++++++++++------ 2 files changed, 447 insertions(+), 126 deletions(-) diff --git a/google/cloud/aiplatform/explain/lit.py b/google/cloud/aiplatform/explain/lit.py index f9b99d5b87..f10c1e3e7c 100644 --- a/google/cloud/aiplatform/explain/lit.py +++ b/google/cloud/aiplatform/explain/lit.py @@ -17,6 +17,7 @@ import logging import os +from google.cloud import aiplatform from typing import Dict, List, Optional, Tuple, Union try: @@ -75,8 +76,103 @@ def spec(self): return dict(self._column_types) -class _VertexLitModel(lit_model.Model): - """LIT model class for the Vertex LIT integration. +class _EndpointLitModel(lit_model.Model): + """LIT model class for the Vertex LIT integration with a model deployed to an endpoint. + + This is used in the create_lit_model function. + """ + + def __init__( + self, + model: str, + input_types: "OrderedDict[str, lit_types.LitType]", # noqa: F821 + output_types: "OrderedDict[str, lit_types.LitType]", # noqa: F821 + model_id: Optional[str] = None, + ): + """Construct a VertexLitModel. + Args: + model: + Required. The name of the Endpoint resource. Format: + ``projects/{project}/locations/{location}/endpoints/{endpoint}`` + input_types: + Required. An OrderedDict of string names matching the features of the model + as the key, and the associated LitType of the feature. + output_types: + Required. An OrderedDict of string names matching the labels of the model + as the key, and the associated LitType of the label. + model_id: + Optional. A string of the specific model in the endpoint to create the + LIT model from. If this is not set, any usable model in the endpoint is + used to create the LIT model. + Raises: + ValueError if the model_id was not found in the endpoint. 
+ """ + self._endpoint = aiplatform.Endpoint(model) + self._model_id = model_id + self._input_types = input_types + self._output_types = output_types + # Check if the model with the model ID has explanation enabled + if model_id: + deployed_model = next( + filter( + lambda model: model.id == model_id, self._endpoint.list_models() + ), + None, + ) + if not deployed_model: + raise ValueError( + "A model with id {model_id} was not found in the endpoint {endpoint}.".format( + model_id=model_id, endpoint=model + ) + ) + self._explanation_enabled = bool(deployed_model.explanation_spec) + # Check if all models in the endpoint have explanation enabled + else: + self._explanation_enabled = all( + map( + lambda model: bool(model.explanation_spec), + self._endpoint.list_models(), + ) + ) + + def predict_minibatch( + self, inputs: List[lit_types.JsonDict] + ) -> List[lit_types.JsonDict]: + instances = [] + for input in inputs: + instance = [input[feature] for feature in self._input_types] + instances.append(instance) + if self._explanation_enabled: + prediction_object = self._endpoint.explain(instances) + else: + prediction_object = self._endpoint.predict(instances) + outputs = [] + for prediction in prediction_object.predictions: + outputs.append({key: prediction[key] for key in self._output_types}) + if self._explanation_enabled: + for i, explanation in enumerate(prediction_object.explanations): + attributions = explanation.attributions + outputs[i]["feature_attribution"] = lit_dtypes.FeatureSalience( + attributions + ) + return outputs + + def input_spec(self) -> lit_types.Spec: + """Return a spec describing model inputs.""" + return dict(self._input_types) + + def output_spec(self) -> lit_types.Spec: + """Return a spec describing model outputs.""" + output_spec_dict = dict(self._output_types) + if self._explanation_enabled: + output_spec_dict["feature_attribution"] = lit_types.FeatureSalience( + signed=True + ) + return output_spec_dict + + +class _TensorFlowLitModel(lit_model.Model): + """LIT model class for the Vertex LIT integration with a TensorFlow saved model. This is used in the create_lit_model function. """ @@ -244,12 +340,17 @@ def create_lit_model( input_types: "OrderedDict[str, lit_types.LitType]", # noqa: F821 output_types: "OrderedDict[str, lit_types.LitType]", # noqa: F821 attribution_method: str = "sampled_shapley", + model_id: Optional[str] = None, ) -> lit_model.Model: """Creates a LIT Model object. Args: model: - Required. A string reference to a local TensorFlow saved model directory. - The model must have at most one input and one output tensor. + Required. A string reference to a local TensorFlow saved model directory, + or the name of the Endpoint resource. Endpoint format: + ``projects/{project}/locations/{location}/endpoints/{endpoint}`` + + If using a local TensorFlow model, the model must have at most one + input and one output tensor. input_types: Required. An OrderedDict of string names matching the features of the model as the key, and the associated LitType of the feature. @@ -260,10 +361,17 @@ def create_lit_model( Optional. A string to choose what attribution configuration to set up the explainer with. Valid options are 'sampled_shapley' or 'integrated_gradients'. + model_id: + Optional. A string of the specific model in the endpoint to create the + LIT model from. If this is not set, any usable model in the endpoint is + used to create the LIT model. Returns: A LIT Model object that has the same functionality as the model provided. 
""" - return _VertexLitModel(model, input_types, output_types, attribution_method) + if os.path.exists(model): + return _TensorFlowLitModel(model, input_types, output_types, attribution_method) + else: + return _EndpointLitModel(model, input_types, output_types, model_id) def open_lit( diff --git a/tests/unit/aiplatform/test_explain_lit.py b/tests/unit/aiplatform/test_explain_lit.py index 8f10193c7b..4a041906b7 100644 --- a/tests/unit/aiplatform/test_explain_lit.py +++ b/tests/unit/aiplatform/test_explain_lit.py @@ -21,16 +21,103 @@ import pytest import tensorflow as tf +from google.auth import credentials as auth_credentials +from google.cloud import aiplatform +from google.cloud.aiplatform import initializer +from google.cloud.aiplatform.compat.types import ( + endpoint as gca_endpoint, + prediction_service as gca_prediction_service, + explanation as gca_explanation, +) from google.cloud.aiplatform.explain.lit import ( create_lit_dataset, create_lit_model, open_lit, set_up_and_open_lit, ) +from google.cloud.aiplatform_v1.services.endpoint_service import ( + client as endpoint_service_client, +) +from google.cloud.aiplatform_v1.services.prediction_service import ( + client as prediction_service_client, +) +from importlib import reload from lit_nlp.api import types as lit_types from lit_nlp import notebook from unittest import mock +_TEST_PROJECT = "test-project" +_TEST_LOCATION = "us-central1" +_TEST_ID = "1028944691210842416" +_TEST_ID_2 = "4366591682456584192" +_TEST_ID_3 = "5820582938582924817" +_TEST_DISPLAY_NAME = "test-display-name" +_TEST_DISPLAY_NAME_2 = "test-display-name-2" +_TEST_DISPLAY_NAME_3 = "test-display-name-3" +_TEST_ENDPOINT_NAME = ( + f"projects/{_TEST_PROJECT}/locations/{_TEST_LOCATION}/endpoints/{_TEST_ID}" +) +_TEST_CREDENTIALS = mock.Mock(spec=auth_credentials.AnonymousCredentials()) +_TEST_EXPLANATION_METADATA = aiplatform.explain.ExplanationMetadata( + inputs={ + "features": { + "input_tensor_name": "dense_input", + "encoding": "BAG_OF_FEATURES", + "modality": "numeric", + "index_feature_mapping": ["abc", "def", "ghj"], + } + }, + outputs={"medv": {"output_tensor_name": "dense_2"}}, +) +_TEST_EXPLANATION_PARAMETERS = aiplatform.explain.ExplanationParameters( + {"sampled_shapley_attribution": {"path_count": 10}} +) +_TEST_DEPLOYED_MODELS = [ + gca_endpoint.DeployedModel(id=_TEST_ID, display_name=_TEST_DISPLAY_NAME), + gca_endpoint.DeployedModel(id=_TEST_ID_2, display_name=_TEST_DISPLAY_NAME_2), + gca_endpoint.DeployedModel(id=_TEST_ID_3, display_name=_TEST_DISPLAY_NAME_3), +] +_TEST_DEPLOYED_MODELS_WITH_EXPLANATION = [ + gca_endpoint.DeployedModel( + id=_TEST_ID, + display_name=_TEST_DISPLAY_NAME, + explanation_spec=gca_explanation.ExplanationSpec( + metadata=_TEST_EXPLANATION_METADATA, + parameters=_TEST_EXPLANATION_PARAMETERS, + ), + ), + gca_endpoint.DeployedModel( + id=_TEST_ID_2, + display_name=_TEST_DISPLAY_NAME_2, + explanation_spec=gca_explanation.ExplanationSpec( + metadata=_TEST_EXPLANATION_METADATA, + parameters=_TEST_EXPLANATION_PARAMETERS, + ), + ), + gca_endpoint.DeployedModel( + id=_TEST_ID_3, + display_name=_TEST_DISPLAY_NAME_3, + explanation_spec=gca_explanation.ExplanationSpec( + metadata=_TEST_EXPLANATION_METADATA, + parameters=_TEST_EXPLANATION_PARAMETERS, + ), + ), +] +_TEST_TRAFFIC_SPLIT = {_TEST_ID: 0, _TEST_ID_2: 100, _TEST_ID_3: 0} +_TEST_PREDICTION = [{"label": 1.0}] +_TEST_EXPLANATIONS = [gca_prediction_service.explanation.Explanation(attributions=[])] +_TEST_ATTRIBUTIONS = [ + gca_prediction_service.explanation.Attribution( + 
baseline_output_value=1.0, + instance_output_value=2.0, + feature_attributions={"feature_1": 3.0, "feature_2": 2.0}, + output_index=[1, 2, 3], + output_display_name="abc", + approximation_error=6.0, + output_name="xyz", + ) +] + @pytest.fixture def widget_render_mock(): @@ -57,16 +144,25 @@ def load_model_from_local_path_mock(): "feature_1": 0.01, "feature_2": 0.1, } - model_mock.explain.return_value = [ - explanation_mock - # , explanation_mock - ] + model_mock.explain.return_value = [explanation_mock] explainer_mock.return_value = model_mock yield explainer_mock @pytest.fixture -def set_up_sequential(tmpdir): +def feature_types(): + yield collections.OrderedDict( + [("feature_1", lit_types.Scalar()), ("feature_2", lit_types.Scalar())] + ) + + +@pytest.fixture +def label_types(): + yield collections.OrderedDict([("label", lit_types.RegressionScore())]) + + +@pytest.fixture +def set_up_sequential(tmpdir, feature_types, label_types): # Set up a sequential model seq_model = tf.keras.models.Sequential() seq_model.add(tf.keras.layers.Dense(32, activation="relu", input_shape=(2,))) @@ -74,10 +170,6 @@ def set_up_sequential(tmpdir): seq_model.add(tf.keras.layers.Dense(1, activation="sigmoid")) saved_model_path = str(tmpdir.mkdir("tmp")) tf.saved_model.save(seq_model, saved_model_path) - feature_types = collections.OrderedDict( - [("feature_1", lit_types.Scalar()), ("feature_2", lit_types.Scalar())] - ) - label_types = collections.OrderedDict([("label", lit_types.RegressionScore())]) yield feature_types, label_types, saved_model_path @@ -96,130 +188,251 @@ def set_up_pandas_dataframe_and_columns(): yield dataframe, columns -def test_create_lit_dataset_from_pandas_returns_dataset( - set_up_pandas_dataframe_and_columns, -): - pd_dataset, lit_columns = set_up_pandas_dataframe_and_columns - lit_dataset = create_lit_dataset(pd_dataset, lit_columns) - expected_examples = [ - {"feature_1": 1.0, "feature_2": 3.0, "label": 1.0}, - ] - - assert lit_dataset.spec() == dict(lit_columns) - assert expected_examples == lit_dataset._examples - +@pytest.fixture +def get_endpoint_with_models_mock(): + with mock.patch.object( + endpoint_service_client.EndpointServiceClient, "get_endpoint" + ) as get_endpoint_mock: + get_endpoint_mock.return_value = gca_endpoint.Endpoint( + display_name=_TEST_DISPLAY_NAME, + name=_TEST_ENDPOINT_NAME, + deployed_models=_TEST_DEPLOYED_MODELS, + traffic_split=_TEST_TRAFFIC_SPLIT, + ) + yield get_endpoint_mock -def test_create_lit_model_from_tensorflow_returns_model(set_up_sequential): - feature_types, label_types, saved_model_path = set_up_sequential - lit_model = create_lit_model(saved_model_path, feature_types, label_types) - test_inputs = [ - {"feature_1": 1.0, "feature_2": 2.0}, - ] - outputs = lit_model.predict_minibatch(test_inputs) - assert lit_model.input_spec() == dict(feature_types) - assert lit_model.output_spec() == dict(label_types) - assert len(outputs) == 1 - for item in outputs: - assert item.keys() == {"label"} - assert len(item.values()) == 1 +@pytest.fixture +def get_endpoint_with_models_with_explanation_mock(): + with mock.patch.object( + endpoint_service_client.EndpointServiceClient, "get_endpoint" + ) as get_endpoint_mock: + get_endpoint_mock.return_value = gca_endpoint.Endpoint( + display_name=_TEST_DISPLAY_NAME, + name=_TEST_ENDPOINT_NAME, + deployed_models=_TEST_DEPLOYED_MODELS_WITH_EXPLANATION, + traffic_split=_TEST_TRAFFIC_SPLIT, + ) + yield get_endpoint_mock -@mock.patch.dict(os.environ, {"LIT_PROXY_URL": "auto"}) -@pytest.mark.usefixtures( - 
"sampled_shapley_explainer_mock", "load_model_from_local_path_mock" -) -def test_create_lit_model_from_tensorflow_with_xai_returns_model(set_up_sequential): - feature_types, label_types, saved_model_path = set_up_sequential - lit_model = create_lit_model(saved_model_path, feature_types, label_types) - test_inputs = [ - {"feature_1": 1.0, "feature_2": 2.0}, - ] - outputs = lit_model.predict_minibatch(test_inputs) - - assert lit_model.input_spec() == dict(feature_types) - assert lit_model.output_spec() == dict( - {**label_types, "feature_attribution": lit_types.FeatureSalience(signed=True)} - ) - assert len(outputs) == 1 - for item in outputs: - assert item.keys() == {"label", "feature_attribution"} - assert len(item.values()) == 2 - - -def test_open_lit( - set_up_sequential, set_up_pandas_dataframe_and_columns, widget_render_mock -): - pd_dataset, lit_columns = set_up_pandas_dataframe_and_columns - lit_dataset = create_lit_dataset(pd_dataset, lit_columns) - feature_types, label_types, saved_model_path = set_up_sequential - lit_model = create_lit_model(saved_model_path, feature_types, label_types) - - open_lit({"model": lit_model}, {"dataset": lit_dataset}) - widget_render_mock.assert_called_once() - - -def test_set_up_and_open_lit( - set_up_sequential, set_up_pandas_dataframe_and_columns, widget_render_mock -): - pd_dataset, lit_columns = set_up_pandas_dataframe_and_columns - feature_types, label_types, saved_model_path = set_up_sequential - lit_dataset, lit_model = set_up_and_open_lit( - pd_dataset, lit_columns, saved_model_path, feature_types, label_types - ) +@pytest.fixture +def predict_client_predict_mock(): + with mock.patch.object( + prediction_service_client.PredictionServiceClient, "predict" + ) as predict_mock: + predict_mock.return_value = gca_prediction_service.PredictResponse( + deployed_model_id=_TEST_ID + ) + predict_mock.return_value.predictions.extend(_TEST_PREDICTION) + yield predict_mock - expected_examples = [ - {"feature_1": 1.0, "feature_2": 3.0, "label": 1.0}, - ] - test_inputs = [ - {"feature_1": 1.0, "feature_2": 2.0}, - ] - outputs = lit_model.predict_minibatch(test_inputs) - assert lit_dataset.spec() == dict(lit_columns) - assert expected_examples == lit_dataset._examples +@pytest.fixture +def predict_client_explain_mock(): + with mock.patch.object( + prediction_service_client.PredictionServiceClient, "explain" + ) as predict_mock: + predict_mock.return_value = gca_prediction_service.ExplainResponse( + deployed_model_id=_TEST_ID, + ) + predict_mock.return_value.predictions.extend(_TEST_PREDICTION) + predict_mock.return_value.explanations.extend(_TEST_EXPLANATIONS) + predict_mock.return_value.explanations[0].attributions.extend( + _TEST_ATTRIBUTIONS + ) + yield predict_mock + + +class TestExplainLit: + def setup_method(self): + reload(initializer) + reload(aiplatform) + aiplatform.init( + project=_TEST_PROJECT, + location=_TEST_LOCATION, + credentials=_TEST_CREDENTIALS, + ) + + def teardown_method(self): + initializer.global_pool.shutdown(wait=True) + + def test_create_lit_dataset_from_pandas_returns_dataset( + self, set_up_pandas_dataframe_and_columns, + ): + pd_dataset, lit_columns = set_up_pandas_dataframe_and_columns + lit_dataset = create_lit_dataset(pd_dataset, lit_columns) + expected_examples = [ + {"feature_1": 1.0, "feature_2": 3.0, "label": 1.0}, + ] - assert lit_model.input_spec() == dict(feature_types) - assert lit_model.output_spec() == dict(label_types) - assert len(outputs) == 1 - for item in outputs: - assert item.keys() == {"label"} - assert 
len(item.values()) == 1 + assert lit_dataset.spec() == dict(lit_columns) + assert expected_examples == lit_dataset._examples - widget_render_mock.assert_called_once() + def test_create_lit_model_from_tensorflow_returns_model(self, set_up_sequential): + feature_types, label_types, saved_model_path = set_up_sequential + lit_model = create_lit_model(saved_model_path, feature_types, label_types) + test_inputs = [ + {"feature_1": 1.0, "feature_2": 2.0}, + ] + outputs = lit_model.predict_minibatch(test_inputs) + + assert lit_model.input_spec() == dict(feature_types) + assert lit_model.output_spec() == dict(label_types) + assert len(outputs) == 1 + for item in outputs: + assert item.keys() == {"label"} + assert len(item.values()) == 1 + + @mock.patch.dict(os.environ, {"LIT_PROXY_URL": "auto"}) + @pytest.mark.usefixtures( + "sampled_shapley_explainer_mock", "load_model_from_local_path_mock" + ) + def test_create_lit_model_from_tensorflow_with_xai_returns_model( + self, set_up_sequential + ): + feature_types, label_types, saved_model_path = set_up_sequential + lit_model = create_lit_model(saved_model_path, feature_types, label_types) + test_inputs = [ + {"feature_1": 1.0, "feature_2": 2.0}, + ] + outputs = lit_model.predict_minibatch(test_inputs) + + assert lit_model.input_spec() == dict(feature_types) + assert lit_model.output_spec() == dict( + { + **label_types, + "feature_attribution": lit_types.FeatureSalience(signed=True), + } + ) + assert len(outputs) == 1 + for item in outputs: + assert item.keys() == {"label", "feature_attribution"} + assert len(item.values()) == 2 + + @pytest.mark.usefixtures( + "predict_client_predict_mock", "get_endpoint_with_models_mock" + ) + @pytest.mark.parametrize("model_id", [None, _TEST_ID]) + def test_create_lit_model_from_endpoint_returns_model( + self, feature_types, label_types, model_id + ): + lit_model = create_lit_model( + _TEST_ENDPOINT_NAME, feature_types, label_types, model_id + ) + test_inputs = [ + {"feature_1": 1.0, "feature_2": 2.0}, + ] + outputs = lit_model.predict_minibatch(test_inputs) + assert lit_model.input_spec() == dict(feature_types) + assert lit_model.output_spec() == dict(label_types) + assert len(outputs) == 1 + for item in outputs: + assert item.keys() == {"label"} + assert len(item.values()) == 1 -@mock.patch.dict(os.environ, {"LIT_PROXY_URL": "auto"}) -@pytest.mark.usefixtures( - "sampled_shapley_explainer_mock", "load_model_from_local_path_mock" -) -def test_set_up_and_open_lit_with_xai( - set_up_sequential, set_up_pandas_dataframe_and_columns, widget_render_mock -): - pd_dataset, lit_columns = set_up_pandas_dataframe_and_columns - feature_types, label_types, saved_model_path = set_up_sequential - lit_dataset, lit_model = set_up_and_open_lit( - pd_dataset, lit_columns, saved_model_path, feature_types, label_types + @pytest.mark.usefixtures( + "predict_client_explain_mock", "get_endpoint_with_models_with_explanation_mock" ) + @pytest.mark.parametrize("model_id", [None, _TEST_ID]) + def test_create_lit_model_from_endpoint_with_xai_returns_model( + self, feature_types, label_types, model_id + ): + lit_model = create_lit_model( + _TEST_ENDPOINT_NAME, feature_types, label_types, model_id + ) + test_inputs = [ + {"feature_1": 1.0, "feature_2": 2.0}, + ] + outputs = lit_model.predict_minibatch(test_inputs) + + assert lit_model.input_spec() == dict(feature_types) + assert lit_model.output_spec() == dict( + { + **label_types, + "feature_attribution": lit_types.FeatureSalience(signed=True), + } + ) + assert len(outputs) == 1 + for item in 
outputs: + assert item.keys() == {"label", "feature_attribution"} + assert len(item.values()) == 2 + + def test_open_lit( + self, set_up_sequential, set_up_pandas_dataframe_and_columns, widget_render_mock + ): + pd_dataset, lit_columns = set_up_pandas_dataframe_and_columns + lit_dataset = create_lit_dataset(pd_dataset, lit_columns) + feature_types, label_types, saved_model_path = set_up_sequential + lit_model = create_lit_model(saved_model_path, feature_types, label_types) + + open_lit({"model": lit_model}, {"dataset": lit_dataset}) + widget_render_mock.assert_called_once() + + def test_set_up_and_open_lit( + self, set_up_sequential, set_up_pandas_dataframe_and_columns, widget_render_mock + ): + pd_dataset, lit_columns = set_up_pandas_dataframe_and_columns + feature_types, label_types, saved_model_path = set_up_sequential + lit_dataset, lit_model = set_up_and_open_lit( + pd_dataset, lit_columns, saved_model_path, feature_types, label_types + ) + + expected_examples = [ + {"feature_1": 1.0, "feature_2": 3.0, "label": 1.0}, + ] + test_inputs = [ + {"feature_1": 1.0, "feature_2": 2.0}, + ] + outputs = lit_model.predict_minibatch(test_inputs) - expected_examples = [ - {"feature_1": 1.0, "feature_2": 3.0, "label": 1.0}, - ] - test_inputs = [ - {"feature_1": 1.0, "feature_2": 2.0}, - ] - outputs = lit_model.predict_minibatch(test_inputs) + assert lit_dataset.spec() == dict(lit_columns) + assert expected_examples == lit_dataset._examples - assert lit_dataset.spec() == dict(lit_columns) - assert expected_examples == lit_dataset._examples + assert lit_model.input_spec() == dict(feature_types) + assert lit_model.output_spec() == dict(label_types) + assert len(outputs) == 1 + for item in outputs: + assert item.keys() == {"label"} + assert len(item.values()) == 1 - assert lit_model.input_spec() == dict(feature_types) - assert lit_model.output_spec() == dict( - {**label_types, "feature_attribution": lit_types.FeatureSalience(signed=True)} - ) - assert len(outputs) == 1 - for item in outputs: - assert item.keys() == {"label", "feature_attribution"} - assert len(item.values()) == 2 + widget_render_mock.assert_called_once() - widget_render_mock.assert_called_once() + @mock.patch.dict(os.environ, {"LIT_PROXY_URL": "auto"}) + @pytest.mark.usefixtures( + "sampled_shapley_explainer_mock", "load_model_from_local_path_mock" + ) + def test_set_up_and_open_lit_with_xai( + self, set_up_sequential, set_up_pandas_dataframe_and_columns, widget_render_mock + ): + pd_dataset, lit_columns = set_up_pandas_dataframe_and_columns + feature_types, label_types, saved_model_path = set_up_sequential + lit_dataset, lit_model = set_up_and_open_lit( + pd_dataset, lit_columns, saved_model_path, feature_types, label_types + ) + + expected_examples = [ + {"feature_1": 1.0, "feature_2": 3.0, "label": 1.0}, + ] + test_inputs = [ + {"feature_1": 1.0, "feature_2": 2.0}, + ] + outputs = lit_model.predict_minibatch(test_inputs) + + assert lit_dataset.spec() == dict(lit_columns) + assert expected_examples == lit_dataset._examples + + assert lit_model.input_spec() == dict(feature_types) + assert lit_model.output_spec() == dict( + { + **label_types, + "feature_attribution": lit_types.FeatureSalience(signed=True), + } + ) + assert len(outputs) == 1 + for item in outputs: + assert item.keys() == {"label", "feature_attribution"} + assert len(item.values()) == 2 + + widget_render_mock.assert_called_once() From fd0fd7fa6976c3480d996a5fc862e3715bff3ffc Mon Sep 17 00:00:00 2001 From: taiseiak Date: Fri, 28 Jan 2022 16:22:35 -0800 Subject: [PATCH 
6/7] fix: Address review changes --- google/cloud/aiplatform/explain/lit.py | 71 ++++++++++++++++------- tests/unit/aiplatform/test_explain_lit.py | 58 +++++++++++++++++- 2 files changed, 104 insertions(+), 25 deletions(-) diff --git a/google/cloud/aiplatform/explain/lit.py b/google/cloud/aiplatform/explain/lit.py index 57e1a43cbe..dfc89f6e13 100644 --- a/google/cloud/aiplatform/explain/lit.py +++ b/google/cloud/aiplatform/explain/lit.py @@ -84,7 +84,7 @@ class _EndpointLitModel(lit_model.Model): def __init__( self, - model: str, + endpoint: Union[str, aiplatform.Endpoint], input_types: "OrderedDict[str, lit_types.LitType]", # noqa: F821 output_types: "OrderedDict[str, lit_types.LitType]", # noqa: F821 model_id: Optional[str] = None, @@ -107,7 +107,10 @@ def __init__( Raises: ValueError if the model_id was not found in the endpoint. """ - self._endpoint = aiplatform.Endpoint(model) + if isinstance(endpoint, str): + self._endpoint = aiplatform.Endpoint(endpoint) + else: + self._endpoint = endpoint self._model_id = model_id self._input_types = input_types self._output_types = output_types @@ -122,22 +125,25 @@ def __init__( if not deployed_model: raise ValueError( "A model with id {model_id} was not found in the endpoint {endpoint}.".format( - model_id=model_id, endpoint=model + model_id=model_id, endpoint=endpoint ) ) self._explanation_enabled = bool(deployed_model.explanation_spec) # Check if all models in the endpoint have explanation enabled else: self._explanation_enabled = all( - map( - lambda model: bool(model.explanation_spec), - self._endpoint.list_models(), - ) + model.explanation_spec for model in self._endpoint.list_models() ) def predict_minibatch( self, inputs: List[lit_types.JsonDict] ) -> List[lit_types.JsonDict]: + """Retun predictions based on a batch of inputs. + Args: + inputs: Requred. a List of instances to predict on based on the input spec. + Returns: + A list of predictions based on the output spec. + """ instances = [] for input in inputs: instance = [input[feature] for feature in self._input_types] @@ -216,6 +222,12 @@ def attribution_explainer(self,) -> Optional["AttributionExplainer"]: # noqa: F def predict_minibatch( self, inputs: List[lit_types.JsonDict] ) -> List[lit_types.JsonDict]: + """Retun predictions based on a batch of inputs. + Args: + inputs: Requred. a List of instances to predict on based on the input spec. + Returns: + A list of predictions based on the output spec. + """ instances = [] for input in inputs: instance = [input[feature] for feature in self._input_types] @@ -335,22 +347,44 @@ def create_lit_dataset( return _VertexLitDataset(dataset, column_types) +def create_lit_model_from_endpoint( + endpoint: Union[str, aiplatform.Endpoint], + input_types: "OrderedDict[str, lit_types.LitType]", # noqa: F821 + output_types: "OrderedDict[str, lit_types.LitType]", # noqa: F821 + model_id: Optional[str] = None, +) -> lit_model.Model: + """Creates a LIT Model object. + Args: + model: + Required. The name of the Endpoint resource or an Endpoint class. + Endpoint name format: ``projects/{project}/locations/{location}/endpoints/{endpoint}`` + input_types: + Required. An OrderedDict of string names matching the features of the model + as the key, and the associated LitType of the feature. + output_types: + Required. An OrderedDict of string names matching the labels of the model + as the key, and the associated LitType of the label. + model_id: + Optional. A string of the specific model in the endpoint to create the + LIT model from. 
If this is not set, any usable model in the endpoint is + used to create the LIT model. + Returns: + A LIT Model object that has the same functionality as the model provided. + """ + return _EndpointLitModel(endpoint, input_types, output_types, model_id) + + def create_lit_model( model: str, input_types: "OrderedDict[str, lit_types.LitType]", # noqa: F821 output_types: "OrderedDict[str, lit_types.LitType]", # noqa: F821 attribution_method: str = "sampled_shapley", - model_id: Optional[str] = None, ) -> lit_model.Model: """Creates a LIT Model object. Args: model: - Required. A string reference to a local TensorFlow saved model directory, - or the name of the Endpoint resource. Endpoint format: - ``projects/{project}/locations/{location}/endpoints/{endpoint}`` - - If using a local TensorFlow model, the model must have at most one - input and one output tensor. + Required. A string reference to a local TensorFlow saved model directory. + The model must have at most one input and one output tensor. input_types: Required. An OrderedDict of string names matching the features of the model as the key, and the associated LitType of the feature. @@ -361,17 +395,10 @@ def create_lit_model( Optional. A string to choose what attribution configuration to set up the explainer with. Valid options are 'sampled_shapley' or 'integrated_gradients'. - model_id: - Optional. A string of the specific model in the endpoint to create the - LIT model from. If this is not set, any usable model in the endpoint is - used to create the LIT model. Returns: A LIT Model object that has the same functionality as the model provided. """ - if os.path.exists(model): - return _TensorFlowLitModel(model, input_types, output_types, attribution_method) - else: - return _EndpointLitModel(model, input_types, output_types, model_id) + return _TensorFlowLitModel(model, input_types, output_types, attribution_method) def open_lit( diff --git a/tests/unit/aiplatform/test_explain_lit.py b/tests/unit/aiplatform/test_explain_lit.py index 4a041906b7..c8092b1742 100644 --- a/tests/unit/aiplatform/test_explain_lit.py +++ b/tests/unit/aiplatform/test_explain_lit.py @@ -32,6 +32,7 @@ from google.cloud.aiplatform.explain.lit import ( create_lit_dataset, create_lit_model, + create_lit_model_from_endpoint, open_lit, set_up_and_open_lit, ) @@ -317,8 +318,9 @@ def test_create_lit_model_from_tensorflow_with_xai_returns_model( def test_create_lit_model_from_endpoint_returns_model( self, feature_types, label_types, model_id ): - lit_model = create_lit_model( - _TEST_ENDPOINT_NAME, feature_types, label_types, model_id + endpoint = aiplatform.Endpoint(_TEST_ENDPOINT_NAME) + lit_model = create_lit_model_from_endpoint( + endpoint, feature_types, label_types, model_id ) test_inputs = [ {"feature_1": 1.0, "feature_2": 2.0}, @@ -339,7 +341,57 @@ def test_create_lit_model_from_endpoint_returns_model( def test_create_lit_model_from_endpoint_with_xai_returns_model( self, feature_types, label_types, model_id ): - lit_model = create_lit_model( + endpoint = aiplatform.Endpoint(_TEST_ENDPOINT_NAME) + lit_model = create_lit_model_from_endpoint( + endpoint, feature_types, label_types, model_id + ) + test_inputs = [ + {"feature_1": 1.0, "feature_2": 2.0}, + ] + outputs = lit_model.predict_minibatch(test_inputs) + + assert lit_model.input_spec() == dict(feature_types) + assert lit_model.output_spec() == dict( + { + **label_types, + "feature_attribution": lit_types.FeatureSalience(signed=True), + } + ) + assert len(outputs) == 1 + for item in outputs: + assert item.keys() 
== {"label", "feature_attribution"} + assert len(item.values()) == 2 + + @pytest.mark.usefixtures( + "predict_client_predict_mock", "get_endpoint_with_models_mock" + ) + @pytest.mark.parametrize("model_id", [None, _TEST_ID]) + def test_create_lit_model_from_endpoint_name_returns_model( + self, feature_types, label_types, model_id + ): + lit_model = create_lit_model_from_endpoint( + _TEST_ENDPOINT_NAME, feature_types, label_types, model_id + ) + test_inputs = [ + {"feature_1": 1.0, "feature_2": 2.0}, + ] + outputs = lit_model.predict_minibatch(test_inputs) + + assert lit_model.input_spec() == dict(feature_types) + assert lit_model.output_spec() == dict(label_types) + assert len(outputs) == 1 + for item in outputs: + assert item.keys() == {"label"} + assert len(item.values()) == 1 + + @pytest.mark.usefixtures( + "predict_client_explain_mock", "get_endpoint_with_models_with_explanation_mock" + ) + @pytest.mark.parametrize("model_id", [None, _TEST_ID]) + def test_create_lit_model_from_endpoint_name_with_xai_returns_model( + self, feature_types, label_types, model_id + ): + lit_model = create_lit_model_from_endpoint( _TEST_ENDPOINT_NAME, feature_types, label_types, model_id ) test_inputs = [ From c5fdb95065888b98ec11cfd7f4f8c01039886e99 Mon Sep 17 00:00:00 2001 From: taiseiak Date: Tue, 1 Feb 2022 09:25:01 -0800 Subject: [PATCH 7/7] style: Fix indentation. --- google/cloud/aiplatform/explain/lit.py | 190 ++++++++++++------------- 1 file changed, 95 insertions(+), 95 deletions(-) diff --git a/google/cloud/aiplatform/explain/lit.py b/google/cloud/aiplatform/explain/lit.py index dfc89f6e13..5032055801 100644 --- a/google/cloud/aiplatform/explain/lit.py +++ b/google/cloud/aiplatform/explain/lit.py @@ -62,11 +62,11 @@ def __init__( ): """Construct a VertexLitDataset. Args: - dataset: - Required. A Pandas DataFrame that includes feature column names and data. - column_types: - Required. An OrderedDict of string names matching the columns of the dataset - as the key, and the associated LitType of the column. + dataset: + Required. A Pandas DataFrame that includes feature column names and data. + column_types: + Required. An OrderedDict of string names matching the columns of the dataset + as the key, and the associated LitType of the column. """ self._examples = dataset.to_dict(orient="records") self._column_types = column_types @@ -91,19 +91,19 @@ def __init__( ): """Construct a VertexLitModel. Args: - model: - Required. The name of the Endpoint resource. Format: - ``projects/{project}/locations/{location}/endpoints/{endpoint}`` - input_types: - Required. An OrderedDict of string names matching the features of the model - as the key, and the associated LitType of the feature. - output_types: - Required. An OrderedDict of string names matching the labels of the model - as the key, and the associated LitType of the label. - model_id: - Optional. A string of the specific model in the endpoint to create the - LIT model from. If this is not set, any usable model in the endpoint is - used to create the LIT model. + model: + Required. The name of the Endpoint resource. Format: + ``projects/{project}/locations/{location}/endpoints/{endpoint}`` + input_types: + Required. An OrderedDict of string names matching the features of the model + as the key, and the associated LitType of the feature. + output_types: + Required. An OrderedDict of string names matching the labels of the model + as the key, and the associated LitType of the label. + model_id: + Optional. 
A string of the specific model in the endpoint to create the + LIT model from. If this is not set, any usable model in the endpoint is + used to create the LIT model. Raises: ValueError if the model_id was not found in the endpoint. """ @@ -140,7 +140,7 @@ def predict_minibatch( ) -> List[lit_types.JsonDict]: """Retun predictions based on a batch of inputs. Args: - inputs: Requred. a List of instances to predict on based on the input spec. + inputs: Requred. a List of instances to predict on based on the input spec. Returns: A list of predictions based on the output spec. """ @@ -192,19 +192,19 @@ def __init__( ): """Construct a VertexLitModel. Args: - model: - Required. A string reference to a local TensorFlow saved model directory. - The model must have at most one input and one output tensor. - input_types: - Required. An OrderedDict of string names matching the features of the model - as the key, and the associated LitType of the feature. - output_types: - Required. An OrderedDict of string names matching the labels of the model - as the key, and the associated LitType of the label. - attribution_method: - Optional. A string to choose what attribution configuration to - set up the explainer with. Valid options are 'sampled_shapley' - or 'integrated_gradients'. + model: + Required. A string reference to a local TensorFlow saved model directory. + The model must have at most one input and one output tensor. + input_types: + Required. An OrderedDict of string names matching the features of the model + as the key, and the associated LitType of the feature. + output_types: + Required. An OrderedDict of string names matching the labels of the model + as the key, and the associated LitType of the label. + attribution_method: + Optional. A string to choose what attribution configuration to + set up the explainer with. Valid options are 'sampled_shapley' + or 'integrated_gradients'. """ self._load_model(model) self._input_types = input_types @@ -224,7 +224,7 @@ def predict_minibatch( ) -> List[lit_types.JsonDict]: """Retun predictions based on a batch of inputs. Args: - inputs: Requred. a List of instances to predict on based on the input spec. + inputs: Requred. a List of instances to predict on based on the input spec. Returns: A list of predictions based on the output spec. """ @@ -274,7 +274,7 @@ def output_spec(self) -> lit_types.Spec: def _load_model(self, model: str): """Loads a TensorFlow saved model and populates the input and output signature attributes of the class. Args: - model: Required. A string reference to a TensorFlow saved model directory. + model: Required. A string reference to a TensorFlow saved model directory. Raises: ValueError if the model has more than one input tensor or more than one output tensor. """ @@ -296,11 +296,11 @@ def _set_up_attribution_explainer( ): """Populates the attribution explainer attribute of the class. Args: - model: Required. A string reference to a TensorFlow saved model directory. + model: Required. A string reference to a TensorFlow saved model directory. attribution_method: - Optional. A string to choose what attribution configuration to - set up the explainer with. Valid options are 'sampled_shapley' - or 'integrated_gradients'. + Optional. A string to choose what attribution configuration to + set up the explainer with. Valid options are 'sampled_shapley' + or 'integrated_gradients'. """ try: import explainable_ai_sdk @@ -336,11 +336,11 @@ def create_lit_dataset( ) -> lit_dataset.Dataset: """Creates a LIT Dataset object. 
Args: - dataset: - Required. A Pandas DataFrame that includes feature column names and data. - column_types: - Required. An OrderedDict of string names matching the columns of the dataset - as the key, and the associated LitType of the column. + dataset: + Required. A Pandas DataFrame that includes feature column names and data. + column_types: + Required. An OrderedDict of string names matching the columns of the dataset + as the key, and the associated LitType of the column. Returns: A LIT Dataset object that has the data from the dataset provided. """ @@ -355,19 +355,19 @@ def create_lit_model_from_endpoint( ) -> lit_model.Model: """Creates a LIT Model object. Args: - model: - Required. The name of the Endpoint resource or an Endpoint class. - Endpoint name format: ``projects/{project}/locations/{location}/endpoints/{endpoint}`` - input_types: - Required. An OrderedDict of string names matching the features of the model - as the key, and the associated LitType of the feature. - output_types: - Required. An OrderedDict of string names matching the labels of the model - as the key, and the associated LitType of the label. - model_id: - Optional. A string of the specific model in the endpoint to create the - LIT model from. If this is not set, any usable model in the endpoint is - used to create the LIT model. + model: + Required. The name of the Endpoint resource or an Endpoint instance. + Endpoint name format: ``projects/{project}/locations/{location}/endpoints/{endpoint}`` + input_types: + Required. An OrderedDict of string names matching the features of the model + as the key, and the associated LitType of the feature. + output_types: + Required. An OrderedDict of string names matching the labels of the model + as the key, and the associated LitType of the label. + model_id: + Optional. A string of the specific model in the endpoint to create the + LIT model from. If this is not set, any usable model in the endpoint is + used to create the LIT model. Returns: A LIT Model object that has the same functionality as the model provided. """ @@ -382,19 +382,19 @@ def create_lit_model( ) -> lit_model.Model: """Creates a LIT Model object. Args: - model: - Required. A string reference to a local TensorFlow saved model directory. - The model must have at most one input and one output tensor. - input_types: - Required. An OrderedDict of string names matching the features of the model - as the key, and the associated LitType of the feature. - output_types: - Required. An OrderedDict of string names matching the labels of the model - as the key, and the associated LitType of the label. - attribution_method: - Optional. A string to choose what attribution configuration to - set up the explainer with. Valid options are 'sampled_shapley' - or 'integrated_gradients'. + model: + Required. A string reference to a local TensorFlow saved model directory. + The model must have at most one input and one output tensor. + input_types: + Required. An OrderedDict of string names matching the features of the model + as the key, and the associated LitType of the feature. + output_types: + Required. An OrderedDict of string names matching the labels of the model + as the key, and the associated LitType of the label. + attribution_method: + Optional. A string to choose what attribution configuration to + set up the explainer with. Valid options are 'sampled_shapley' + or 'integrated_gradients'. Returns: A LIT Model object that has the same functionality as the model provided. 
""" @@ -408,12 +408,12 @@ def open_lit( ): """Open LIT from the provided models and datasets. Args: - models: - Required. A list of LIT models to open LIT with. - input_types: - Required. A lit of LIT datasets to open LIT with. - open_in_new_tab: - Optional. A boolean to choose if LIT open in a new tab or not. + models: + Required. A list of LIT models to open LIT with. + input_types: + Required. A lit of LIT datasets to open LIT with. + open_in_new_tab: + Optional. A boolean to choose if LIT open in a new tab or not. Raises: ImportError if LIT is not installed. """ @@ -432,26 +432,26 @@ def set_up_and_open_lit( ) -> Tuple[lit_dataset.Dataset, lit_model.Model]: """Creates a LIT dataset and model and opens LIT. Args: - dataset: - Required. A Pandas DataFrame that includes feature column names and data. - column_types: - Required. An OrderedDict of string names matching the columns of the dataset - as the key, and the associated LitType of the column. - model: - Required. A string reference to a TensorFlow saved model directory. - The model must have at most one input and one output tensor. - input_types: - Required. An OrderedDict of string names matching the features of the model - as the key, and the associated LitType of the feature. - output_types: - Required. An OrderedDict of string names matching the labels of the model - as the key, and the associated LitType of the label. - attribution_method: - Optional. A string to choose what attribution configuration to - set up the explainer with. Valid options are 'sampled_shapley' - or 'integrated_gradients'. - open_in_new_tab: - Optional. A boolean to choose if LIT open in a new tab or not. + dataset: + Required. A Pandas DataFrame that includes feature column names and data. + column_types: + Required. An OrderedDict of string names matching the columns of the dataset + as the key, and the associated LitType of the column. + model: + Required. A string reference to a TensorFlow saved model directory. + The model must have at most one input and one output tensor. + input_types: + Required. An OrderedDict of string names matching the features of the model + as the key, and the associated LitType of the feature. + output_types: + Required. An OrderedDict of string names matching the labels of the model + as the key, and the associated LitType of the label. + attribution_method: + Optional. A string to choose what attribution configuration to + set up the explainer with. Valid options are 'sampled_shapley' + or 'integrated_gradients'. + open_in_new_tab: + Optional. A boolean to choose if LIT open in a new tab or not. Returns: A Tuple of the LIT dataset and model created. Raises: