From 03cf301989a5802b122803eac7a2d03f2d1769fb Mon Sep 17 00:00:00 2001
From: Taisei Klasen
Date: Thu, 16 Dec 2021 10:12:14 -0800
Subject: [PATCH] Feat: Add LIT methods for Pandas DataFrame and TensorFlow saved model. (#874)

Adds methods from go/lit-xai-notebook for Pandas DataFrame and TensorFlow saved model.

b/208628825

Example Colab: go/lit-vertex-pr-1
---
 google/cloud/aiplatform/explain/lit.py    | 251 ++++++++++++++++++++++
 setup.py                                  |  15 +-
 tests/unit/aiplatform/test_explain_lit.py | 142 ++++++++++++
 3 files changed, 405 insertions(+), 3 deletions(-)
 create mode 100644 google/cloud/aiplatform/explain/lit.py
 create mode 100644 tests/unit/aiplatform/test_explain_lit.py

diff --git a/google/cloud/aiplatform/explain/lit.py b/google/cloud/aiplatform/explain/lit.py
new file mode 100644
index 0000000000..432dd9194a
--- /dev/null
+++ b/google/cloud/aiplatform/explain/lit.py
@@ -0,0 +1,251 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Dict, List, Tuple, Union
+
+try:
+    from lit_nlp.api import dataset as lit_dataset
+    from lit_nlp.api import model as lit_model
+    from lit_nlp.api import types as lit_types
+    from lit_nlp import notebook
+except ImportError:
+    raise ImportError(
+        "LIT is not installed and is required to get Dataset as the return format. "
+        'Please install the SDK using "pip install google-cloud-aiplatform[lit]"'
+    )
+
+try:
+    import tensorflow as tf
+except ImportError:
+    raise ImportError(
+        "Tensorflow is not installed and is required to load saved model. "
+        'Please install the SDK using "pip install google-cloud-aiplatform[lit]"'
+    )
+
+try:
+    import pandas as pd
+except ImportError:
+    raise ImportError(
+        "Pandas is not installed and is required to read the dataset. "
+        'Please install Pandas using "pip install google-cloud-aiplatform[lit]"'
+    )
+
+
+class _VertexLitDataset(lit_dataset.Dataset):
+    """LIT dataset class for the Vertex LIT integration.
+
+    This is used in the create_lit_dataset function.
+    """
+
+    def __init__(
+        self,
+        dataset: pd.DataFrame,
+        column_types: "OrderedDict[str, lit_types.LitType]",  # noqa: F821
+    ):
+        """Construct a VertexLitDataset.
+        Args:
+            dataset:
+                Required. A Pandas DataFrame that includes feature column names and data.
+            column_types:
+                Required. An OrderedDict of string names matching the columns of the dataset
+                as the key, and the associated LitType of the column.
+        """
+        self._examples = dataset.to_dict(orient="records")
+        self._column_types = column_types
+
+    def spec(self):
+        """Return a spec describing dataset elements."""
+        return dict(self._column_types)
+
+
+class _VertexLitModel(lit_model.Model):
+    """LIT model class for the Vertex LIT integration.
+
+    This is used in the create_lit_model function.
+    """
+
+    def __init__(
+        self,
+        model: str,
+        input_types: "OrderedDict[str, lit_types.LitType]",  # noqa: F821
+        output_types: "OrderedDict[str, lit_types.LitType]",  # noqa: F821
+    ):
+        """Construct a VertexLitModel.
+        Args:
+            model:
+                Required. A string reference to a local TensorFlow saved model directory.
+                The model must have exactly one input and one output tensor.
+            input_types:
+                Required. An OrderedDict of string names matching the features of the model
+                as the key, and the associated LitType of the feature.
+            output_types:
+                Required. An OrderedDict of string names matching the labels of the model
+                as the key, and the associated LitType of the label.
+        """
+        self._loaded_model = tf.saved_model.load(model)
+        serving_default = self._loaded_model.signatures[
+            tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY
+        ]
+        _, self._kwargs_signature = serving_default.structured_input_signature
+        self._output_signature = serving_default.structured_outputs
+
+        if len(self._kwargs_signature) != 1:
+            raise ValueError("Please use a model with only one input tensor.")
+
+        if len(self._output_signature) != 1:
+            raise ValueError("Please use a model with only one output tensor.")
+
+        self._input_types = input_types
+        self._output_types = output_types
+
+    def predict_minibatch(
+        self, inputs: List[lit_types.JsonDict]
+    ) -> List[lit_types.JsonDict]:
+        """Returns predictions for a single batch of examples.
+        Args:
+            inputs:
+                sequence of inputs, following model.input_spec()
+        Returns:
+            list of outputs, following model.output_spec()
+        """
+        # One row per example, with features ordered as in the input spec.
+        instances = [
+            [example[feature] for feature in self._input_types] for example in inputs
+        ]
+        prediction_input_dict = {
+            next(iter(self._kwargs_signature)): tf.convert_to_tensor(instances)
+        }
+        prediction_dict = self._loaded_model.signatures[
+            tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY
+        ](**prediction_input_dict)
+        predictions = prediction_dict[next(iter(self._output_signature))].numpy()
+        outputs = []
+        for prediction in predictions:
+            outputs.append(
+                {
+                    label: value
+                    for label, value in zip(self._output_types.keys(), prediction)
+                }
+            )
+        return outputs
+
+    def input_spec(self) -> lit_types.Spec:
+        """Return a spec describing model inputs."""
+        return dict(self._input_types)
+
+    def output_spec(self) -> lit_types.Spec:
+        """Return a spec describing model outputs."""
+        return dict(self._output_types)
+
+
+def create_lit_dataset(
+    dataset: pd.DataFrame,
+    column_types: "OrderedDict[str, lit_types.LitType]",  # noqa: F821
+) -> lit_dataset.Dataset:
+    """Creates a LIT Dataset object.
+    Args:
+        dataset:
+            Required. A Pandas DataFrame that includes feature column names and data.
+        column_types:
+            Required. An OrderedDict of string names matching the columns of the dataset
+            as the key, and the associated LitType of the column.
+    Returns:
+        A LIT Dataset object that has the data from the dataset provided.
+    """
+    return _VertexLitDataset(dataset, column_types)
+
+
+def create_lit_model(
+    model: str,
+    input_types: "OrderedDict[str, lit_types.LitType]",  # noqa: F821
+    output_types: "OrderedDict[str, lit_types.LitType]",  # noqa: F821
+) -> lit_model.Model:
+    """Creates a LIT Model object.
+    Args:
+        model:
+            Required. A string reference to a local TensorFlow saved model directory.
+            The model must have exactly one input and one output tensor.
+        input_types:
+            Required. An OrderedDict of string names matching the features of the model
+            as the key, and the associated LitType of the feature.
+        output_types:
+            Required. An OrderedDict of string names matching the labels of the model
+            as the key, and the associated LitType of the label.
+    Returns:
+        A LIT Model object that has the same functionality as the model provided.
+    """
+    return _VertexLitModel(model, input_types, output_types)
+
+
+def open_lit(
+    models: Dict[str, lit_model.Model],
+    datasets: Dict[str, lit_dataset.Dataset],
+    open_in_new_tab: bool = True,
+):
+    """Open LIT from the provided models and datasets.
+    Args:
+        models:
+            Required. A dict mapping model names to the LIT models to open LIT with.
+        datasets:
+            Required. A dict mapping dataset names to the LIT datasets to open LIT with.
+        open_in_new_tab:
+            Optional. A boolean to choose if LIT open in a new tab or not.
+    Raises:
+        ImportError if LIT is not installed.
+    """
+    widget = notebook.LitWidget(models, datasets, open_in_new_tab=open_in_new_tab)
+    widget.render()
+
+
+def set_up_and_open_lit(
+    dataset: Union[pd.DataFrame, lit_dataset.Dataset],
+    column_types: "OrderedDict[str, lit_types.LitType]",  # noqa: F821
+    model: Union[str, lit_model.Model],
+    input_types: Union[List[str], Dict[str, lit_types.LitType]],
+    output_types: Union[str, List[str], Dict[str, lit_types.LitType]],
+    open_in_new_tab: bool = True,
+) -> Tuple[lit_dataset.Dataset, lit_model.Model]:
+    """Creates a LIT dataset and model and opens LIT.
+    Args:
+        dataset:
+            Required. A Pandas DataFrame of feature columns and data, or a LIT Dataset.
+        column_types:
+            Required. An OrderedDict of string names matching the columns of the dataset
+            as the key, and the associated LitType of the column.
+        model:
+            Required. A TensorFlow saved model directory path, or a LIT Model (used as-is).
+            A saved model must have exactly one input and one output tensor.
+        input_types:
+            Required. An OrderedDict of string names matching the features of the model
+            as the key, and the associated LitType of the feature.
+        output_types:
+            Required. An OrderedDict of string names matching the labels of the model
+            as the key, and the associated LitType of the label.
+    Returns:
+        A Tuple of the LIT dataset and model created.
+    Raises:
+        ImportError if LIT or TensorFlow is not installed.
+        ValueError if the model doesn't have only 1 input and output tensor.
+    """
+    if not isinstance(dataset, lit_dataset.Dataset):
+        dataset = create_lit_dataset(dataset, column_types)
+
+    if not isinstance(model, lit_model.Model):
+        model = create_lit_model(model, input_types, output_types)
+
+    open_lit({"model": model}, {"dataset": dataset}, open_in_new_tab=open_in_new_tab)
+
+    return dataset, model
diff --git a/setup.py b/setup.py
index 4ef6968114..c4ceaf9d40 100644
--- a/setup.py
+++ b/setup.py
@@ -36,6 +36,7 @@
 tensorboard_extra_require = ["tensorflow >=2.3.0, <=2.5.0"]
 metadata_extra_require = ["pandas >= 1.0.0"]
 xai_extra_require = ["tensorflow >=2.3.0, <=2.5.0"]
+lit_extra_require = ["tensorflow >= 2.3.0", "pandas >= 1.0.0", "lit-nlp >= 0.4.0"]
 profiler_extra_require = [
     "tensorboard-plugin-profile >= 2.4.0",
     "werkzeug >= 2.0.0",
@@ -43,10 +44,17 @@
 ]
 
 full_extra_require = list(
-    set(tensorboard_extra_require + metadata_extra_require + xai_extra_require)
+    set(
+        tensorboard_extra_require
+        + metadata_extra_require
+        + xai_extra_require
+        + lit_extra_require
+    )
 )
 testing_extra_require = (
-    full_extra_require + profiler_extra_require + ["grpcio-testing", "pytest-xdist"]
+    full_extra_require
+    + profiler_extra_require
+    + ["grpcio-testing", "pytest-xdist", "ipython"]
 )
 
 
@@ -88,7 +96,8 @@
         "tensorboard": tensorboard_extra_require,
         "testing": testing_extra_require,
         "xai": xai_extra_require,
-        "cloud-profiler": profiler_extra_require,
+        "lit": lit_extra_require,
+        "cloud_profiler": profiler_extra_require,
     },
     python_requires=">=3.6",
     scripts=[],
diff --git a/tests/unit/aiplatform/test_explain_lit.py b/tests/unit/aiplatform/test_explain_lit.py
new file mode 100644
index 0000000000..718f62b022
--- /dev/null
+++ b/tests/unit/aiplatform/test_explain_lit.py
@@ -0,0 +1,142 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import collections
+import pytest
+import tensorflow as tf
+import pandas as pd
+
+from lit_nlp.api import types as lit_types
+from lit_nlp import notebook
+from unittest import mock
+from google.cloud.aiplatform.explain.lit import (
+    create_lit_dataset,
+    create_lit_model,
+    open_lit,
+    set_up_and_open_lit,
+)
+
+
+@pytest.fixture
+def widget_render_mock():
+    with mock.patch.object(notebook.LitWidget, "render") as render_mock:
+        yield render_mock
+
+
+@pytest.fixture
+def set_up_sequential(tmpdir):
+    # Set up a sequential model
+    seq_model = tf.keras.models.Sequential()
+    seq_model.add(tf.keras.layers.Dense(32, activation="relu", input_shape=(2,)))
+    seq_model.add(tf.keras.layers.Dense(32, activation="relu"))
+    seq_model.add(tf.keras.layers.Dense(1, activation="sigmoid"))
+    saved_model_path = str(tmpdir.mkdir("tmp"))
+    tf.saved_model.save(seq_model, saved_model_path)
+    feature_types = collections.OrderedDict(
+        [("feature_1", lit_types.Scalar()), ("feature_2", lit_types.Scalar())]
+    )
+    label_types = collections.OrderedDict([("label", lit_types.RegressionScore())])
+    yield feature_types, label_types, saved_model_path
+
+
+@pytest.fixture
+def set_up_pandas_dataframe_and_columns():
+    dataframe = pd.DataFrame.from_dict(
+        {"feature_1": [1.0, 2.0], "feature_2": [3.0, 4.0], "label": [1.0, 0.0]}
+    )
+    columns = collections.OrderedDict(
+        [
+            ("feature_1", lit_types.Scalar()),
+            ("feature_2", lit_types.Scalar()),
+            ("label", lit_types.RegressionScore()),
+        ]
+    )
+    yield dataframe, columns
+
+
+def test_create_lit_dataset_from_pandas_returns_dataset(
+    set_up_pandas_dataframe_and_columns,
+):
+    pd_dataset, lit_columns = set_up_pandas_dataframe_and_columns
+    lit_dataset = create_lit_dataset(pd_dataset, lit_columns)
+    expected_examples = [
+        {"feature_1": 1.0, "feature_2": 3.0, "label": 1.0},
+        {"feature_1": 2.0, "feature_2": 4.0, "label": 0.0},
+    ]
+
+    assert lit_dataset.spec() == dict(lit_columns)
+    assert expected_examples == lit_dataset._examples
+
+
+def test_create_lit_model_from_tensorflow_returns_model(set_up_sequential):
+    feature_types, label_types, saved_model_path = set_up_sequential
+    lit_model = create_lit_model(saved_model_path, feature_types, label_types)
+    test_inputs = [
+        {"feature_1": 1.0, "feature_2": 2.0},
+        {"feature_1": 3.0, "feature_2": 4.0},
+    ]
+    outputs = lit_model.predict_minibatch(test_inputs)
+
+    assert lit_model.input_spec() == dict(feature_types)
+    assert lit_model.output_spec() == dict(label_types)
+    assert len(outputs) == 2
+    for item in outputs:
+        assert item.keys() == {"label"}
+        assert len(item.values()) == 1
+
+
+def test_open_lit(
+    set_up_sequential, set_up_pandas_dataframe_and_columns, widget_render_mock
+):
+    pd_dataset, lit_columns = set_up_pandas_dataframe_and_columns
+    lit_dataset = create_lit_dataset(pd_dataset, lit_columns)
+    feature_types, label_types, saved_model_path = set_up_sequential
+    lit_model = create_lit_model(saved_model_path, feature_types, label_types)
+
+    open_lit({"model": lit_model}, {"dataset": lit_dataset})
+    widget_render_mock.assert_called_once()
+
+
+def test_set_up_and_open_lit(
+    set_up_sequential, set_up_pandas_dataframe_and_columns, widget_render_mock
+):
+    pd_dataset, lit_columns = set_up_pandas_dataframe_and_columns
+    feature_types, label_types, saved_model_path = set_up_sequential
+    lit_dataset, lit_model = set_up_and_open_lit(
+        pd_dataset, lit_columns, saved_model_path, feature_types, label_types
+    )
+
+    expected_examples = [
+        {"feature_1": 1.0, "feature_2": 3.0, "label": 1.0},
+        {"feature_1": 2.0, "feature_2": 4.0, "label": 0.0},
+    ]
+    test_inputs = [
+        {"feature_1": 1.0, "feature_2": 2.0},
+        {"feature_1": 3.0, "feature_2": 4.0},
+    ]
+    outputs = lit_model.predict_minibatch(test_inputs)
+
+    assert lit_dataset.spec() == dict(lit_columns)
+    assert expected_examples == lit_dataset._examples
+
+    assert lit_model.input_spec() == dict(feature_types)
+    assert lit_model.output_spec() == dict(label_types)
+    assert len(outputs) == 2
+    for item in outputs:
+        assert item.keys() == {"label"}
+        assert len(item.values()) == 1
+
+    widget_render_mock.assert_called_once()