feat: add explain get_metadata function for tf2. (#507)

* feat: add cancel method to pipeline client
* feat: add basic metadata structure for XAI explain
* feat: add tf2 get_metadata function
* feat: add tf2 get_metadata function
* Add more tests for tf2_getmetadata
* Address comments
* Update to tensorflow instead of tensorflow-cpu
* Move one time use setup function
googleapis · Jul 8, 2021 · f6f9a97 · f6f9a97
1 parent 95639ee
commit f6f9a97
Show file tree

Hide file tree

Showing 5 changed files with 325 additions and 6 deletions.
diff --git a/google/cloud/aiplatform/explain/metadata/metadata_builder.py b/google/cloud/aiplatform/explain/metadata/metadata_builder.py
@@ -25,10 +25,6 @@
 class MetadataBuilder(_ABC):
     """Abstract base class for metadata builders."""
 
-    @abc.abstractmethod
-    def save_model_with_metadata(self, filepath: str):
-        """Saves the model with metadata."""
-
     @abc.abstractmethod
     def get_metadata(self):
         """Returns the current metadata as a dictionary."""
diff --git a/google/cloud/aiplatform/explain/metadata/tf/v2/__init__.py b/google/cloud/aiplatform/explain/metadata/tf/v2/__init__.py
@@ -0,0 +1,15 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/google/cloud/aiplatform/explain/metadata/tf/v2/saved_model_metadata_builder.py b/google/cloud/aiplatform/explain/metadata/tf/v2/saved_model_metadata_builder.py
@@ -0,0 +1,133 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from google.protobuf import json_format
+from typing import Optional, List, Dict, Any, Tuple
+
+from google.cloud.aiplatform.explain.metadata import metadata_builder
+from google.cloud.aiplatform.compat.types import (
+    explanation_metadata_v1beta1 as explanation_metadata,
+)
+
+
+class SavedModelMetadataBuilder(metadata_builder.MetadataBuilder):
+    """Class for generating metadata for a model built with TF 2.X Keras API."""
+
+    def __init__(
+        self,
+        model_path: str,
+        signature_name: Optional[str] = None,
+        outputs_to_explain: Optional[List[str]] = None,
+        **kwargs
+    ) -> None:
+        """Initializes a SavedModelMetadataBuilder object.
+
+        Args:
+          model_path:
+              Required. Path to load the saved model from.
+          signature_name:
+              Optional. Name of the signature to be explained. Inputs and
+              outputs of this signature will be written in the metadata. If not
+              provided, the default signature will be used.
+          outputs_to_explain:
+              Optional. List of output names to explain. Only single output is
+              supported for now. Hence, the list should contain one element.
+              This parameter is required if the model signature (provided via
+              signature_name) specifies multiple outputs.
+          **kwargs:
+              Any keyword arguments to be passed to tf.saved_model.save() function.
+
+        Raises:
+            ValueError if outputs_to_explain contains more than 1 element.
+            ImportError if tf is not imported.
+        """
+        if outputs_to_explain and len(outputs_to_explain) > 1:
+            raise ValueError(
+                '"outputs_to_explain" can only contain 1 element.\n'
+                "Got: %s" % len(outputs_to_explain)
+            )
+        self._explain_output = outputs_to_explain
+        self._saved_model_args = kwargs
+
+        try:
+            import tensorflow as tf
+        except ImportError:
+            raise ImportError(
+                "Tensorflow is not installed and is required to load saved model. "
+                'Please install the SDK using "pip install google-cloud-aiplatform[full]"'
+            )
+
+        if not signature_name:
+            signature_name = tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY
+        self._loaded_model = tf.saved_model.load(model_path)
+        self._inputs, self._outputs = self._infer_metadata_entries_from_model(
+            signature_name
+        )
+
+    def _infer_metadata_entries_from_model(
+        self, signature_name: str
+    ) -> Tuple[
+        Dict[str, explanation_metadata.ExplanationMetadata.InputMetadata],
+        Dict[str, explanation_metadata.ExplanationMetadata.OutputMetadata],
+    ]:
+        """Infers metadata inputs and outputs.
+
+        Args:
+          signature_name:
+              Required. Name of the signature to be explained. Inputs and outputs of this signature will be written in the metadata. If not provided, the default signature will be used.
+
+        Returns:
+              Inferred input metadata and output metadata from the model.
+
+        Raises:
+              ValueError if specified name is not found in signature outputs.
+        """
+
+        loaded_sig = self._loaded_model.signatures[signature_name]
+        _, input_sig = loaded_sig.structured_input_signature
+        output_sig = loaded_sig.structured_outputs
+        input_mds = {}
+        for name, tensor_spec in input_sig.items():
+            input_mds[name] = explanation_metadata.ExplanationMetadata.InputMetadata(
+                input_tensor_name=name,
+                modality=None if tensor_spec.dtype.is_floating else "categorical",
+            )
+
+        output_mds = {}
+        for name in output_sig:
+            if not self._explain_output or self._explain_output[0] == name:
+                output_mds[
+                    name
+                ] = explanation_metadata.ExplanationMetadata.OutputMetadata(
+                    output_tensor_name=name,
+                )
+                break
+        else:
+            raise ValueError(
+                "Specified output name cannot be found in given signature outputs."
+            )
+        return input_mds, output_mds
+
+    def get_metadata(self) -> Dict[str, Any]:
+        """Returns the current metadata as a dictionary.
+
+        Returns:
+            Json format of the explanation metadata.
+        """
+        current_md = explanation_metadata.ExplanationMetadata(
+            inputs=self._inputs, outputs=self._outputs,
+        )
+        return json_format.MessageToDict(current_md._pb)
diff --git a/setup.py b/setup.py
@@ -29,9 +29,16 @@
 with io.open(readme_filename, encoding="utf-8") as readme_file:
     readme = readme_file.read()
 
-tensorboard_extra_require = ["tensorflow >=2.3.0, <=2.5.0"]
+tensorboard_extra_require = [
+    "tensorflow >=2.3.0, <=2.5.0",
+    "grpcio~=1.34.0",
+    "six~=1.15.0",
+]
 metadata_extra_require = ["pandas >= 1.0.0"]
-full_extra_require = tensorboard_extra_require + metadata_extra_require
+xai_extra_require = ["tensorflow >=2.3.0, <=2.5.0"]
+full_extra_require = list(
+    set(tensorboard_extra_require + metadata_extra_require + xai_extra_require)
+)
 testing_extra_require = full_extra_require + ["grpcio-testing"]
 
 
@@ -69,6 +76,7 @@
         "metadata": metadata_extra_require,
         "tensorboard": tensorboard_extra_require,
         "testing": testing_extra_require,
+        "xai": xai_extra_require,
     },
     python_requires=">=3.6",
     scripts=[],

diff --git a/tests/unit/aiplatform/test_explain_saved_model_metadata_builder_test.py b/tests/unit/aiplatform/test_explain_saved_model_metadata_builder_test.py
@@ -0,0 +1,167 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2020 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+import tensorflow as tf
+import numpy as np
+
+from google.cloud.aiplatform.explain.metadata.tf.v2 import saved_model_metadata_builder
+
+
+class SavedModelMetadataBuilderTest(tf.test.TestCase):
+    """Tests SavedModelMetadataBuilder.get_metadata() on several kinds of models.
+
+    Each test builds a model, saves it with tf.saved_model.save() into the
+    test's temp dir, constructs a builder from that path with the default
+    signature, and compares the generated metadata dictionary with the
+    expected one.
+    """
+
+    def test_get_metadata_sequential(self):
+        """Checks the metadata generated for a Keras Sequential model."""
+        # Set up for the sequential.
+        self.seq_model = tf.keras.models.Sequential()
+        self.seq_model.add(tf.keras.layers.Dense(32, activation="relu", input_dim=10))
+        self.seq_model.add(tf.keras.layers.Dense(32, activation="relu"))
+        self.seq_model.add(tf.keras.layers.Dense(1, activation="sigmoid"))
+        self.saved_model_path = self.get_temp_dir()
+        tf.saved_model.save(self.seq_model, self.saved_model_path)
+
+        builder = saved_model_metadata_builder.SavedModelMetadataBuilder(
+            self.saved_model_path
+        )
+        generated_md = builder.get_metadata()
+        # NOTE(review): "dense_input" / "dense_2" are presumably the names
+        # Keras auto-assigns, so they would depend on the order in which the
+        # layers above are created - confirm before reordering.
+        expected_md = {
+            "outputs": {"dense_2": {"outputTensorName": "dense_2"}},
+            "inputs": {"dense_input": {"inputTensorName": "dense_input"}},
+        }
+        assert expected_md == generated_md
+
+    def test_get_metadata_functional(self):
+        """Checks the metadata generated for a two-input Keras functional model."""
+        inputs1 = tf.keras.Input(shape=(10,), name="model_input1")
+        inputs2 = tf.keras.Input(shape=(10,), name="model_input2")
+        x = tf.keras.layers.Dense(32, activation="relu")(inputs1)
+        x = tf.keras.layers.Dense(32, activation="relu")(x)
+        x = tf.keras.layers.concatenate([x, inputs2])
+        outputs = tf.keras.layers.Dense(1, activation="sigmoid")(x)
+        fun_model = tf.keras.Model(
+            inputs=[inputs1, inputs2], outputs=outputs, name="fun"
+        )
+        model_dir = self.get_temp_dir()
+        tf.saved_model.save(fun_model, model_dir)
+        builder = saved_model_metadata_builder.SavedModelMetadataBuilder(model_dir)
+        generated_md = builder.get_metadata()
+        # Both explicitly named inputs are expected; the single output is
+        # expected under the name "dense_2".
+        expected_md = {
+            "inputs": {
+                "model_input1": {"inputTensorName": "model_input1"},
+                "model_input2": {"inputTensorName": "model_input2"},
+            },
+            "outputs": {"dense_2": {"outputTensorName": "dense_2"}},
+        }
+        assert expected_md == generated_md
+
+    def test_get_metadata_subclassed_model(self):
+        """Checks the metadata generated for a subclassed tf.keras.Model."""
+
+        class MyModel(tf.keras.Model):
+            def __init__(self, num_classes=2):
+                super(MyModel, self).__init__(name="my_model")
+                self.num_classes = num_classes
+                self.dense_1 = tf.keras.layers.Dense(32, activation="relu")
+                self.dense_2 = tf.keras.layers.Dense(num_classes, activation="sigmoid")
+
+            def call(self, inputs):
+                x = self.dense_1(inputs)
+                return self.dense_2(x)
+
+        subclassed_model = MyModel()
+        subclassed_model.compile(loss="categorical_crossentropy")
+        # The model is fitted once on a single random sample before saving.
+        # NOTE(review): presumably needed so the subclassed model is built
+        # with a concrete input shape - confirm.
+        np.random.seed(0)
+        x_train = np.random.random((1, 100))
+        y_train = np.random.randint(2, size=(1, 2))
+        subclassed_model.fit(x_train, y_train, batch_size=1, epochs=1)
+        model_dir = self.get_temp_dir()
+        tf.saved_model.save(subclassed_model, model_dir)
+
+        builder = saved_model_metadata_builder.SavedModelMetadataBuilder(model_dir)
+        generated_md = builder.get_metadata()
+        expected_md = {
+            "inputs": {"input_1": {"inputTensorName": "input_1"}},
+            "outputs": {"output_1": {"outputTensorName": "output_1"}},
+        }
+        assert expected_md == generated_md
+
+    def test_non_keras_model(self):
+        """Checks the metadata generated for a plain tf.Module with a named output."""
+
+        class CustomModuleWithOutputName(tf.Module):
+            def __init__(self):
+                super(CustomModuleWithOutputName, self).__init__()
+                self.v = tf.Variable(1.0)
+
+            @tf.function(input_signature=[tf.TensorSpec([], tf.float32)])
+            def __call__(self, x):
+                return {"custom_output_name": x * self.v}
+
+        module_output = CustomModuleWithOutputName()
+        call_output = module_output.__call__.get_concrete_function(
+            tf.TensorSpec(None, tf.float32)
+        )
+        model_dir = self.get_temp_dir()
+        # The concrete function is registered explicitly under the
+        # "serving_default" signature key.
+        tf.saved_model.save(
+            module_output, model_dir, signatures={"serving_default": call_output}
+        )
+
+        builder = saved_model_metadata_builder.SavedModelMetadataBuilder(model_dir)
+        generated_md = builder.get_metadata()
+        # The input is expected under the argument name "x" and the output
+        # under the key of the dictionary returned by __call__.
+        expected_md = {
+            "inputs": {"x": {"inputTensorName": "x"}},
+            "outputs": {
+                "custom_output_name": {"outputTensorName": "custom_output_name"}
+            },
+        }
+        assert expected_md == generated_md
+
+    def test_model_with_feature_column(self):
+        """Checks that feature-column inputs are reported with "categorical" modality."""
+        feature_columns = [
+            tf.feature_column.embedding_column(
+                tf.feature_column.categorical_column_with_vocabulary_list(
+                    "mode", ["fixed", "normal", "reversible"]
+                ),
+                dimension=8,
+            ),
+            tf.feature_column.numeric_column("age"),
+        ]
+        feature_layer = tf.keras.layers.DenseFeatures(feature_columns)
+
+        model = tf.keras.Sequential(
+            [
+                feature_layer,
+                tf.keras.layers.Dense(128, activation="relu"),
+                tf.keras.layers.Dense(1),
+            ]
+        )
+
+        model.compile(
+            optimizer="adam",
+            loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
+            metrics=["accuracy"],
+        )
+
+        # "age" is fed as an integer array and "mode" as a string array.
+        model.fit(
+            {"age": np.array([20, 1]), "mode": np.array(["fixed", "normal"])},
+            np.array([0, 1]),
+        )
+        model_dir = self.get_temp_dir()
+        tf.saved_model.save(model, model_dir)
+        builder = saved_model_metadata_builder.SavedModelMetadataBuilder(model_dir)
+        generated_md = builder.get_metadata()
+        # NOTE(review): both inputs are expected to be "categorical",
+        # presumably because neither saved input spec has a floating dtype
+        # ("age" was fed integers) - confirm.
+        expected_md = {
+            "inputs": {
+                "age": {"inputTensorName": "age", "modality": "categorical"},
+                "mode": {"inputTensorName": "mode", "modality": "categorical"},
+            },
+            "outputs": {"output_1": {"outputTensorName": "output_1"}},
+        }
+        assert expected_md == generated_md