Add AlbertClassifier (#668)

kanpuriyanawab · mattdangerw · web-flow · commit 85514c5da4e1 · 2023-01-31T13:41:26.000-08:00
* init commit

* added classifier, from_preset_method unimplemented yet

* running formatting, removing unused imports

* incorporating suggested changes

* formatting

* updating docstrings

* fixing errors due to merge

* fixing formatting

* Fix test names

---------

Co-authored-by: Matt Watson <mattdangerw@gmail.com>
diff --git a/keras_nlp/models/albert/albert_classifier.py b/keras_nlp/models/albert/albert_classifier.py
@@ -0,0 +1,197 @@
+# Copyright 2022 The KerasNLP Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""ALBERT classification model."""
+
+import copy
+
+from tensorflow import keras
+
+from keras_nlp.models.albert.albert_backbone import AlbertBackbone
+from keras_nlp.models.albert.albert_backbone import albert_kernel_initializer
+from keras_nlp.models.albert.albert_preprocessor import AlbertPreprocessor
+from keras_nlp.models.albert.albert_presets import backbone_presets
+from keras_nlp.models.task import Task
+from keras_nlp.utils.python_utils import classproperty
+
+
+@keras.utils.register_keras_serializable(package="keras_nlp")
+class AlbertClassifier(Task):
+    """An end-to-end ALBERT model for classification tasks.
+
+    This model attaches a classification head to a
+    `keras_nlp.models.AlbertBackbone` backbone, mapping from the backbone
+    outputs to logit output suitable for a classification task. For usage of
+    this model with pre-trained weights, see the `from_preset()` method.
+
+    This model can optionally be configured with a `preprocessor` layer, in
+    which case it will automatically apply preprocessing to raw inputs during
+    `fit()`, `predict()`, and `evaluate()`. This is done by default when
+    creating the model with `from_preset()`.
+
+    Disclaimer: Pre-trained models are provided on an "as is" basis, without
+    warranties or conditions of any kind.
+
+    Args:
+        backbone: A `keras_nlp.models.AlbertBackbone` instance.
+        num_classes: int. Number of classes to predict.
+        dropout: float. The dropout probability value, applied to the pooled
+            backbone output before the classification (logits) layer.
+        preprocessor: A `keras_nlp.models.AlbertPreprocessor` or `None`. If
+            `None`, this model will not apply preprocessing, and inputs should
+            be preprocessed before calling the model.
+
+    Examples:
+
+    Example usage.
+    ```python
+    # Define the preprocessed inputs.
+    preprocessed_features = {
+        "token_ids": tf.ones(shape=(2, 12), dtype=tf.int64),
+        "segment_ids": tf.constant(
+            [[0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0]] * 2, shape=(2, 12)
+        ),
+        "padding_mask": tf.constant(
+            [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0]] * 2, shape=(2, 12)
+        ),
+    }
+    labels = [0, 3]
+
+    # Randomly initialize an ALBERT backbone.
+    backbone = AlbertBackbone(
+        vocabulary_size=1000,
+        num_layers=2,
+        num_heads=2,
+        embedding_dim=8,
+        hidden_dim=64,
+        intermediate_dim=128,
+        max_sequence_length=128,
+        name="encoder",
+    )
+
+    # Create an ALBERT classifier and fit your data.
+    classifier = keras_nlp.models.AlbertClassifier(
+        backbone,
+        num_classes=4,
+        preprocessor=None,
+    )
+    classifier.compile(
+        loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
+    )
+    classifier.fit(x=preprocessed_features, y=labels, batch_size=2)
+
+    # Access backbone programmatically (e.g., to change `trainable`)
+    classifier.backbone.trainable = False
+    ```
+    Raw string inputs with customized preprocessing.
+    ```python
+    # Create a dataset with raw string features in an `(x, y)` format.
+    features = ["The quick brown fox jumped.", "I forgot my homework."]
+    labels = [0, 3]
+
+    # Use a shorter sequence length.
+    preprocessor = keras_nlp.models.AlbertPreprocessor.from_preset(
+        "albert_base_en_uncased",
+        sequence_length=128,
+    )
+
+    # Create an AlbertClassifier and fit your data.
+    classifier = keras_nlp.models.AlbertClassifier.from_preset(
+        "albert_base_en_uncased",
+        num_classes=4,
+        preprocessor=preprocessor,
+    )
+    classifier.compile(
+        loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
+    )
+    classifier.fit(x=features, y=labels, batch_size=2)
+    ```
+
+    Preprocessed inputs.
+    ```python
+    # Create a dataset with preprocessed features in an `(x, y)` format.
+    preprocessed_features = {
+        "token_ids": tf.ones(shape=(2, 12), dtype=tf.int64),
+        "segment_ids": tf.constant(
+            [[0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0]] * 2, shape=(2, 12)
+        ),
+        "padding_mask": tf.constant(
+            [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0]] * 2, shape=(2, 12)
+        ),
+    }
+    labels = [0, 3]
+
+    # Create an ALBERT classifier and fit your data.
+    classifier = keras_nlp.models.AlbertClassifier.from_preset(
+        "albert_base_en_uncased",
+        num_classes=4,
+        preprocessor=None,
+    )
+    classifier.compile(
+        loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
+    )
+    classifier.fit(x=preprocessed_features, y=labels, batch_size=2)
+    ```
+    """
+
+    def __init__(
+        self,
+        backbone,
+        num_classes=2,
+        dropout=0.1,
+        preprocessor=None,
+        **kwargs,
+    ):
+        inputs = backbone.input
+        pooled = backbone(inputs)["pooled_output"]
+        pooled = keras.layers.Dropout(dropout)(pooled)
+        outputs = keras.layers.Dense(
+            num_classes,
+            kernel_initializer=albert_kernel_initializer(),
+            name="logits",
+        )(pooled)
+        # Instantiate using Functional API Model constructor
+        super().__init__(
+            inputs=inputs,
+            outputs=outputs,
+            include_preprocessing=preprocessor is not None,
+            **kwargs,
+        )
+        # All references to `self` below this line
+        self._backbone = backbone
+        self._preprocessor = preprocessor
+        self.num_classes = num_classes
+        self.dropout = dropout
+
+    def get_config(self):
+        config = super().get_config()
+        config.update(
+            {
+                "num_classes": self.num_classes,
+                "dropout": self.dropout,
+            }
+        )
+
+        return config
+
+    @classproperty
+    def backbone_cls(cls):
+        return AlbertBackbone
+
+    @classproperty
+    def preprocessor_cls(cls):
+        return AlbertPreprocessor
+
+    @classproperty
+    def presets(cls):
+        return copy.deepcopy({**backbone_presets})
diff --git a/keras_nlp/models/albert/albert_classifier_test.py b/keras_nlp/models/albert/albert_classifier_test.py
@@ -0,0 +1,146 @@
+# Copyright 2022 The KerasNLP Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Tests for ALBERT classification model."""
+
+import io
+import os
+
+import sentencepiece
+import tensorflow as tf
+from absl.testing import parameterized
+from tensorflow import keras
+
+from keras_nlp.models.albert.albert_backbone import AlbertBackbone
+from keras_nlp.models.albert.albert_classifier import AlbertClassifier
+from keras_nlp.models.albert.albert_preprocessor import AlbertPreprocessor
+from keras_nlp.models.albert.albert_tokenizer import AlbertTokenizer
+
+
+class AlbertClassifierTest(tf.test.TestCase, parameterized.TestCase):
+    def setUp(self):
+        self.backbone = AlbertBackbone(
+            vocabulary_size=1000,
+            num_layers=2,
+            num_heads=2,
+            embedding_dim=8,
+            hidden_dim=64,
+            intermediate_dim=128,
+            max_sequence_length=128,
+            name="encoder",
+        )
+
+        bytes_io = io.BytesIO()
+        vocab_data = tf.data.Dataset.from_tensor_slices(
+            ["the quick brown fox", "the earth is round"]
+        )
+        sentencepiece.SentencePieceTrainer.train(
+            sentence_iterator=vocab_data.as_numpy_iterator(),
+            model_writer=bytes_io,
+            vocab_size=10,
+            model_type="WORD",
+            pad_id=0,
+            unk_id=1,
+            bos_id=2,
+            eos_id=3,
+            pad_piece="<pad>",
+            unk_piece="<unk>",
+            bos_piece="[CLS]",
+            eos_piece="[SEP]",
+        )
+        self.proto = bytes_io.getvalue()
+
+        tokenizer = AlbertTokenizer(proto=self.proto)
+
+        self.preprocessor = AlbertPreprocessor(
+            tokenizer=tokenizer,
+            sequence_length=8,
+        )
+        self.classifier = AlbertClassifier(
+            self.backbone,
+            4,
+            preprocessor=self.preprocessor,
+        )
+        self.classifier_no_preprocessing = AlbertClassifier(
+            self.backbone,
+            4,
+            preprocessor=None,
+        )
+
+        self.raw_batch = tf.constant(
+            [
+                "the quick brown fox.",
+                "the slow brown fox.",
+                "the smelly brown fox.",
+                "the old brown fox.",
+            ]
+        )
+        self.preprocessed_batch = self.preprocessor(self.raw_batch)
+        self.raw_dataset = tf.data.Dataset.from_tensor_slices(
+            (self.raw_batch, tf.ones((4,)))
+        ).batch(2)
+        self.preprocessed_dataset = self.raw_dataset.map(self.preprocessor)
+
+    def test_valid_call_classifier(self):
+        self.classifier(self.preprocessed_batch)
+
+    @parameterized.named_parameters(
+        ("jit_compile_false", False), ("jit_compile_true", True)
+    )
+    def test_albert_classifier_predict(self, jit_compile):
+        self.classifier.compile(jit_compile=jit_compile)
+        self.classifier.predict(self.raw_batch)
+
+    @parameterized.named_parameters(
+        ("jit_compile_false", False), ("jit_compile_true", True)
+    )
+    def test_albert_classifier_predict_no_preprocessing(self, jit_compile):
+        self.classifier_no_preprocessing.compile(jit_compile=jit_compile)
+        self.classifier_no_preprocessing.predict(self.preprocessed_batch)
+
+    @parameterized.named_parameters(
+        ("jit_compile_false", False), ("jit_compile_true", True)
+    )
+    def test_albert_classifier_fit(self, jit_compile):
+        self.classifier.compile(
+            loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
+            jit_compile=jit_compile,
+        )
+        self.classifier.fit(self.raw_dataset)
+
+    @parameterized.named_parameters(
+        ("jit_compile_false", False), ("jit_compile_true", True)
+    )
+    def test_albert_classifier_fit_no_preprocessing(self, jit_compile):
+        self.classifier_no_preprocessing.compile(
+            loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
+            jit_compile=jit_compile,
+        )
+        self.classifier_no_preprocessing.fit(self.preprocessed_dataset)
+
+    @parameterized.named_parameters(
+        ("tf_format", "tf", "model"),
+        ("keras_format", "keras_v3", "model.keras"),
+    )
+    def test_saved_model(self, save_format, filename):
+        model_output = self.classifier.predict(self.raw_batch)
+        save_path = os.path.join(self.get_temp_dir(), filename)
+        self.classifier.save(save_path, save_format=save_format)
+        restored_model = keras.models.load_model(save_path)
+
+        # Check we got the real object back.
+        self.assertIsInstance(restored_model, AlbertClassifier)
+
+        # Check that output matches.
+        restored_output = restored_model.predict(self.raw_batch)
+        self.assertAllClose(model_output, restored_output)
diff --git a/keras_nlp/models/albert/albert_presets_test.py b/keras_nlp/models/albert/albert_presets_test.py