197 changes: 197 additions & 0 deletions keras_nlp/models/albert/albert_classifier.py
@@ -0,0 +1,197 @@
# Copyright 2022 The KerasNLP Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""ALBERT classification model."""

import copy

from tensorflow import keras

from keras_nlp.models.albert.albert_backbone import AlbertBackbone
from keras_nlp.models.albert.albert_backbone import albert_kernel_initializer
from keras_nlp.models.albert.albert_preprocessor import AlbertPreprocessor
from keras_nlp.models.albert.albert_presets import backbone_presets
from keras_nlp.models.task import Task
from keras_nlp.utils.python_utils import classproperty


@keras.utils.register_keras_serializable(package="keras_nlp")
class AlbertClassifier(Task):
"""An end-to-end ALBERT model for classification tasks

This model attaches a classification head to a `keras_nlp.model.AlbertBackbone`
backbone, mapping from the backbone outputs to logit output suitable for
a classification task. For usage of this model with pre-trained weights, see
the `from_preset()` method.

This model can optionally be configured with a `preprocessor` layer, in
which case it will automatically apply preprocessing to raw inputs during
`fit()`, `predict()`, and `evaluate()`. This is done by default when
creating the model with `from_preset()`.

Disclaimer: Pre-trained models are provided on an "as is" basis, without
warranties or conditions of any kind.

Args:
        backbone: A `keras_nlp.models.AlbertBackbone` instance.
num_classes: int. Number of classes to predict.
        dropout: float. The dropout probability, applied to the pooled output
            of the backbone before the final `Dense` logits layer.
preprocessor: A `keras_nlp.models.AlbertPreprocessor` or `None`. If
`None`, this model will not apply preprocessing, and inputs should
be preprocessed before calling the model.

Examples:

    Preprocessed inputs with a randomly initialized backbone.
```python
# Define the preprocessed inputs.
preprocessed_features = {
"token_ids": tf.ones(shape=(2, 12), dtype=tf.int64),
"segment_ids": tf.constant(
[[0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0]] * 2, shape=(2, 12)
),
"padding_mask": tf.constant(
[[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0]] * 2, shape=(2, 12)
),
}
labels = [0, 3]

    # Randomly initialize an ALBERT backbone.
    backbone = keras_nlp.models.AlbertBackbone(
vocabulary_size=1000,
num_layers=2,
num_heads=2,
embedding_dim=8,
hidden_dim=64,
intermediate_dim=128,
max_sequence_length=128,
name="encoder",
)

    # Create an ALBERT classifier and fit your data.
classifier = keras_nlp.models.AlbertClassifier(
backbone,
num_classes=4,
preprocessor=None,
)
classifier.compile(
loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
)
classifier.fit(x=preprocessed_features, y=labels, batch_size=2)

    # Access backbone programmatically (e.g., to change `trainable`).
    classifier.backbone.trainable = False
    ```

Raw string inputs with customized preprocessing.
```python
# Create a dataset with raw string features in an `(x, y)` format.
features = ["The quick brown fox jumped.", "I forgot my homework."]
labels = [0, 3]

# Use a shorter sequence length.
preprocessor = keras_nlp.models.AlbertPreprocessor.from_preset(
"albert_base_en_uncased",
sequence_length=128,
)

    # Create an `AlbertClassifier` and fit your data.
classifier = keras_nlp.models.AlbertClassifier.from_preset(
"albert_base_en_uncased",
num_classes=4,
preprocessor=preprocessor,
)
classifier.compile(
loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
)
classifier.fit(x=features, y=labels, batch_size=2)
```
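
    Raw string inputs with the default preprocessor (a sketch, assuming the
    preset bundles a matching preprocessor, as `from_preset()` provides by
    default).
    ```python
    # Create a dataset with raw string features in an `(x, y)` format.
    features = ["The quick brown fox jumped.", "I forgot my homework."]
    labels = [0, 3]

    # Create an ALBERT classifier with its default preprocessing and fit
    # your data.
    classifier = keras_nlp.models.AlbertClassifier.from_preset(
        "albert_base_en_uncased",
        num_classes=4,
    )
    classifier.compile(
        loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    )
    classifier.fit(x=features, y=labels, batch_size=2)
    ```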

Preprocessed inputs.
```python
# Create a dataset with preprocessed features in an `(x, y)` format.
preprocessed_features = {
"token_ids": tf.ones(shape=(2, 12), dtype=tf.int64),
"segment_ids": tf.constant(
[[0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0]] * 2, shape=(2, 12)
),
"padding_mask": tf.constant(
[[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0]] * 2, shape=(2, 12)
),
}
labels = [0, 3]

    # Create an ALBERT classifier and fit your data.
classifier = keras_nlp.models.AlbertClassifier.from_preset(
"albert_base_en_uncased",
num_classes=4,
preprocessor=None,
)
classifier.compile(
loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
)
classifier.fit(x=preprocessed_features, y=labels, batch_size=2)
```
"""

def __init__(
self,
backbone,
num_classes=2,
dropout=0.1,
preprocessor=None,
**kwargs,
):
inputs = backbone.input
pooled = backbone(inputs)["pooled_output"]
pooled = keras.layers.Dropout(dropout)(pooled)
outputs = keras.layers.Dense(
num_classes,
kernel_initializer=albert_kernel_initializer(),
name="logits",
)(pooled)
# Instantiate using Functional API Model constructor
super().__init__(
inputs=inputs,
outputs=outputs,
include_preprocessing=preprocessor is not None,
**kwargs,
)
# All references to `self` below this line
self._backbone = backbone
self._preprocessor = preprocessor
self.num_classes = num_classes
self.dropout = dropout

def get_config(self):
config = super().get_config()
config.update(
{
"num_classes": self.num_classes,
"dropout": self.dropout,
}
)

return config

@classproperty
def backbone_cls(cls):
return AlbertBackbone

@classproperty
def preprocessor_cls(cls):
return AlbertPreprocessor

@classproperty
def presets(cls):
return copy.deepcopy({**backbone_presets})
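
Aside: preprocessing can also live in the `tf.data` pipeline rather than in the model, which is the pattern the tests below exercise. A minimal sketch, assuming the `albert_base_en_uncased` preset (the loss choice is illustrative):

```python
import keras_nlp
import tensorflow as tf
from tensorflow import keras

features = ["The quick brown fox jumped.", "I forgot my homework."]
labels = [0, 3]

# Tokenize and pack inputs inside the input pipeline.
preprocessor = keras_nlp.models.AlbertPreprocessor.from_preset(
    "albert_base_en_uncased"
)
dataset = tf.data.Dataset.from_tensor_slices((features, labels))
dataset = dataset.batch(2).map(preprocessor)

# Disable built-in preprocessing, since the data is already tokenized.
classifier = keras_nlp.models.AlbertClassifier.from_preset(
    "albert_base_en_uncased",
    num_classes=4,
    preprocessor=None,
)
classifier.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
)
classifier.fit(dataset)
```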
146 changes: 146 additions & 0 deletions keras_nlp/models/albert/albert_classifier_test.py
@@ -0,0 +1,146 @@
# Copyright 2022 The KerasNLP Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for BERT classification model."""

import io
import os

import sentencepiece
import tensorflow as tf
from absl.testing import parameterized
from tensorflow import keras

from keras_nlp.models.albert.albert_backbone import AlbertBackbone
from keras_nlp.models.albert.albert_classifier import AlbertClassifier
from keras_nlp.models.albert.albert_preprocessor import AlbertPreprocessor
from keras_nlp.models.albert.albert_tokenizer import AlbertTokenizer


class AlbertClassifierTest(tf.test.TestCase, parameterized.TestCase):
def setUp(self):
self.backbone = AlbertBackbone(
vocabulary_size=1000,
num_layers=2,
num_heads=2,
embedding_dim=8,
hidden_dim=64,
intermediate_dim=128,
max_sequence_length=128,
name="encoder",
)

bytes_io = io.BytesIO()
vocab_data = tf.data.Dataset.from_tensor_slices(
["the quick brown fox", "the earth is round"]
)
sentencepiece.SentencePieceTrainer.train(
sentence_iterator=vocab_data.as_numpy_iterator(),
model_writer=bytes_io,
vocab_size=10,
model_type="WORD",
pad_id=0,
unk_id=1,
bos_id=2,
eos_id=3,
pad_piece="<pad>",
unk_piece="<unk>",
bos_piece="[CLS]",
eos_piece="[SEP]",
)
self.proto = bytes_io.getvalue()

tokenizer = AlbertTokenizer(proto=self.proto)

self.preprocessor = AlbertPreprocessor(
tokenizer=tokenizer,
sequence_length=8,
)
self.classifier = AlbertClassifier(
self.backbone,
4,
preprocessor=self.preprocessor,
)
self.classifier_no_preprocessing = AlbertClassifier(
self.backbone,
4,
preprocessor=None,
)

self.raw_batch = tf.constant(
[
"the quick brown fox.",
"the slow brown fox.",
"the smelly brown fox.",
"the old brown fox.",
]
)
self.preprocessed_batch = self.preprocessor(self.raw_batch)
self.raw_dataset = tf.data.Dataset.from_tensor_slices(
(self.raw_batch, tf.ones((4,)))
).batch(2)
self.preprocessed_dataset = self.raw_dataset.map(self.preprocessor)

def test_valid_call_classifier(self):
self.classifier(self.preprocessed_batch)

@parameterized.named_parameters(
("jit_compile_false", False), ("jit_compile_true", True)
)
    def test_albert_classifier_predict(self, jit_compile):
self.classifier.compile(jit_compile=jit_compile)
self.classifier.predict(self.raw_batch)

@parameterized.named_parameters(
("jit_compile_false", False), ("jit_compile_true", True)
)
    def test_albert_classifier_predict_no_preprocessing(self, jit_compile):
self.classifier_no_preprocessing.compile(jit_compile=jit_compile)
self.classifier_no_preprocessing.predict(self.preprocessed_batch)

@parameterized.named_parameters(
("jit_compile_false", False), ("jit_compile_true", True)
)
    def test_albert_classifier_fit(self, jit_compile):
self.classifier.compile(
loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
jit_compile=jit_compile,
)
self.classifier.fit(self.raw_dataset)

@parameterized.named_parameters(
("jit_compile_false", False), ("jit_compile_true", True)
)
    def test_albert_classifier_fit_no_preprocessing(self, jit_compile):
self.classifier_no_preprocessing.compile(
loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
jit_compile=jit_compile,
)
self.classifier_no_preprocessing.fit(self.preprocessed_dataset)

@parameterized.named_parameters(
("tf_format", "tf", "model"),
("keras_format", "keras_v3", "model.keras"),
)
def test_saved_model(self, save_format, filename):
model_output = self.classifier.predict(self.raw_batch)
save_path = os.path.join(self.get_temp_dir(), filename)
self.classifier.save(save_path, save_format=save_format)
restored_model = keras.models.load_model(save_path)

# Check we got the real object back.
self.assertIsInstance(restored_model, AlbertClassifier)

# Check that output matches.
restored_output = restored_model.predict(self.raw_batch)
self.assertAllClose(model_output, restored_output)
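
For quick local verification, the save/reload round trip in `test_saved_model` can be reproduced by hand. A minimal sketch, assuming `classifier` and `raw_batch` are built as in `setUp` above (the save path is illustrative):

```python
import numpy as np
from tensorflow import keras

# Save in the Keras v3 format and reload.
classifier.save("albert_classifier.keras", save_format="keras_v3")
restored = keras.models.load_model("albert_classifier.keras")

# Predictions from the restored model should match the original.
np.testing.assert_allclose(
    classifier.predict(raw_batch),
    restored.predict(raw_batch),
    atol=1e-5,
)
```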