diff --git a/keras_nlp/benchmarks/README.md b/keras_nlp/benchmarks/README.md index 40ffa59d9a..14ec3a1460 100644 --- a/keras_nlp/benchmarks/README.md +++ b/keras_nlp/benchmarks/README.md @@ -25,3 +25,27 @@ the following results were obtained: To change the configuration, say, for example, number of layers in the transformer model used for inference, the user can modify the config dictionaries given at the top of the script. + +## Sentiment Analysis + +For benchmarking classification models, the following command can be run +from the root of the repository: + +```sh +python3 keras_nlp/benchmarks/sentiment_analysis.py \ + --model="BertClassifier" \ + --preset="bert_small_en_uncased" \ + --learning_rate=5e-5 \ + --num_epochs=5 \ + --batch_size=32 + --mixed_precision_policy="mixed_float16" +``` + +flag `--model` specifies the model name, and `--preset` specifies the preset under testing. `--preset` could be None, +while `--model` is required. Other flags are common training flags. + +This script outputs: + +- validation accuracy for each epoch. +- testing accuracy after training is done. +- total elapsed time (in seconds). \ No newline at end of file diff --git a/keras_nlp/benchmarks/sentiment_analysis.py b/keras_nlp/benchmarks/sentiment_analysis.py new file mode 100644 index 0000000000..426f1fd9f2 --- /dev/null +++ b/keras_nlp/benchmarks/sentiment_analysis.py @@ -0,0 +1,144 @@ +# Copyright 2023 The KerasNLP Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import inspect +import time + +import tensorflow as tf +import tensorflow_datasets as tfds +from absl import app +from absl import flags +from tensorflow import keras + +import keras_nlp + +FLAGS = flags.FLAGS +flags.DEFINE_string( + "model", + None, + "The name of the classifier such as BertClassifier.", +) +flags.DEFINE_string( + "preset", + None, + "The name of a preset, e.g. bert_base_multi.", +) + +flags.DEFINE_string( + "mixed_precision_policy", + "mixed_float16", + "The global precision policy to use. E.g. 'mixed_float16' or 'float32'.", +) + +flags.DEFINE_float("learning_rate", 5e-5, "The learning rate.") +flags.DEFINE_integer("num_epochs", 1, "The number of epochs.") +flags.DEFINE_integer("batch_size", 16, "The batch size.") + +tfds.disable_progress_bar() + +BUFFER_SIZE = 10000 + + +def create_imdb_dataset(): + dataset, info = tfds.load( + "imdb_reviews", as_supervised=True, with_info=True + ) + train_dataset, test_dataset = dataset["train"], dataset["test"] + + train_dataset = ( + train_dataset.shuffle(BUFFER_SIZE) + .batch(FLAGS.batch_size) + .prefetch(tf.data.AUTOTUNE) + ) + + # We split the test data evenly into validation and test sets. + test_dataset_size = info.splits["test"].num_examples // 2 + + val_dataset = ( + test_dataset.take(test_dataset_size) + .batch(FLAGS.batch_size) + .prefetch(tf.data.AUTOTUNE) + ) + test_dataset = ( + test_dataset.skip(test_dataset_size) + .batch(FLAGS.batch_size) + .prefetch(tf.data.AUTOTUNE) + ) + + return train_dataset, val_dataset, test_dataset + + +def create_model(): + for name, symbol in keras_nlp.models.__dict__.items(): + if inspect.isclass(symbol) and issubclass(symbol, keras.Model): + if FLAGS.model and name != FLAGS.model: + continue + if not hasattr(symbol, "from_preset"): + continue + for preset in symbol.presets: + if FLAGS.preset and preset != FLAGS.preset: + continue + model = symbol.from_preset(preset) + print(f"Using model {name} with preset {preset}") + return model + + raise ValueError(f"Model {FLAGS.model} or preset {FLAGS.preset} not found.") + + +def train_model( + model: keras.Model, + train_dataset: tf.data.Dataset, + validation_dataset: tf.data.Dataset, +): + model.compile( + loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True), + optimizer=keras.optimizers.Adam(5e-5), + metrics=keras.metrics.SparseCategoricalAccuracy(), + jit_compile=True, + ) + + model.fit( + train_dataset, + epochs=FLAGS.num_epochs, + validation_data=validation_dataset, + verbose=2, + ) + + return model + + +def evaluate_model(model: keras.Model, test_dataset: tf.data.Dataset): + loss, accuracy = model.evaluate(test_dataset) + print(f"Test loss: {loss}") + print(f"Test accuracy: {accuracy}") + + +def main(_): + keras.mixed_precision.set_global_policy(FLAGS.mixed_precision_policy) + + # Start time + start_time = time.time() + + train_dataset, validation_dataset, test_dataset = create_imdb_dataset() + model = create_model() + model = train_model(model, train_dataset, validation_dataset) + evaluate_model(model, test_dataset) + + # End time + end_time = time.time() + print(f"Total wall time: {end_time - start_time}") + + +if __name__ == "__main__": + flags.mark_flag_as_required("model") + app.run(main)