From 9db746664d9f45e28727cdf9b1e51f8af3d1a0d1 Mon Sep 17 00:00:00 2001
From: Plutone11011
Date: Thu, 2 Feb 2023 11:33:22 +0100
Subject: [PATCH 1/3] default compilation for DistilBERT/ALBERT/XlmRoBERTa/FNet/DeBERTa

---
 keras_nlp/models/albert/albert_classifier.py           | 9 +++++++++
 keras_nlp/models/albert/albert_classifier_test.py      | 3 +++
 keras_nlp/models/deberta_v3/deberta_v3_classifier.py   | 9 +++++++++
 .../models/deberta_v3/deberta_v3_classifier_test.py    | 3 +++
 keras_nlp/models/distil_bert/distil_bert_classifier.py | 8 ++++++++
 .../models/distil_bert/distil_bert_classifier_test.py  | 3 +++
 keras_nlp/models/f_net/f_net_classifier.py             | 8 ++++++++
 keras_nlp/models/f_net/f_net_classifier_test.py        | 3 +++
 keras_nlp/models/xlm_roberta/xlm_roberta_classifier.py | 8 ++++++++
 .../models/xlm_roberta/xlm_roberta_classifier_test.py  | 3 +++
 10 files changed, 57 insertions(+)

diff --git a/keras_nlp/models/albert/albert_classifier.py b/keras_nlp/models/albert/albert_classifier.py
index 20f6e1a89a..a6b3eb41e3 100644
--- a/keras_nlp/models/albert/albert_classifier.py
+++ b/keras_nlp/models/albert/albert_classifier.py
@@ -22,6 +22,7 @@
 from keras_nlp.models.albert.albert_preprocessor import AlbertPreprocessor
 from keras_nlp.models.albert.albert_presets import backbone_presets
 from keras_nlp.models.task import Task
+from keras_nlp.utils.keras_utils import is_xla_compatible
 from keras_nlp.utils.python_utils import classproperty
 
 
@@ -173,6 +174,14 @@ def __init__(
         self.num_classes = num_classes
         self.dropout = dropout
 
+        # Default compilation
+        self.compile(
+            loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
+            optimizer=keras.optimizers.Adam(5e-5),
+            metrics=keras.metrics.SparseCategoricalAccuracy(),
+            jit_compile=is_xla_compatible(self),
+        )
+
     def get_config(self):
         config = super().get_config()
         config.update(
diff --git a/keras_nlp/models/albert/albert_classifier_test.py b/keras_nlp/models/albert/albert_classifier_test.py
index 6f586b9c95..622e8bae3b 100644
--- a/keras_nlp/models/albert/albert_classifier_test.py
+++ b/keras_nlp/models/albert/albert_classifier_test.py
@@ -108,6 +108,9 @@ def test_albert_classifier_predict_no_preprocessing(self, jit_compile):
         self.classifier_no_preprocessing.compile(jit_compile=jit_compile)
         self.classifier_no_preprocessing.predict(self.preprocessed_batch)
 
+    def test_albert_classifier_fit_default_compile(self):
+        self.classifier.fit(self.raw_dataset)
+
     @parameterized.named_parameters(
         ("jit_compile_false", False), ("jit_compile_true", True)
     )
diff --git a/keras_nlp/models/deberta_v3/deberta_v3_classifier.py b/keras_nlp/models/deberta_v3/deberta_v3_classifier.py
index 3ea01dd102..660d0a99e1 100644
--- a/keras_nlp/models/deberta_v3/deberta_v3_classifier.py
+++ b/keras_nlp/models/deberta_v3/deberta_v3_classifier.py
@@ -26,6 +26,7 @@
 )
 from keras_nlp.models.deberta_v3.deberta_v3_presets import backbone_presets
 from keras_nlp.models.task import Task
+from keras_nlp.utils.keras_utils import is_xla_compatible
 from keras_nlp.utils.python_utils import classproperty
 
 
@@ -199,6 +200,14 @@ def __init__(
         self.hidden_dim = hidden_dim
         self.dropout = dropout
 
+        # Default compilation
+        self.compile(
+            loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
+            optimizer=keras.optimizers.Adam(5e-5),
+            metrics=keras.metrics.SparseCategoricalAccuracy(),
+            jit_compile=is_xla_compatible(self),
+        )
+
     def get_config(self):
         config = super().get_config()
         config.update(
diff --git a/keras_nlp/models/deberta_v3/deberta_v3_classifier_test.py b/keras_nlp/models/deberta_v3/deberta_v3_classifier_test.py
index 1b57b8aac3..9266a9010d 100644
--- a/keras_nlp/models/deberta_v3/deberta_v3_classifier_test.py
+++ b/keras_nlp/models/deberta_v3/deberta_v3_classifier_test.py
@@ -106,6 +106,9 @@ def test_classifier_predict_no_preprocessing(self, jit_compile):
         self.classifier_no_preprocessing.compile(jit_compile=jit_compile)
         self.classifier_no_preprocessing.predict(self.preprocessed_batch)
 
+    def test_debertav3_classifier_fit_default_compile(self):
+        self.classifier.fit(self.raw_dataset)
+
     @parameterized.named_parameters(
         ("jit_compile_false", False), ("jit_compile_true", True)
     )
diff --git a/keras_nlp/models/distil_bert/distil_bert_classifier.py b/keras_nlp/models/distil_bert/distil_bert_classifier.py
index 6ed81d8625..b20c556db4 100644
--- a/keras_nlp/models/distil_bert/distil_bert_classifier.py
+++ b/keras_nlp/models/distil_bert/distil_bert_classifier.py
@@ -26,6 +26,7 @@
 )
 from keras_nlp.models.distil_bert.distil_bert_presets import backbone_presets
 from keras_nlp.models.task import Task
+from keras_nlp.utils.keras_utils import is_xla_compatible
 from keras_nlp.utils.python_utils import classproperty
 
 
@@ -202,6 +203,13 @@ def __init__(
         self.hidden_dim = hidden_dim
         self.dropout = dropout
 
+        self.compile(
+            loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
+            optimizer=keras.optimizers.Adam(5e-5),
+            metrics=keras.metrics.SparseCategoricalAccuracy(),
+            jit_compile=is_xla_compatible(self),
+        )
+
     def get_config(self):
         config = super().get_config()
         config.update(
diff --git a/keras_nlp/models/distil_bert/distil_bert_classifier_test.py b/keras_nlp/models/distil_bert/distil_bert_classifier_test.py
index e0d9f89c09..a9a0fc06d1 100644
--- a/keras_nlp/models/distil_bert/distil_bert_classifier_test.py
+++ b/keras_nlp/models/distil_bert/distil_bert_classifier_test.py
@@ -109,6 +109,9 @@ def test_classifier_fit_no_preprocessing(self, jit_compile):
         )
         self.classifier_no_preprocessing.fit(self.preprocessed_dataset)
 
+    def test_distilbert_classifier_fit_default_compile(self):
+        self.classifier.fit(self.raw_dataset)
+
     @parameterized.named_parameters(
         ("tf_format", "tf", "model"),
         ("keras_format", "keras_v3", "model.keras"),
diff --git a/keras_nlp/models/f_net/f_net_classifier.py b/keras_nlp/models/f_net/f_net_classifier.py
index 8f7f08a0b0..6e43b76f76 100644
--- a/keras_nlp/models/f_net/f_net_classifier.py
+++ b/keras_nlp/models/f_net/f_net_classifier.py
@@ -23,6 +23,7 @@
 from keras_nlp.models.f_net.f_net_preprocessor import FNetPreprocessor
 from keras_nlp.models.f_net.f_net_presets import backbone_presets
 from keras_nlp.models.task import Task
+from keras_nlp.utils.keras_utils import is_xla_compatible
 from keras_nlp.utils.python_utils import classproperty
 
 
@@ -121,6 +122,13 @@ def __init__(
         self.num_classes = num_classes
         self.dropout = dropout
 
+        self.compile(
+            loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
+            optimizer=keras.optimizers.Adam(5e-5),
+            metrics=keras.metrics.SparseCategoricalAccuracy(),
+            jit_compile=is_xla_compatible(self),
+        )
+
     def get_config(self):
         config = super().get_config()
         config.update(
diff --git a/keras_nlp/models/f_net/f_net_classifier_test.py b/keras_nlp/models/f_net/f_net_classifier_test.py
index cf4120fe83..7593b80908 100644
--- a/keras_nlp/models/f_net/f_net_classifier_test.py
+++ b/keras_nlp/models/f_net/f_net_classifier_test.py
@@ -107,6 +107,9 @@ def test_fnet_classifier_predict_no_preprocessing(self, jit_compile):
         self.classifier_no_preprocessing.compile(jit_compile=jit_compile)
         self.classifier_no_preprocessing.predict(self.preprocessed_batch)
 
+    def test_fnet_classifier_fit_default_compile(self):
+        self.classifier.fit(self.raw_dataset)
+
     @parameterized.named_parameters(
         ("jit_compile_false", False), ("jit_compile_true", True)
     )
diff --git a/keras_nlp/models/xlm_roberta/xlm_roberta_classifier.py b/keras_nlp/models/xlm_roberta/xlm_roberta_classifier.py
index 0e250ff859..c50ea42b86 100644
--- a/keras_nlp/models/xlm_roberta/xlm_roberta_classifier.py
+++ b/keras_nlp/models/xlm_roberta/xlm_roberta_classifier.py
@@ -24,6 +24,7 @@
     XLMRobertaPreprocessor,
 )
 from keras_nlp.models.xlm_roberta.xlm_roberta_presets import backbone_presets
+from keras_nlp.utils.keras_utils import is_xla_compatible
 from keras_nlp.utils.python_utils import classproperty
 
 
@@ -196,6 +197,13 @@ def __init__(
         self.hidden_dim = hidden_dim
         self.dropout = dropout
 
+        self.compile(
+            loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
+            optimizer=keras.optimizers.Adam(5e-5),
+            metrics=keras.metrics.SparseCategoricalAccuracy(),
+            jit_compile=is_xla_compatible(self),
+        )
+
     def preprocess_samples(self, x, y=None, sample_weight=None):
         return self.preprocessor(x, y=y, sample_weight=sample_weight)
 
diff --git a/keras_nlp/models/xlm_roberta/xlm_roberta_classifier_test.py b/keras_nlp/models/xlm_roberta/xlm_roberta_classifier_test.py
index b1e7ea9265..c8efb4fbeb 100644
--- a/keras_nlp/models/xlm_roberta/xlm_roberta_classifier_test.py
+++ b/keras_nlp/models/xlm_roberta/xlm_roberta_classifier_test.py
@@ -122,6 +122,9 @@ def test_classifier_fit_no_preprocessing(self, jit_compile):
         )
         self.classifier_no_preprocessing.fit(self.preprocessed_dataset)
 
+    def test_xlmroberta_classifier_fit_default_compile(self):
+        self.classifier.fit(self.raw_dataset)
+
     @parameterized.named_parameters(
         ("tf_format", "tf", "model"),
         ("keras_format", "keras_v3", "model.keras"),

From 497ff13559820a69ebef4b51f8398dc501f62374 Mon Sep 17 00:00:00 2001
From: Plutone11011
Date: Thu, 2 Feb 2023 11:39:20 +0100
Subject: [PATCH 2/3] fix format

---
 keras_nlp/layers/masked_lm_mask_generator.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/keras_nlp/layers/masked_lm_mask_generator.py b/keras_nlp/layers/masked_lm_mask_generator.py
index 069f35b8f6..ab03c530e9 100644
--- a/keras_nlp/layers/masked_lm_mask_generator.py
+++ b/keras_nlp/layers/masked_lm_mask_generator.py
@@ -147,11 +147,7 @@ def call(self, inputs):
             # convert dense to ragged.
             inputs = tf.RaggedTensor.from_tensor(inputs)
 
-        (
-            token_ids,
-            mask_positions,
-            mask_ids,
-        ) = tf_text.mask_language_model(
+        (token_ids, mask_positions, mask_ids,) = tf_text.mask_language_model(
             inputs,
             item_selector=self._random_selector,
             mask_values_chooser=self._mask_values_chooser,

From cb7c7c2697e99e1586a724d298e95cbf29b9fa3b Mon Sep 17 00:00:00 2001
From: Plutone11011
Date: Thu, 2 Feb 2023 20:32:58 +0100
Subject: [PATCH 3/3] fix outdated black version

---
 keras_nlp/layers/masked_lm_mask_generator.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/keras_nlp/layers/masked_lm_mask_generator.py b/keras_nlp/layers/masked_lm_mask_generator.py
index ab03c530e9..069f35b8f6 100644
--- a/keras_nlp/layers/masked_lm_mask_generator.py
+++ b/keras_nlp/layers/masked_lm_mask_generator.py
@@ -147,7 +147,11 @@ def call(self, inputs):
             # convert dense to ragged.
             inputs = tf.RaggedTensor.from_tensor(inputs)
 
-        (token_ids, mask_positions, mask_ids,) = tf_text.mask_language_model(
+        (
+            token_ids,
+            mask_positions,
+            mask_ids,
+        ) = tf_text.mask_language_model(
             inputs,
             item_selector=self._random_selector,
             mask_values_chooser=self._mask_values_chooser,
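A minimal usage sketch of the behavior PATCH 1/3 introduces: after __init__, each of these classifiers is already compiled with sparse categorical cross-entropy, Adam(5e-5), accuracy, and XLA where supported, so fit() works without an explicit compile() call. The DistilBERT preset name, toy data, and batch size below are assumptions chosen for illustration only; they are not part of the patch.

import keras_nlp

# With this patch, the constructor already calls compile() with
# SparseCategoricalCrossentropy(from_logits=True), Adam(5e-5),
# SparseCategoricalAccuracy, and jit_compile=is_xla_compatible(self).
classifier = keras_nlp.models.DistilBertClassifier.from_preset(
    "distil_bert_base_en_uncased",  # assumed preset name, for illustration
    num_classes=2,
)

# Fit directly on raw strings and integer labels; no compile() step needed.
features = ["The quick brown fox jumped.", "I forgot my homework."]
labels = [1, 0]
classifier.fit(x=features, y=labels, batch_size=2)

# The defaults can still be overridden by recompiling, as the existing
# jit_compile tests in this patch do.
classifier.compile(jit_compile=False)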