Complete run_small_preset test for electra

pranavvp16 · pranavvp16 · commit dada19870de0 · 2024-03-19T01:55:29.000+05:30
diff --git a/keras_nlp/models/electra/electra_backbone_test.py b/keras_nlp/models/electra/electra_backbone_test.py
@@ -59,7 +59,7 @@ def test_saved_model(self):
     def test_smallest_preset(self):
         self.run_preset_test(
             cls=ElectraBackbone,
-            preset="electra-small-generator",
+            preset="electra_small_discriminator_en",
             input_data={
                 "token_ids": ops.array([[101, 1996, 4248, 102]], dtype="int32"),
                 "segment_ids": ops.zeros((1, 4), dtype="int32"),
@@ -70,10 +70,13 @@ def test_smallest_preset(self):
                 "pooled_output": (1, 256),
             },
             # The forward pass from a preset should be stable!
-            # TODO: Add sequence and pooled output trimmed to 5 tokens.
             expected_partial_output={
-                "sequence_output": (ops.array()),
-                "pooled_output": (ops.array()),
+                "sequence_output": (
+                    ops.array([0.32287, 0.18754, -0.22272, -0.24177, 1.18977])
+                ),
+                "pooled_output": (
+                    ops.array([-0.02974, 0.23383, 0.08430, -0.19471, 0.14822])
+                ),
             },
         )
 
diff --git a/keras_nlp/models/electra/electra_preprocessor.py b/keras_nlp/models/electra/electra_preprocessor.py
@@ -38,14 +38,14 @@ class ElectraPreprocessor(Preprocessor):
      2. Pack the inputs together using a `keras_nlp.layers.MultiSegmentPacker`.
        with the appropriate `"[CLS]"`, `"[SEP]"` and `"[PAD]"` tokens.
      3. Construct a dictionary of with keys `"token_ids"` and `"padding_mask"`,
-       that can be passed directly to a DistilBERT model.
+       that can be passed directly to an ELECTRA model.
 
     This layer can be used directly with `tf.data.Dataset.map` to preprocess
     string data in the `(x, y, sample_weight)` format used by
     `keras.Model.fit`.
 
     Args:
-        tokenizer: A `keras_nlp.models.DistilBertTokenizer` instance.
+        tokenizer: A `keras_nlp.models.ElectraTokenizer` instance.
         sequence_length: The length of the packed inputs.
         truncate: string. The algorithm to truncate a list of batched segments
             to fit within `sequence_length`. The value can be either
diff --git a/keras_nlp/models/electra/electra_presets.py b/keras_nlp/models/electra/electra_presets.py
@@ -20,10 +20,10 @@
                 "ELECTRA: Pre-training Text Encoders as Discriminators Rather Than Generators"
                 "This is base discriminator model with 12 layers."
             ),
-            "params": "109482240",
+            "params": 109482240,
             "official_name": "ELECTRA",
             "path": "electra",
-            "model_card": "https://huggingface.co/google/electra-base-discriminator",
+            "model_card": "https://github.com/google-research/electra",
         },
         "kaggle_handle": "kaggle://pranavprajapati16/electra/keras/electra_base_discriminator_en/1",
     },
@@ -33,10 +33,10 @@
                 "ELECTRA: Pre-training Text Encoders as Discriminators Rather Than Generators"
                 "This is small discriminator model with 12 layers."
             ),
-            "params": "13,548,800",
+            "params": 13548800,
             "official_name": "ELECTRA",
             "path": "electra",
-            "model_card": "https://huggingface.co/google/electra-small-discriminator",
+            "model_card": "https://github.com/google-research/electra",
         },
         "kaggle_handle": "kaggle://pranavprajapati16/electra/keras/electra_small_discriminator_en/1",
     },
@@ -46,10 +46,10 @@
                 "ELECTRA: Pre-training Text Encoders as Discriminators Rather Than Generators"
                 "This is small generator model with 12 layers."
             ),
-            "params": "13548800",
+            "params": 13548800,
             "official_name": "ELECTRA",
             "path": "electra",
-            "model_card": "https://huggingface.co/google/electra-small-generator",
+            "model_card": "https://github.com/google-research/electra",
         },
         "kaggle_handle": "kaggle://pranavprajapati16/electra/keras/electra_small_generator_en/1",
     },
@@ -59,10 +59,10 @@
                 "ELECTRA: Pre-training Text Encoders as Discriminators Rather Than Generators"
                 "This is base generator model with 12 layers."
             ),
-            "params": "33576960",
+            "params": 33576960,
             "official_name": "ELECTRA",
             "path": "electra",
-            "model_card": "https://huggingface.co/google/electra-base-generator",
+            "model_card": "https://github.com/google-research/electra",
         },
         "kaggle_handle": "kaggle://pranavprajapati16/electra/keras/electra_base_generator_en/1",
     },
diff --git a/keras_nlp/models/electra/electra_tokenizer_test.py b/keras_nlp/models/electra/electra_tokenizer_test.py
@@ -47,7 +47,7 @@ def test_errors_missing_special_tokens(self):
     def test_smallest_preset(self):
         self.run_preset_test(
             cls=ElectraTokenizer,
-            preset="distil_bert_base_en_uncased",
+            preset="electra_base_discriminator_en",
             input_data=["The quick brown fox."],
             expected_output=[[1996, 4248, 2829, 4419, 1012]],
         )

Original file line number	Diff line number	Diff line change
`@@ -47,7 +47,7 @@ def test_errors_missing_special_tokens(self):`
`47`	`47`	`def test_smallest_preset(self):`
`48`	`48`	`self.run_preset_test(`
`49`	`49`	`cls=ElectraTokenizer,`
`50`		`- preset="distil_bert_base_en_uncased",`
	`50`	`+ preset="electra_base_discriminator_en",`
`51`	`51`	`input_data=["The quick brown fox."],`
`52`	`52`	`expected_output=[[1996, 4248, 2829, 4419, 1012]],`
`53`	`53`	`)`