diff --git a/src/transformers/modeling_electra.py b/src/transformers/modeling_electra.py
index e244ac1c55e3..69dffa59b353 100644
--- a/src/transformers/modeling_electra.py
+++ b/src/transformers/modeling_electra.py
@@ -514,7 +514,7 @@ def __init__(self, config):
     def forward(self, discriminator_hidden_states):
         hidden_states = self.dense(discriminator_hidden_states)
         hidden_states = get_activation(self.config.hidden_act)(hidden_states)
-        logits = self.dense_prediction(hidden_states).squeeze()
+        logits = self.dense_prediction(hidden_states).squeeze(-1)

         return logits
diff --git a/src/transformers/modeling_tf_electra.py b/src/transformers/modeling_tf_electra.py
index 0f5ec7123671..4fc349237840 100644
--- a/src/transformers/modeling_tf_electra.py
+++ b/src/transformers/modeling_tf_electra.py
@@ -425,7 +425,7 @@ def __init__(self, config, **kwargs):
     def call(self, discriminator_hidden_states, training=False):
         hidden_states = self.dense(discriminator_hidden_states)
         hidden_states = get_tf_activation(self.config.hidden_act)(hidden_states)
-        logits = tf.squeeze(self.dense_prediction(hidden_states))
+        logits = tf.squeeze(self.dense_prediction(hidden_states), -1)

         return logits
diff --git a/tests/test_modeling_tf_bert.py b/tests/test_modeling_tf_bert.py
index 7fbdb08c87c7..e94b756b757d 100644
--- a/tests/test_modeling_tf_bert.py
+++ b/tests/test_modeling_tf_bert.py
@@ -17,7 +17,7 @@
 import unittest

 from transformers import BertConfig, is_tf_available
-from transformers.testing_utils import require_tf
+from transformers.testing_utils import require_tf, slow

 from .test_configuration_common import ConfigTester
 from .test_modeling_tf_common import TFModelTesterMixin, ids_tensor
@@ -328,3 +328,27 @@ def test_custom_load_tf_weights(self):
         self.assertEqual(sorted(output_loading_info["unexpected_keys"]), ["mlm___cls", "nsp___cls"])
         for layer in output_loading_info["missing_keys"]:
             self.assertTrue(layer.split("_")[0] in ["dropout", "classifier"])
+
+
+class TFBertModelIntegrationTest(unittest.TestCase):
+    @slow
+    def test_inference_masked_lm(self):
+        model = TFBertForPreTraining.from_pretrained("lysandre/tiny-bert-random")
+        input_ids = tf.constant([[0, 1, 2, 3, 4, 5]])
+        output = model(input_ids)[0]
+
+        expected_shape = [1, 6, 10]
+        self.assertEqual(output.shape, expected_shape)
+
+        print(output[:, :3, :3])
+
+        expected_slice = tf.constant(
+            [
+                [
+                    [0.03706957, 0.10124919, 0.03616843],
+                    [-0.06099961, 0.02266058, 0.00601412],
+                    [-0.06066202, 0.05684517, 0.02038802],
+                ]
+            ]
+        )
+        tf.debugging.assert_near(output[:, :3, :3], expected_slice, atol=1e-4)
diff --git a/tests/test_modeling_tf_electra.py b/tests/test_modeling_tf_electra.py
index 2c1daf4557e2..95a570a6a526 100644
--- a/tests/test_modeling_tf_electra.py
+++ b/tests/test_modeling_tf_electra.py
@@ -248,3 +248,19 @@ def test_model_from_pretrained(self):
         for model_name in ["google/electra-small-discriminator"]:
             model = TFElectraModel.from_pretrained(model_name)
             self.assertIsNotNone(model)
+
+
+class TFElectraModelIntegrationTest(unittest.TestCase):
+    @slow
+    def test_inference_masked_lm(self):
+        model = TFElectraForPreTraining.from_pretrained("lysandre/tiny-electra-random")
+        input_ids = tf.constant([[0, 1, 2, 3, 4, 5]])
+        output = model(input_ids)[0]
+
+        expected_shape = [1, 6]
+        self.assertEqual(output.shape, expected_shape)
+
+        print(output[:, :3])
+
+        expected_slice = tf.constant([[-0.24651965, 0.8835437, 1.823782]])
+        tf.debugging.assert_near(output[:, :3], expected_slice, atol=1e-4)
diff --git a/tests/test_modeling_tf_longformer.py b/tests/test_modeling_tf_longformer.py
index 0fa0bb68a8d4..d9f6d93d612d 100644
--- a/tests/test_modeling_tf_longformer.py
+++ b/tests/test_modeling_tf_longformer.py
@@ -622,3 +622,25 @@ def test_inference_masked_lm_long(self):
         tf.debugging.assert_near(tf.reduce_mean(loss), expected_loss, rtol=1e-4)
         tf.debugging.assert_near(tf.reduce_sum(prediction_scores), expected_prediction_scores_sum, rtol=1e-4)
         tf.debugging.assert_near(tf.reduce_mean(prediction_scores), expected_prediction_scores_mean, rtol=1e-4)
+
+    @slow
+    def test_inference_masked_lm(self):
+        model = TFLongformerForMaskedLM.from_pretrained("lysandre/tiny-longformer-random")
+        input_ids = tf.constant([[0, 1, 2, 3, 4, 5]])
+        output = model(input_ids)[0]
+
+        expected_shape = [1, 6, 10]
+        self.assertEqual(output.shape, expected_shape)
+
+        print(output[:, :3, :3])
+
+        expected_slice = tf.constant(
+            [
+                [
+                    [-0.04926379, 0.0367098, 0.02099686],
+                    [0.03940692, 0.01547744, -0.01448723],
+                    [0.03495252, -0.05900355, -0.01675752],
+                ]
+            ]
+        )
+        tf.debugging.assert_near(output[:, :3, :3], expected_slice, atol=1e-4)
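
Note: a minimal sketch of why the unqualified squeeze is unsafe, assuming a
hypothetical discriminator-head output of shape [1, 6, 1] (batch size 1,
sequence length 6, one logit per token); the concrete shapes here are
illustrative, not taken from the diff:

    import torch

    # Output of the head's final Linear(hidden_size, 1) projection for a
    # single-example batch: shape [1, 6, 1].
    logits = torch.zeros(1, 6, 1)

    # squeeze() with no argument removes *every* size-1 dimension, so the
    # batch dimension is dropped along with the trailing one.
    print(logits.squeeze().shape)    # torch.Size([6])

    # squeeze(-1) removes only the trailing singleton, preserving the batch
    # dimension that downstream code expects.
    print(logits.squeeze(-1).shape)  # torch.Size([1, 6])

The same reasoning applies on the TF side: tf.squeeze(x) with no axis
argument drops all size-1 dimensions, while tf.squeeze(x, -1) removes only
the last one, which is why both frameworks are patched the same way and the
new integration tests run batch-size-1 inputs through tiny random models.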