Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
dbb12ff
add error message
ydshieh Mar 29, 2022
0ee384d
Use names in the error message
ydshieh Mar 29, 2022
adaffb2
allow ModelOutput
ydshieh Apr 1, 2022
4ab0cd5
rename to check_pt_tf_outputs and move outside
ydshieh Apr 1, 2022
4326096
fix style
ydshieh Apr 1, 2022
e189948
skip past_key_values in a better way
ydshieh Apr 1, 2022
4968a04
Add comments
ydshieh Apr 2, 2022
44e4810
improve code for label/loss
ydshieh Apr 2, 2022
02b5e39
make the logic clear by moving the ignore keys out
ydshieh Apr 2, 2022
f29bb1c
fix _postprocessing_to_ignore
ydshieh Apr 2, 2022
cac73b0
fix _postprocessing_to_ignore: create new outputs from the remaining …
ydshieh Apr 2, 2022
39abab3
ignore past_key_values in TFGPT2 models for now
ydshieh Apr 2, 2022
1a0721f
make check_pt_tf_outputs better regarding names
ydshieh Apr 3, 2022
eeefa95
move check_pt_tf_models outside
ydshieh Apr 4, 2022
a7869b3
rename methods
ydshieh Apr 5, 2022
8a03a14
remove test_pt_tf_model_equivalence in TFCLIPModelTest
ydshieh Apr 6, 2022
4b6abcf
Reduce TFViTMAEModelTest.test_pt_tf_model_equivalence
ydshieh Apr 6, 2022
c5ba554
move prepare_pt_inputs_from_tf_inputs outside check_pt_tf_models
ydshieh Apr 6, 2022
4828345
Fix quality
ydshieh Apr 6, 2022
1ecb04f
Clean-up TFLxmertModelTester.test_pt_tf_model_equivalence
ydshieh Apr 6, 2022
b7cc2c5
Fix quality
ydshieh Apr 6, 2022
f42529f
fix
ydshieh Apr 6, 2022
3b3386c
fix style
ydshieh Apr 6, 2022
4d77c0c
Clean-up TFLEDModelTest.test_pt_tf_model_equivalence
ydshieh Apr 6, 2022
5a41d5c
Fix quality
ydshieh Apr 6, 2022
fe9529f
add docstring
ydshieh Apr 6, 2022
b703e6c
improve comment
ydshieh Apr 7, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
127 changes: 1 addition & 126 deletions tests/clip/test_modeling_tf_clip.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,14 @@

import requests
from transformers import CLIPConfig, CLIPTextConfig, CLIPVisionConfig
from transformers.testing_utils import is_pt_tf_cross_test, require_tf, require_vision, slow
from transformers.testing_utils import require_tf, require_vision, slow
from transformers.utils import is_tf_available, is_vision_available

from ..test_configuration_common import ConfigTester
from ..test_modeling_tf_common import TFModelTesterMixin, floats_tensor, ids_tensor, random_attention_mask


if is_tf_available():
import numpy as np
import tensorflow as tf

from transformers import TFCLIPModel, TFCLIPTextModel, TFCLIPVisionModel, TFSharedEmbeddings
Expand Down Expand Up @@ -497,130 +496,6 @@ def test_keras_save_load(self):
after_outputs = model(inputs_dict)
self.assert_outputs_same(after_outputs, outputs)

# overwrite from common since CLIPModel/TFCLIPModel return CLIPOutput/TFCLIPOutput
@is_pt_tf_cross_test
def test_pt_tf_model_equivalence(self):
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is no longer needed - the test in TF common can handle nested outputs, including instances of ModelOutput.

import torch

import transformers

config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

for model_class in self.all_model_classes:
pt_model_class_name = model_class.__name__[2:] # Skip the "TF" at the beginning
pt_model_class = getattr(transformers, pt_model_class_name)

config.output_hidden_states = True

tf_model = model_class(config)
pt_model = pt_model_class(config)

# Check we can load pt model in tf and vice-versa with model => model functions

tf_model = transformers.load_pytorch_model_in_tf2_model(
tf_model, pt_model, tf_inputs=self._prepare_for_class(inputs_dict, model_class)
)
pt_model = transformers.load_tf2_model_in_pytorch_model(pt_model, tf_model)

# Check predictions on first output (logits/hidden-states) are close enought given low-level computational differences
pt_model.eval()
pt_inputs_dict = {}
for name, key in self._prepare_for_class(inputs_dict, model_class).items():
if type(key) == bool:
pt_inputs_dict[name] = key
elif name == "input_values":
pt_inputs_dict[name] = torch.from_numpy(key.numpy()).to(torch.float32)
elif name == "pixel_values":
pt_inputs_dict[name] = torch.from_numpy(key.numpy()).to(torch.float32)
else:
pt_inputs_dict[name] = torch.from_numpy(key.numpy()).to(torch.long)

# need to rename encoder-decoder "inputs" for PyTorch
if "inputs" in pt_inputs_dict and self.is_encoder_decoder:
pt_inputs_dict["input_ids"] = pt_inputs_dict.pop("inputs")

with torch.no_grad():
pto = pt_model(**pt_inputs_dict)
tfo = tf_model(self._prepare_for_class(inputs_dict, model_class), training=False)

self.assertEqual(len(tfo), len(pto), "Output lengths differ between TF and PyTorch")
for tf_output, pt_output in zip(tfo.to_tuple(), pto.to_tuple()):

if not (isinstance(tf_output, tf.Tensor) and isinstance(pt_output, torch.Tensor)):
continue

tf_out = tf_output.numpy()
pt_out = pt_output.numpy()

self.assertEqual(tf_out.shape, pt_out.shape, "Output component shapes differ between TF and PyTorch")

if len(tf_out.shape) > 0:

tf_nans = np.copy(np.isnan(tf_out))
pt_nans = np.copy(np.isnan(pt_out))

pt_out[tf_nans] = 0
tf_out[tf_nans] = 0
pt_out[pt_nans] = 0
tf_out[pt_nans] = 0

max_diff = np.amax(np.abs(tf_out - pt_out))
self.assertLessEqual(max_diff, 4e-2)

# Check we can load pt model in tf and vice-versa with checkpoint => model functions
with tempfile.TemporaryDirectory() as tmpdirname:
pt_checkpoint_path = os.path.join(tmpdirname, "pt_model.bin")
torch.save(pt_model.state_dict(), pt_checkpoint_path)
tf_model = transformers.load_pytorch_checkpoint_in_tf2_model(tf_model, pt_checkpoint_path)

tf_checkpoint_path = os.path.join(tmpdirname, "tf_model.h5")
tf_model.save_weights(tf_checkpoint_path)
pt_model = transformers.load_tf2_checkpoint_in_pytorch_model(pt_model, tf_checkpoint_path)

# Check predictions on first output (logits/hidden-states) are close enought given low-level computational differences
pt_model.eval()
pt_inputs_dict = {}
for name, key in self._prepare_for_class(inputs_dict, model_class).items():
if type(key) == bool:
key = np.array(key, dtype=bool)
pt_inputs_dict[name] = torch.from_numpy(key).to(torch.long)
elif name == "input_values":
pt_inputs_dict[name] = torch.from_numpy(key.numpy()).to(torch.float32)
elif name == "pixel_values":
pt_inputs_dict[name] = torch.from_numpy(key.numpy()).to(torch.float32)
else:
pt_inputs_dict[name] = torch.from_numpy(key.numpy()).to(torch.long)
# need to rename encoder-decoder "inputs" for PyTorch
if "inputs" in pt_inputs_dict and self.is_encoder_decoder:
pt_inputs_dict["input_ids"] = pt_inputs_dict.pop("inputs")

with torch.no_grad():
pto = pt_model(**pt_inputs_dict)
tfo = tf_model(self._prepare_for_class(inputs_dict, model_class))

self.assertEqual(len(tfo), len(pto), "Output lengths differ between TF and PyTorch")
for tf_output, pt_output in zip(tfo.to_tuple(), pto.to_tuple()):

if not (isinstance(tf_output, tf.Tensor) and isinstance(pt_output, torch.Tensor)):
continue

tf_out = tf_output.numpy()
pt_out = pt_output.numpy()

self.assertEqual(tf_out.shape, pt_out.shape, "Output component shapes differ between TF and PyTorch")

if len(tf_out.shape) > 0:
tf_nans = np.copy(np.isnan(tf_out))
pt_nans = np.copy(np.isnan(pt_out))

pt_out[tf_nans] = 0
tf_out[tf_nans] = 0
pt_out[pt_nans] = 0
tf_out[pt_nans] = 0

max_diff = np.amax(np.abs(tf_out - pt_out))
self.assertLessEqual(max_diff, 4e-2)

@slow
def test_model_from_pretrained(self):
for model_name in TF_CLIP_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
Expand Down
125 changes: 1 addition & 124 deletions tests/led/test_modeling_tf_led.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,13 @@
import unittest

from transformers import LEDConfig, is_tf_available
from transformers.testing_utils import is_pt_tf_cross_test, require_tf, slow
from transformers.testing_utils import require_tf, slow

from ..test_configuration_common import ConfigTester
from ..test_modeling_tf_common import TFModelTesterMixin, ids_tensor


if is_tf_available():
import numpy as np
import tensorflow as tf

from transformers import TFLEDForConditionalGeneration, TFLEDModel
Expand Down Expand Up @@ -362,128 +361,6 @@ def check_encoder_attentions_output(outputs):
self.assertEqual(model.config.output_hidden_states, True)
check_encoder_attentions_output(outputs)

# TODO: Remove this once a more thorough pt/tf equivalence could be implemented in `test_modeling_tf_common.py`.
# (Currently, such a test will fail some other model tests: it requires some time to fix them.)
@is_pt_tf_cross_test
def test_pt_tf_model_equivalence_extra(self):
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was previously added to give TF-LED a strong test while the common version was still a loose test.

Now the common test is (very) strong, we no longer need this test in TF-LED test.

# NOTE(review): body of `test_pt_tf_model_equivalence_extra` removed by this PR
# (the common TF equivalence test is now strong enough). Indentation below is
# reconstructed; the page scrape stripped it — confirm against the original file.
import torch

import transformers

def prepare_pt_inputs_from_tf_inputs(tf_inputs_dict):
    # Convert a dict of TF inputs to PT tensors: float tensors for audio/image
    # inputs, long tensors for everything else; booleans pass through unchanged.

    pt_inputs_dict = {}
    for name, key in tf_inputs_dict.items():
        if type(key) == bool:
            pt_inputs_dict[name] = key
        elif name == "input_values":
            pt_inputs_dict[name] = torch.from_numpy(key.numpy()).to(torch.float32)
        elif name == "pixel_values":
            pt_inputs_dict[name] = torch.from_numpy(key.numpy()).to(torch.float32)
        else:
            pt_inputs_dict[name] = torch.from_numpy(key.numpy()).to(torch.long)

    return pt_inputs_dict

config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

for model_class in self.all_model_classes:
    pt_model_class_name = model_class.__name__[2:]  # Skip the "TF" at the beginning
    pt_model_class = getattr(transformers, pt_model_class_name)

    config.output_hidden_states = True

    tf_model = model_class(config)
    pt_model = pt_model_class(config)

    tf_inputs_dict = self._prepare_for_class(inputs_dict, model_class)
    tf_inputs_dict_maybe_with_labels = self._prepare_for_class(inputs_dict, model_class, return_labels=True)

    # Check we can load pt model in tf and vice-versa with model => model functions

    tf_model = transformers.load_pytorch_model_in_tf2_model(tf_model, pt_model, tf_inputs=tf_inputs_dict)
    pt_model = transformers.load_tf2_model_in_pytorch_model(pt_model, tf_model)

    # Check predictions on first output (logits/hidden-states) are close enough given low-level computational differences
    pt_model.eval()

    pt_inputs_dict = prepare_pt_inputs_from_tf_inputs(tf_inputs_dict)
    pt_inputs_dict_maybe_with_labels = prepare_pt_inputs_from_tf_inputs(tf_inputs_dict_maybe_with_labels)

    # need to rename encoder-decoder "inputs" for PyTorch
    if "inputs" in pt_inputs_dict and self.is_encoder_decoder:
        pt_inputs_dict["input_ids"] = pt_inputs_dict.pop("inputs")

    with torch.no_grad():
        pto = pt_model(**pt_inputs_dict)
    tfo = tf_model(tf_inputs_dict, training=False)

    # Compare the first output (hidden states / logits) of the two frameworks.
    tf_hidden_states = tfo[0].numpy()
    pt_hidden_states = pto[0].numpy()

    # Zero out positions where either framework produced NaN so that NaNs
    # don't dominate the max-abs-diff comparison below.
    tf_nans = np.isnan(tf_hidden_states)
    pt_nans = np.isnan(pt_hidden_states)

    pt_hidden_states[tf_nans] = 0
    tf_hidden_states[tf_nans] = 0
    pt_hidden_states[pt_nans] = 0
    tf_hidden_states[pt_nans] = 0

    max_diff = np.amax(np.abs(tf_hidden_states - pt_hidden_states))
    self.assertLessEqual(max_diff, 1e-4)

    has_labels = any(
        x in tf_inputs_dict_maybe_with_labels for x in ["labels", "next_sentence_label", "start_positions"]
    )
    if has_labels:

        with torch.no_grad():
            pto = pt_model(**pt_inputs_dict_maybe_with_labels)
        tfo = tf_model(tf_inputs_dict_maybe_with_labels, training=False)

        # Some models' output classes don't have a `loss` attribute even though `labels` are passed.
        tf_loss = getattr(tfo, "loss", None)
        pt_loss = getattr(pto, "loss", None)

        # Some models require extra condition to return loss. For example, `BertForPreTraining` requires both
        # `labels` and `next_sentence_label`.
        # Moreover, some PT models return loss while the corresponding TF/Flax models don't.
        if tf_loss is not None and pt_loss is not None:

            tf_loss = tf.math.reduce_mean(tf_loss).numpy()
            pt_loss = pt_loss.numpy()

            tf_nans = np.isnan(tf_loss)
            pt_nans = np.isnan(pt_loss)
            # the 2 losses need to be both nan or both not nan
            # (`TapasForQuestionAnswering` gives nan loss here)
            self.assertEqual(tf_nans, pt_nans)

            if not tf_nans:
                max_diff = np.amax(np.abs(tf_loss - pt_loss))
                # `TFFunnelForTokenClassification` (and potentially other TF token classification models) give
                # large difference (up to 0.1x). PR #15294 addresses this issue.
                # There is also an inconsistency between PT/TF `XLNetLMHeadModel`.
                # Before these issues are fixed & merged, set a higher threshold here to pass the test.
                self.assertLessEqual(max_diff, 1e-4)

        # With labels present, index 1 is the logits (index 0 is the loss).
        tf_logits = tfo[1].numpy()
        pt_logits = pto[1].numpy()

        # check on the shape
        self.assertEqual(tf_logits.shape, pt_logits.shape)

        # Same NaN-masking as above before comparing values.
        tf_nans = np.isnan(tf_logits)
        pt_nans = np.isnan(pt_logits)

        pt_logits[tf_nans] = 0
        tf_logits[tf_nans] = 0
        pt_logits[pt_nans] = 0
        tf_logits[pt_nans] = 0

        max_diff = np.amax(np.abs(tf_logits - pt_logits))
        self.assertLessEqual(max_diff, 1e-4)

def test_xla_mode(self):
# TODO JP: Make LED XLA compliant
pass
Expand Down
Loading