From e6270695b7e8b3406eea8d08394085aa109eb4b8 Mon Sep 17 00:00:00 2001
From: Lewis Tunstall <lewis.c.tunstall@gmail.com>
Date: Thu, 10 Mar 2022 14:00:11 +0100
Subject: [PATCH 1/8] Fix duplicate arguments passed to dummy inputs in ONNX
 export

---
 src/transformers/onnx/convert.py | 45 +++++++++++++++++++++++++-------
 1 file changed, 35 insertions(+), 10 deletions(-)

diff --git a/src/transformers/onnx/convert.py b/src/transformers/onnx/convert.py
index 42b57d2c5402..a0b4da533498 100644
--- a/src/transformers/onnx/convert.py
+++ b/src/transformers/onnx/convert.py
@@ -100,11 +100,18 @@ def export_pytorch(
         `Tuple[List[str], List[str]]`: A tuple with an ordered list of the model's inputs, and the named inputs from
         the ONNX configuration.
     """
+    from ..tokenization_utils_base import PreTrainedTokenizerBase
+
+    if isinstance(preprocessor, PreTrainedTokenizerBase) and tokenizer is not None:
+        raise ValueError("You cannot provide both a tokenizer and a preprocessor to generate dummy inputs.")
     if tokenizer is not None:
         warnings.warn(
             "The `tokenizer` argument is deprecated and will be removed in version 5 of Transformers. Use `preprocessor` instead.",
             FutureWarning,
         )
+        logger.warning("Overwriting the `preprocessor` argument with `tokenizer` to generate dummmy inputs.")
+        preprocessor = tokenizer
+
     if issubclass(type(model), PreTrainedModel):
         import torch
         from torch.onnx import export as onnx_export
@@ -123,9 +130,7 @@ def export_pytorch(
 
             # Ensure inputs match
             # TODO: Check when exporting QA we provide "is_pair=True"
-            model_inputs = config.generate_dummy_inputs(
-                preprocessor, tokenizer=tokenizer, framework=TensorType.PYTORCH
-            )
+            model_inputs = config.generate_dummy_inputs(preprocessor, framework=TensorType.PYTORCH)
             inputs_match, matched_inputs = ensure_model_and_config_inputs_match(model, model_inputs.keys())
             onnx_outputs = list(config.outputs.keys())
 
@@ -213,11 +218,17 @@ def export_tensorflow(
     import onnx
     import tf2onnx
 
+    from ..tokenization_utils_base import PreTrainedTokenizerBase
+
+    if isinstance(preprocessor, PreTrainedTokenizerBase) and tokenizer is not None:
+        raise ValueError("You cannot provide both a tokenizer and a preprocessor to generate dummy inputs.")
     if tokenizer is not None:
         warnings.warn(
             "The `tokenizer` argument is deprecated and will be removed in version 5 of Transformers. Use `preprocessor` instead.",
             FutureWarning,
         )
+        logger.warning("Overwriting the `preprocessor` argument with `tokenizer` to generate dummmy inputs.")
+        preprocessor = tokenizer
 
     model.config.return_dict = True
 
@@ -229,7 +240,7 @@ def export_tensorflow(
             setattr(model.config, override_config_key, override_config_value)
 
     # Ensure inputs match
-    model_inputs = config.generate_dummy_inputs(preprocessor, tokenizer=tokenizer, framework=TensorType.TENSORFLOW)
+    model_inputs = config.generate_dummy_inputs(preprocessor, framework=TensorType.TENSORFLOW)
     inputs_match, matched_inputs = ensure_model_and_config_inputs_match(model, model_inputs.keys())
     onnx_outputs = list(config.outputs.keys())
 
@@ -273,11 +284,17 @@ def export(
             "Cannot convert because neither PyTorch nor TensorFlow are not installed. "
             "Please install torch or tensorflow first."
         )
+    from ..tokenization_utils_base import PreTrainedTokenizerBase
+
+    if isinstance(preprocessor, PreTrainedTokenizerBase) and tokenizer is not None:
+        raise ValueError("You cannot provide both a tokenizer and a preprocessor to generate dummy inputs.")
     if tokenizer is not None:
         warnings.warn(
             "The `tokenizer` argument is deprecated and will be removed in version 5 of Transformers. Use `preprocessor` instead.",
             FutureWarning,
         )
+        logger.warning("Overwriting the `preprocessor` argument with `tokenizer` to generate dummmy inputs.")
+        preprocessor = tokenizer
 
     if is_torch_available():
         from ..file_utils import torch_version
@@ -307,18 +324,26 @@ def validate_model_outputs(
 ):
     from onnxruntime import InferenceSession, SessionOptions
 
+    from ..tokenization_utils_base import PreTrainedTokenizerBase
+
     logger.info("Validating ONNX model...")
 
+    if isinstance(preprocessor, PreTrainedTokenizerBase) and tokenizer is not None:
+        raise ValueError("You cannot provide both a tokenizer and a preprocessor to validate model inputs.")
+    if tokenizer is not None:
+        warnings.warn(
+            "The `tokenizer` argument is deprecated and will be removed in version 5 of Transformers. Use `preprocessor` instead.",
+            FutureWarning,
+        )
+        logger.warning("Overwriting the `preprocessor` argument with `tokenizer` to generate dummmy inputs.")
+        preprocessor = tokenizer
+
     # TODO: generate inputs with a different batch_size and seq_len that was used for conversion to properly test
     # dynamic input shapes.
     if issubclass(type(reference_model), PreTrainedModel):
-        reference_model_inputs = config.generate_dummy_inputs(
-            preprocessor, tokenizer=tokenizer, framework=TensorType.PYTORCH
-        )
+        reference_model_inputs = config.generate_dummy_inputs(preprocessor, framework=TensorType.PYTORCH)
     else:
-        reference_model_inputs = config.generate_dummy_inputs(
-            preprocessor, tokenizer=tokenizer, framework=TensorType.TENSORFLOW
-        )
+        reference_model_inputs = config.generate_dummy_inputs(preprocessor, framework=TensorType.TENSORFLOW)
 
     # Create ONNX Runtime session
     options = SessionOptions()

From 0af46e6be235d64b88242faaffcd7446e75810c9 Mon Sep 17 00:00:00 2001
From: Lewis Tunstall <lewis.c.tunstall@gmail.com>
Date: Thu, 10 Mar 2022 14:32:04 +0100
Subject: [PATCH 2/8] Fix logging messages

---
 src/transformers/onnx/convert.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/transformers/onnx/convert.py b/src/transformers/onnx/convert.py
index a0b4da533498..28baa1c7771e 100644
--- a/src/transformers/onnx/convert.py
+++ b/src/transformers/onnx/convert.py
@@ -103,7 +103,7 @@ def export_pytorch(
     from ..tokenization_utils_base import PreTrainedTokenizerBase
 
     if isinstance(preprocessor, PreTrainedTokenizerBase) and tokenizer is not None:
-        raise ValueError("You cannot provide both a tokenizer and a preprocessor to generate dummy inputs.")
+        raise ValueError("You cannot provide both a tokenizer and a preprocessor to export the model.")
     if tokenizer is not None:
         warnings.warn(
             "The `tokenizer` argument is deprecated and will be removed in version 5 of Transformers. Use `preprocessor` instead.",
@@ -221,7 +221,7 @@ def export_tensorflow(
     from ..tokenization_utils_base import PreTrainedTokenizerBase
 
     if isinstance(preprocessor, PreTrainedTokenizerBase) and tokenizer is not None:
-        raise ValueError("You cannot provide both a tokenizer and a preprocessor to generate dummy inputs.")
+        raise ValueError("You cannot provide both a tokenizer and preprocessor to export the model.")
     if tokenizer is not None:
         warnings.warn(
             "The `tokenizer` argument is deprecated and will be removed in version 5 of Transformers. Use `preprocessor` instead.",
@@ -287,7 +287,7 @@ def export(
     from ..tokenization_utils_base import PreTrainedTokenizerBase
 
     if isinstance(preprocessor, PreTrainedTokenizerBase) and tokenizer is not None:
-        raise ValueError("You cannot provide both a tokenizer and a preprocessor to generate dummy inputs.")
+        raise ValueError("You cannot provide both a tokenizer and a preprocessor to export the model.")
     if tokenizer is not None:
         warnings.warn(
             "The `tokenizer` argument is deprecated and will be removed in version 5 of Transformers. Use `preprocessor` instead.",
@@ -329,7 +329,7 @@ def validate_model_outputs(
     logger.info("Validating ONNX model...")
 
     if isinstance(preprocessor, PreTrainedTokenizerBase) and tokenizer is not None:
-        raise ValueError("You cannot provide both a tokenizer and a preprocessor to validate model inputs.")
+        raise ValueError("You cannot provide both a tokenizer and a preprocessor to validate the model outputs.")
     if tokenizer is not None:
         warnings.warn(
             "The `tokenizer` argument is deprecated and will be removed in version 5 of Transformers. Use `preprocessor` instead.",

From d6e0361f9f17813d21164ffab93522ed48c33543 Mon Sep 17 00:00:00 2001
From: Lewis Tunstall <lewis.c.tunstall@gmail.com>
Date: Thu, 10 Mar 2022 14:32:18 +0100
Subject: [PATCH 3/8] Fix M2M100 ONNX config

---
 src/transformers/models/m2m_100/configuration_m2m_100.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/transformers/models/m2m_100/configuration_m2m_100.py b/src/transformers/models/m2m_100/configuration_m2m_100.py
index 62a63d248b90..180950f8c7b9 100644
--- a/src/transformers/models/m2m_100/configuration_m2m_100.py
+++ b/src/transformers/models/m2m_100/configuration_m2m_100.py
@@ -198,13 +198,13 @@ def _generate_dummy_inputs_for_sequence_classification_and_question_answering(
         # Did not use super(OnnxConfigWithPast, self).generate_dummy_inputs for code clarity.
         # If dynamic axis (-1) we forward with a fixed dimension of 2 samples to avoid optimizations made by ONNX
         batch_size = compute_effective_axis_dimension(
-            batch_size, fixed_dimension=OnnxConfig.DEFAULT_FIXED_BATCH, num_token_to_add=0
+            batch_size, fixed_dimension=OnnxConfig.default_fixed_batch, num_token_to_add=0
         )
 
         # If dynamic axis (-1) we forward with a fixed dimension of 8 tokens to avoid optimizations made by ONNX
         token_to_add = tokenizer.num_special_tokens_to_add(is_pair)
         seq_length = compute_effective_axis_dimension(
-            seq_length, fixed_dimension=OnnxConfig.DEFAULT_FIXED_SEQUENCE, num_token_to_add=token_to_add
+            seq_length, fixed_dimension=OnnxConfig.default_fixed_sequence, num_token_to_add=token_to_add
         )
 
         # Generate dummy inputs according to compute batch and sequence

From abe606da79dd710a43bb9fbe72c137c84d3e5f99 Mon Sep 17 00:00:00 2001
From: Lewis Tunstall <lewis.c.tunstall@gmail.com>
Date: Thu, 10 Mar 2022 16:12:37 +0100
Subject: [PATCH 4/8] Integrate reviewer comments

---
 src/transformers/onnx/convert.py | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/src/transformers/onnx/convert.py b/src/transformers/onnx/convert.py
index 28baa1c7771e..e47a73aa18c4 100644
--- a/src/transformers/onnx/convert.py
+++ b/src/transformers/onnx/convert.py
@@ -22,6 +22,7 @@
 from packaging.version import Version, parse
 
 from ..file_utils import TensorType, is_tf_available, is_torch_available, is_torch_onnx_dict_inputs_support_available
+from ..tokenization_utils_base import PreTrainedTokenizerBase
 from ..utils import logging
 from .config import OnnxConfig
 
@@ -100,7 +101,6 @@ def export_pytorch(
         `Tuple[List[str], List[str]]`: A tuple with an ordered list of the model's inputs, and the named inputs from
         the ONNX configuration.
     """
-    from ..tokenization_utils_base import PreTrainedTokenizerBase
 
     if isinstance(preprocessor, PreTrainedTokenizerBase) and tokenizer is not None:
         raise ValueError("You cannot provide both a tokenizer and a preprocessor to export the model.")
@@ -109,7 +109,7 @@ def export_pytorch(
             "The `tokenizer` argument is deprecated and will be removed in version 5 of Transformers. Use `preprocessor` instead.",
             FutureWarning,
         )
-        logger.warning("Overwriting the `preprocessor` argument with `tokenizer` to generate dummmy inputs.")
+        logger.info("Overwriting the `preprocessor` argument with `tokenizer` to generate dummy inputs.")
         preprocessor = tokenizer
 
     if issubclass(type(model), PreTrainedModel):
@@ -218,8 +218,6 @@ def export_tensorflow(
     import onnx
     import tf2onnx
 
-    from ..tokenization_utils_base import PreTrainedTokenizerBase
-
     if isinstance(preprocessor, PreTrainedTokenizerBase) and tokenizer is not None:
         raise ValueError("You cannot provide both a tokenizer and preprocessor to export the model.")
     if tokenizer is not None:
@@ -227,7 +225,7 @@ def export_tensorflow(
             "The `tokenizer` argument is deprecated and will be removed in version 5 of Transformers. Use `preprocessor` instead.",
             FutureWarning,
         )
-        logger.warning("Overwriting the `preprocessor` argument with `tokenizer` to generate dummmy inputs.")
+        logger.info("Overwriting the `preprocessor` argument with `tokenizer` to generate dummy inputs.")
         preprocessor = tokenizer
 
     model.config.return_dict = True
@@ -284,7 +282,6 @@ def export(
             "Cannot convert because neither PyTorch nor TensorFlow are not installed. "
             "Please install torch or tensorflow first."
         )
-    from ..tokenization_utils_base import PreTrainedTokenizerBase
 
     if isinstance(preprocessor, PreTrainedTokenizerBase) and tokenizer is not None:
         raise ValueError("You cannot provide both a tokenizer and a preprocessor to export the model.")
@@ -293,7 +290,7 @@ def export(
             "The `tokenizer` argument is deprecated and will be removed in version 5 of Transformers. Use `preprocessor` instead.",
             FutureWarning,
         )
-        logger.warning("Overwriting the `preprocessor` argument with `tokenizer` to generate dummmy inputs.")
+        logger.info("Overwriting the `preprocessor` argument with `tokenizer` to generate dummy inputs.")
         preprocessor = tokenizer
 
     if is_torch_available():
@@ -324,8 +321,6 @@ def validate_model_outputs(
 ):
     from onnxruntime import InferenceSession, SessionOptions
 
-    from ..tokenization_utils_base import PreTrainedTokenizerBase
-
     logger.info("Validating ONNX model...")
 
     if isinstance(preprocessor, PreTrainedTokenizerBase) and tokenizer is not None:

From ee6a1e0195b76620aab7521d8eeb3623a21f9684 Mon Sep 17 00:00:00 2001
From: Lewis Tunstall <lewis.c.tunstall@gmail.com>
Date: Thu, 10 Mar 2022 16:13:15 +0100
Subject: [PATCH 5/8] Ensure we check PreTrained model only if torch is
 available

---
 src/transformers/onnx/convert.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/transformers/onnx/convert.py b/src/transformers/onnx/convert.py
index e47a73aa18c4..cb646948a821 100644
--- a/src/transformers/onnx/convert.py
+++ b/src/transformers/onnx/convert.py
@@ -330,12 +330,12 @@ def validate_model_outputs(
             "The `tokenizer` argument is deprecated and will be removed in version 5 of Transformers. Use `preprocessor` instead.",
             FutureWarning,
         )
-        logger.warning("Overwriting the `preprocessor` argument with `tokenizer` to generate dummmy inputs.")
+        logger.info("Overwriting the `preprocessor` argument with `tokenizer` to generate dummy inputs.")
         preprocessor = tokenizer
 
     # TODO: generate inputs with a different batch_size and seq_len that was used for conversion to properly test
     # dynamic input shapes.
-    if issubclass(type(reference_model), PreTrainedModel):
+    if is_torch_available() and issubclass(type(reference_model), PreTrainedModel):
         reference_model_inputs = config.generate_dummy_inputs(preprocessor, framework=TensorType.PYTORCH)
     else:
         reference_model_inputs = config.generate_dummy_inputs(preprocessor, framework=TensorType.TENSORFLOW)
@@ -388,7 +388,7 @@ def validate_model_outputs(
 
     # Check the shape and values match
     for name, ort_value in zip(onnx_named_outputs, onnx_outputs):
-        if issubclass(type(reference_model), PreTrainedModel):
+        if is_torch_available() and issubclass(type(reference_model), PreTrainedModel):
             ref_value = ref_outputs_dict[name].detach().numpy()
         else:
             ref_value = ref_outputs_dict[name].numpy()
@@ -422,7 +422,7 @@ def ensure_model_and_config_inputs_match(
 
     :param model_inputs: :param config_inputs: :return:
     """
-    if issubclass(type(model), PreTrainedModel):
+    if is_torch_available() and issubclass(type(model), PreTrainedModel):
         forward_parameters = signature(model.forward).parameters
     else:
         forward_parameters = signature(model.call).parameters

From 8f9558612c6f34debaa2e95c731f8559756ae81b Mon Sep 17 00:00:00 2001
From: Lewis Tunstall <lewis.c.tunstall@gmail.com>
Date: Thu, 10 Mar 2022 17:42:51 +0100
Subject: [PATCH 6/8] Remove TensorFlow tests for models without PyTorch parity

---
 tests/onnx/test_onnx_v2.py | 15 ++++-----------
 1 file changed, 4 insertions(+), 11 deletions(-)

diff --git a/tests/onnx/test_onnx_v2.py b/tests/onnx/test_onnx_v2.py
index a0a5e0f943a5..103bec4881d6 100644
--- a/tests/onnx/test_onnx_v2.py
+++ b/tests/onnx/test_onnx_v2.py
@@ -196,15 +196,12 @@ def test_values_override(self):
     ("m2m-100", "facebook/m2m100_418M"),
 }
 
+# TODO(lewtun): Include the same model types in `PYTORCH_EXPORT_MODELS` once TensorFlow has parity with the PyTorch model implementations.
 TENSORFLOW_EXPORT_DEFAULT_MODELS = {
     ("albert", "hf-internal-testing/tiny-albert"),
     ("bert", "bert-base-cased"),
-    ("ibert", "kssteven/ibert-roberta-base"),
-    ("camembert", "camembert-base"),
     ("distilbert", "distilbert-base-cased"),
     ("roberta", "roberta-base"),
-    ("xlm-roberta", "xlm-roberta-base"),
-    ("layoutlm", "microsoft/layoutlm-base-uncased"),
 }
 
 TENSORFLOW_EXPORT_WITH_PAST_MODELS = {
@@ -212,12 +209,8 @@ def test_values_override(self):
     ("gpt-neo", "EleutherAI/gpt-neo-125M"),
 }
 
-TENSORFLOW_EXPORT_SEQ2SEQ_WITH_PAST_MODELS = {
-    ("bart", "facebook/bart-base"),
-    ("mbart", "sshleifer/tiny-mbart"),
-    ("t5", "t5-small"),
-    ("marian", "Helsinki-NLP/opus-mt-en-de"),
-}
+# TODO(lewtun): Include the same model types in `PYTORCH_EXPORT_SEQ2SEQ_WITH_PAST_MODELS` once TensorFlow has parity with the PyTorch model implementations.
+TENSORFLOW_EXPORT_SEQ2SEQ_WITH_PAST_MODELS = {}
 
 
 def _get_models_to_test(export_models_list):
@@ -318,7 +311,7 @@ def test_tensorflow_export(self, test_name, name, model_name, feature, onnx_conf
     def test_tensorflow_export_with_past(self, test_name, name, model_name, feature, onnx_config_class_constructor):
         self._onnx_export(test_name, name, model_name, feature, onnx_config_class_constructor)
 
-    @parameterized.expand(_get_models_to_test(TENSORFLOW_EXPORT_SEQ2SEQ_WITH_PAST_MODELS))
+    @parameterized.expand(_get_models_to_test(TENSORFLOW_EXPORT_SEQ2SEQ_WITH_PAST_MODELS), skip_on_empty=True)
     @slow
     @require_tf
     def test_tensorflow_export_seq2seq_with_past(

From c748f0040a4648acbae6883c406531cf22bf861c Mon Sep 17 00:00:00 2001
From: Lewis Tunstall <lewis.c.tunstall@gmail.com>
Date: Thu, 10 Mar 2022 18:12:23 +0100
Subject: [PATCH 7/8] Remove GPT-Neo from TF tests

---
 tests/onnx/test_onnx_v2.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/onnx/test_onnx_v2.py b/tests/onnx/test_onnx_v2.py
index 103bec4881d6..bf5b0f0a7954 100644
--- a/tests/onnx/test_onnx_v2.py
+++ b/tests/onnx/test_onnx_v2.py
@@ -204,9 +204,9 @@ def test_values_override(self):
     ("roberta", "roberta-base"),
 }
 
+# TODO(lewtun): Include the same model types in `PYTORCH_EXPORT_WITH_PAST_MODELS` once TensorFlow has parity with the PyTorch model implementations.
 TENSORFLOW_EXPORT_WITH_PAST_MODELS = {
     ("gpt2", "gpt2"),
-    ("gpt-neo", "EleutherAI/gpt-neo-125M"),
 }
 
 # TODO(lewtun): Include the same model types in `PYTORCH_EXPORT_SEQ2SEQ_WITH_PAST_MODELS` once TensorFlow has parity with the PyTorch model implementations.

From 6c28992c2141faddcbdbb1ee9ff723b51710e339 Mon Sep 17 00:00:00 2001
From: Lewis Tunstall <lewis.c.tunstall@gmail.com>
Date: Thu, 10 Mar 2022 18:26:49 +0100
Subject: [PATCH 8/8] Remove GPT-2 from TF ONNX tests

---
 tests/onnx/test_onnx_v2.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/tests/onnx/test_onnx_v2.py b/tests/onnx/test_onnx_v2.py
index bf5b0f0a7954..26ef4370e272 100644
--- a/tests/onnx/test_onnx_v2.py
+++ b/tests/onnx/test_onnx_v2.py
@@ -205,9 +205,7 @@ def test_values_override(self):
 }
 
 # TODO(lewtun): Include the same model types in `PYTORCH_EXPORT_WITH_PAST_MODELS` once TensorFlow has parity with the PyTorch model implementations.
-TENSORFLOW_EXPORT_WITH_PAST_MODELS = {
-    ("gpt2", "gpt2"),
-}
+TENSORFLOW_EXPORT_WITH_PAST_MODELS = {}
 
 # TODO(lewtun): Include the same model types in `PYTORCH_EXPORT_SEQ2SEQ_WITH_PAST_MODELS` once TensorFlow has parity with the PyTorch model implementations.
 TENSORFLOW_EXPORT_SEQ2SEQ_WITH_PAST_MODELS = {}
@@ -305,7 +303,7 @@ def test_pytorch_export_seq2seq_with_past(
     def test_tensorflow_export(self, test_name, name, model_name, feature, onnx_config_class_constructor):
         self._onnx_export(test_name, name, model_name, feature, onnx_config_class_constructor)
 
-    @parameterized.expand(_get_models_to_test(TENSORFLOW_EXPORT_WITH_PAST_MODELS))
+    @parameterized.expand(_get_models_to_test(TENSORFLOW_EXPORT_WITH_PAST_MODELS), skip_on_empty=True)
     @slow
     @require_tf
     def test_tensorflow_export_with_past(self, test_name, name, model_name, feature, onnx_config_class_constructor):