From 022cea834ee4971a004d241fe50454537aa2a48b Mon Sep 17 00:00:00 2001
From: regisss
Date: Wed, 29 Jun 2022 20:57:33 +0200
Subject: [PATCH 1/4] Add ONNX support for LayoutLMv3

---
 docs/source/en/serialization.mdx            |   1 +
 .../models/layoutlmv3/__init__.py           |  12 +-
 .../layoutlmv3/configuration_layoutlmv3.py  | 114 ++++++++++++++++++
 src/transformers/onnx/features.py           |   7 ++
 tests/onnx/test_onnx_v2.py                  |   1 +
 5 files changed, 133 insertions(+), 2 deletions(-)

diff --git a/docs/source/en/serialization.mdx b/docs/source/en/serialization.mdx
index f5c113e31019..034d4c1bf652 100644
--- a/docs/source/en/serialization.mdx
+++ b/docs/source/en/serialization.mdx
@@ -70,6 +70,7 @@ Ready-made configurations include the following architectures:
 - GPT-J
 - I-BERT
 - LayoutLM
+- LayoutLMv3
 - LongT5
 - M2M100
 - Marian
diff --git a/src/transformers/models/layoutlmv3/__init__.py b/src/transformers/models/layoutlmv3/__init__.py
index a7d104040bd6..cfa26057e87b 100644
--- a/src/transformers/models/layoutlmv3/__init__.py
+++ b/src/transformers/models/layoutlmv3/__init__.py
@@ -28,7 +28,11 @@
 _import_structure = {
-    "configuration_layoutlmv3": ["LAYOUTLMV3_PRETRAINED_CONFIG_ARCHIVE_MAP", "LayoutLMv3Config"],
+    "configuration_layoutlmv3": [
+        "LAYOUTLMV3_PRETRAINED_CONFIG_ARCHIVE_MAP",
+        "LayoutLMv3Config",
+        "LayoutLMv3OnnxConfig",
+    ],
     "processing_layoutlmv3": ["LayoutLMv3Processor"],
     "tokenization_layoutlmv3": ["LayoutLMv3Tokenizer"],
 }
@@ -66,7 +70,11 @@
 if TYPE_CHECKING:
-    from .configuration_layoutlmv3 import LAYOUTLMV3_PRETRAINED_CONFIG_ARCHIVE_MAP, LayoutLMv3Config
+    from .configuration_layoutlmv3 import (
+        LAYOUTLMV3_PRETRAINED_CONFIG_ARCHIVE_MAP,
+        LayoutLMv3Config,
+        LayoutLMv3OnnxConfig,
+    )
     from .processing_layoutlmv3 import LayoutLMv3Processor
     from .tokenization_layoutlmv3 import LayoutLMv3Tokenizer
diff --git a/src/transformers/models/layoutlmv3/configuration_layoutlmv3.py b/src/transformers/models/layoutlmv3/configuration_layoutlmv3.py
index ebde107947a1..220631e07652 100644
--- a/src/transformers/models/layoutlmv3/configuration_layoutlmv3.py
+++ b/src/transformers/models/layoutlmv3/configuration_layoutlmv3.py
@@ -14,7 +14,17 @@
 # limitations under the License.
""" LayoutLMv3 model configuration""" +from collections import OrderedDict +from typing import Any, Mapping, Optional + +from packaging import version + +from transformers import TensorType + from ...configuration_utils import PretrainedConfig +from ...onnx import OnnxConfig +from ...onnx.utils import compute_effective_axis_dimension +from ...processing_utils import ProcessorMixin from ...utils import logging @@ -176,3 +186,107 @@ def __init__( self.num_channels = num_channels self.patch_size = patch_size self.classifier_dropout = classifier_dropout + + +class LayoutLMv3OnnxConfig(OnnxConfig): + + torch_onnx_minimum_version = version.parse("1.12") + + @property + def inputs(self) -> Mapping[str, Mapping[int, str]]: + # The order of inputs is different for question answering and sequence classification + if self.task in ["question-answering", "sequence-classification"]: + return OrderedDict( + [ + ("input_ids", {0: "batch", 1: "sequence"}), + ("attention_mask", {0: "batch", 1: "sequence"}), + ("bbox", {0: "batch", 1: "sequence"}), + ("pixel_values", {0: "batch", 1: "sequence"}), + ] + ) + else: + return OrderedDict( + [ + ("input_ids", {0: "batch", 1: "sequence"}), + ("bbox", {0: "batch", 1: "sequence"}), + ("attention_mask", {0: "batch", 1: "sequence"}), + ("pixel_values", {0: "batch", 1: "sequence"}), + ] + ) + + @property + def atol_for_validation(self) -> float: + return 1e-5 + + @property + def default_onnx_opset(self) -> int: + return 12 + + def generate_dummy_inputs( + self, + processor: ProcessorMixin, + batch_size: int = -1, + seq_length: int = -1, + is_pair: bool = False, + framework: Optional[TensorType] = None, + num_channels: int = 3, + image_width: int = 40, + image_height: int = 40, + ) -> Mapping[str, Any]: + """ + Generate inputs to provide to the ONNX exporter for the specific framework + + Args: + processor ([`ProcessorMixin`]): + The processor associated with this model configuration. + batch_size (`int`, *optional*, defaults to -1): + The batch size to export the model for (-1 means dynamic axis). + seq_length (`int`, *optional*, defaults to -1): + The sequence length to export the model for (-1 means dynamic axis). + is_pair (`bool`, *optional*, defaults to `False`): + Indicate if the input is a pair (sentence 1, sentence 2). + framework (`TensorType`, *optional*, defaults to `None`): + The framework (PyTorch or TensorFlow) that the processor will generate tensors for. + num_channels (`int`, *optional*, defaults to 3): + The number of channels of the generated images. + image_width (`int`, *optional*, defaults to 40): + The width of the generated images. + image_height (`int`, *optional*, defaults to 40): + The height of the generated images. 
+
+        Returns:
+            Mapping[str, Any]: _description_
+        """
+
+        # A dummy image is used so OCR should not be applied
+        setattr(processor.feature_extractor, "apply_ocr", False)
+
+        # If dynamic axis (-1) we forward with a fixed dimension of 2 samples to avoid optimizations made by ONNX
+        batch_size = compute_effective_axis_dimension(
+            batch_size, fixed_dimension=OnnxConfig.default_fixed_batch, num_token_to_add=0
+        )
+        # If dynamic axis (-1) we forward with a fixed dimension of 8 tokens to avoid optimizations made by ONNX
+        token_to_add = processor.tokenizer.num_special_tokens_to_add(is_pair)
+        seq_length = compute_effective_axis_dimension(
+            seq_length, fixed_dimension=OnnxConfig.default_fixed_sequence, num_token_to_add=token_to_add
+        )
+        # Generate dummy inputs according to compute batch and sequence
+        dummy_text = [[" ".join([processor.tokenizer.unk_token]) * seq_length]] * batch_size
+
+        # Generate dummy bounding boxes
+        dummy_bboxes = [[[48, 84, 73, 128]]] * batch_size
+
+        # If dynamic axis (-1) we forward with a fixed dimension of 2 samples to avoid optimizations made by ONNX
+        # batch_size = compute_effective_axis_dimension(batch_size, fixed_dimension=OnnxConfig.default_fixed_batch)
+        dummy_image = self._generate_dummy_images(batch_size, num_channels, image_height, image_width)
+
+        inputs = dict(
+            processor(
+                dummy_image,
+                text=dummy_text,
+                boxes=dummy_bboxes,
+                return_tensors=framework,
+            )
+        )
+
+        return inputs
diff --git a/src/transformers/onnx/features.py b/src/transformers/onnx/features.py
index c37c12ca2a89..86b241e68f71 100644
--- a/src/transformers/onnx/features.py
+++ b/src/transformers/onnx/features.py
@@ -313,6 +313,13 @@ class FeaturesManager:
             "token-classification",
             onnx_config_cls="models.layoutlm.LayoutLMOnnxConfig",
         ),
+        "layoutlmv3": supported_features_mapping(
+            "default",
+            "question-answering",
+            "sequence-classification",
+            "token-classification",
+            onnx_config_cls="models.layoutlmv3.LayoutLMv3OnnxConfig",
+        ),
         "longt5": supported_features_mapping(
             "default",
             "default-with-past",
diff --git a/tests/onnx/test_onnx_v2.py b/tests/onnx/test_onnx_v2.py
index 50601598f5aa..f3e5aef20f39 100644
--- a/tests/onnx/test_onnx_v2.py
+++ b/tests/onnx/test_onnx_v2.py
@@ -195,6 +195,7 @@ def test_values_override(self):
     ("xlm", "xlm-clm-ende-1024"),
     ("xlm-roberta", "xlm-roberta-base"),
     ("layoutlm", "microsoft/layoutlm-base-uncased"),
+    ("layoutlmv3", "microsoft/layoutlmv3-base"),
     ("vit", "google/vit-base-patch16-224"),
     ("deit", "facebook/deit-small-patch16-224"),
     ("beit", "microsoft/beit-base-patch16-224"),

From e32dc24440b0f8f4239ba342ec689fd1c1118e14 Mon Sep 17 00:00:00 2001
From: regisss
Date: Wed, 29 Jun 2022 22:45:09 +0200
Subject: [PATCH 2/4] Update docstrings

---
 src/transformers/onnx/convert.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/src/transformers/onnx/convert.py b/src/transformers/onnx/convert.py
index c8f75f2fd3df..d4deb796683d 100644
--- a/src/transformers/onnx/convert.py
+++ b/src/transformers/onnx/convert.py
@@ -40,6 +40,7 @@
 if TYPE_CHECKING:
     from ..feature_extraction_utils import FeatureExtractionMixin
+    from ..processing_utils import ProcessorMixin
     from ..tokenization_utils import PreTrainedTokenizer
@@ -80,7 +81,7 @@ def check_onnxruntime_requirements(minimum_version: Version):
 def export_pytorch(
-    preprocessor: Union["PreTrainedTokenizer", "FeatureExtractionMixin"],
+    preprocessor: Union["PreTrainedTokenizer", "FeatureExtractionMixin", "ProcessorMixin"],
     model: "PreTrainedModel",
     config: OnnxConfig,
     opset: int,
@@ -92,7 +93,7 @@ def export_pytorch(
     Export a PyTorch model to an ONNX Intermediate Representation (IR)

     Args:
-        preprocessor: ([`PreTrainedTokenizer`] or [`FeatureExtractionMixin`]):
+        preprocessor: ([`PreTrainedTokenizer`], [`FeatureExtractionMixin`] or [`ProcessorMixin`]):
             The preprocessor used for encoding the data.
         model ([`PreTrainedModel`]):
             The model to export.
@@ -269,7 +270,7 @@ def export_tensorflow(
 def export(
-    preprocessor: Union["PreTrainedTokenizer", "FeatureExtractionMixin"],
+    preprocessor: Union["PreTrainedTokenizer", "FeatureExtractionMixin", "ProcessorMixin"],
     model: Union["PreTrainedModel", "TFPreTrainedModel"],
     config: OnnxConfig,
     opset: int,
@@ -281,7 +282,7 @@ def export(
     Export a Pytorch or TensorFlow model to an ONNX Intermediate Representation (IR)

     Args:
-        preprocessor: ([`PreTrainedTokenizer`] or [`FeatureExtractionMixin`]):
+        preprocessor: ([`PreTrainedTokenizer`], [`FeatureExtractionMixin`] or [`ProcessorMixin`]):
             The preprocessor used for encoding the data.
         model ([`PreTrainedModel`] or [`TFPreTrainedModel`]):
             The model to export.
@@ -339,7 +340,7 @@ def export(
 def validate_model_outputs(
     config: OnnxConfig,
-    preprocessor: Union["PreTrainedTokenizer", "FeatureExtractionMixin"],
+    preprocessor: Union["PreTrainedTokenizer", "FeatureExtractionMixin", "ProcessorMixin"],
     reference_model: Union["PreTrainedModel", "TFPreTrainedModel"],
     onnx_model: Path,
     onnx_named_outputs: List[str],

From 771c85c8c5fc8dd092e6fa5e3744ed7ce0df992b Mon Sep 17 00:00:00 2001
From: regisss
Date: Wed, 29 Jun 2022 22:54:39 +0200
Subject: [PATCH 3/4] Update empty description in docstring

---
 src/transformers/models/layoutlmv3/configuration_layoutlmv3.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/transformers/models/layoutlmv3/configuration_layoutlmv3.py b/src/transformers/models/layoutlmv3/configuration_layoutlmv3.py
index 220631e07652..7c6119ad0996 100644
--- a/src/transformers/models/layoutlmv3/configuration_layoutlmv3.py
+++ b/src/transformers/models/layoutlmv3/configuration_layoutlmv3.py
@@ -255,7 +255,7 @@ def generate_dummy_inputs(
                 The height of the generated images.

         Returns:
-            Mapping[str, Any]: _description_
+            Mapping[str, Any]: holding the kwargs to provide to the model's forward function
         """

         # A dummy image is used so OCR should not be applied

From fdc9d9866466d875d9fdfa83b233bdf4d3fbfc51 Mon Sep 17 00:00:00 2001
From: regisss
Date: Thu, 30 Jun 2022 17:37:38 +0200
Subject: [PATCH 4/4] Fix imports and type hints

---
 .../models/layoutlmv3/configuration_layoutlmv3.py | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/src/transformers/models/layoutlmv3/configuration_layoutlmv3.py b/src/transformers/models/layoutlmv3/configuration_layoutlmv3.py
index 7c6119ad0996..d9ddde6289c9 100644
--- a/src/transformers/models/layoutlmv3/configuration_layoutlmv3.py
+++ b/src/transformers/models/layoutlmv3/configuration_layoutlmv3.py
@@ -15,19 +15,21 @@
 """ LayoutLMv3 model configuration"""
 from collections import OrderedDict
-from typing import Any, Mapping, Optional
+from typing import TYPE_CHECKING, Any, Mapping, Optional

 from packaging import version

-from transformers import TensorType
-
 from ...configuration_utils import PretrainedConfig
 from ...onnx import OnnxConfig
 from ...onnx.utils import compute_effective_axis_dimension
-from ...processing_utils import ProcessorMixin
 from ...utils import logging


+if TYPE_CHECKING:
+    from ...processing_utils import ProcessorMixin
+    from ...utils import TensorType
+
+
 logger = logging.get_logger(__name__)

 LAYOUTLMV3_PRETRAINED_CONFIG_ARCHIVE_MAP = {
@@ -224,11 +226,11 @@ def default_onnx_opset(self) -> int:
     def generate_dummy_inputs(
         self,
-        processor: ProcessorMixin,
+        processor: "ProcessorMixin",
         batch_size: int = -1,
         seq_length: int = -1,
         is_pair: bool = False,
-        framework: Optional[TensorType] = None,
+        framework: Optional["TensorType"] = None,
         num_channels: int = 3,
         image_width: int = 40,
         image_height: int = 40,
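
A usage note for reviewers, not part of the patch series itself: the sketch below exercises the export path these patches enable, end to end. It assumes a transformers build that includes the series, plus torch and onnxruntime installed; the checkpoint name is the one added to the test suite in patch 1, while the output file name and task choice are illustrative.

    from pathlib import Path

    from transformers import LayoutLMv3Model, LayoutLMv3Processor
    from transformers.models.layoutlmv3 import LayoutLMv3OnnxConfig
    from transformers.onnx import export, validate_model_outputs

    checkpoint = "microsoft/layoutlmv3-base"  # same checkpoint as the new test entry
    processor = LayoutLMv3Processor.from_pretrained(checkpoint)
    model = LayoutLMv3Model.from_pretrained(checkpoint)

    # "default" is one of the four features registered in FeaturesManager above
    onnx_config = LayoutLMv3OnnxConfig(model.config, task="default")

    # export() accepts a ProcessorMixin as the preprocessor thanks to patch 2
    onnx_path = Path("layoutlmv3.onnx")
    onnx_inputs, onnx_outputs = export(
        preprocessor=processor,
        model=model,
        config=onnx_config,
        opset=onnx_config.default_onnx_opset,  # 12, per the config above
        output=onnx_path,
    )

    # Compares PyTorch outputs to ONNX Runtime outputs within atol (1e-5 here)
    validate_model_outputs(
        onnx_config, processor, model, onnx_path, onnx_outputs, onnx_config.atol_for_validation
    )

The documented CLI route, python -m transformers.onnx --model=microsoft/layoutlmv3-base onnx/, resolves the same LayoutLMv3OnnxConfig through the FeaturesManager entry added above.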
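
In the same spirit, a hypothetical smoke test for the dummy-input machinery alone, with no export involved: it checks the task-dependent input ordering and that every declared ONNX input comes back from the processor. Recall that batch_size and seq_length of -1 fall back to the fixed dimensions 2 and 8 via compute_effective_axis_dimension.

    from transformers import LayoutLMv3Config, LayoutLMv3Processor
    from transformers.models.layoutlmv3 import LayoutLMv3OnnxConfig

    processor = LayoutLMv3Processor.from_pretrained("microsoft/layoutlmv3-base")
    onnx_config = LayoutLMv3OnnxConfig(LayoutLMv3Config(), task="question-answering")

    # Question answering puts attention_mask before bbox, per the inputs property
    assert list(onnx_config.inputs) == ["input_ids", "attention_mask", "bbox", "pixel_values"]

    # framework=None returns plain Python lists instead of framework tensors
    dummy_inputs = onnx_config.generate_dummy_inputs(processor, framework=None)
    for name in onnx_config.inputs:
        assert name in dummy_inputs, f"missing ONNX input: {name}"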
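
Finally, for context on patch 4: it applies the standard pattern for avoiding runtime imports that exist only for type hints, shown generically below (the function name is illustrative, not repo code). Imports move under typing.TYPE_CHECKING and the annotations become strings, so nothing extra is imported when the module loads — presumably the motivation for dropping the top-level `from transformers import TensorType`, since importing from the package root inside a submodule can trip circular imports during package initialization.

    from typing import TYPE_CHECKING

    if TYPE_CHECKING:
        # Evaluated by static type checkers only, never at runtime
        from transformers.processing_utils import ProcessorMixin

    def tokenizer_of(processor: "ProcessorMixin"):
        # The string annotation is never resolved when this runs, so the
        # import above is not needed at call time
        return processor.tokenizer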