From 3c7c71295047e562dbf6da25c5596099ce4622c4 Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Wed, 12 Oct 2022 11:53:13 +0000 Subject: [PATCH 01/11] Added onnx config whisper --- src/transformers/models/whisper/__init__.py | 4 +- .../models/whisper/configuration_whisper.py | 120 ++++++++++++++++++ src/transformers/onnx/config.py | 12 +- src/transformers/onnx/features.py | 9 ++ 4 files changed, 142 insertions(+), 3 deletions(-) diff --git a/src/transformers/models/whisper/__init__.py b/src/transformers/models/whisper/__init__.py index 71e354a93616..2528e03a4d2c 100644 --- a/src/transformers/models/whisper/__init__.py +++ b/src/transformers/models/whisper/__init__.py @@ -21,7 +21,7 @@ _import_structure = { - "configuration_whisper": ["WHISPER_PRETRAINED_CONFIG_ARCHIVE_MAP", "WhisperConfig"], + "configuration_whisper": ["WHISPER_PRETRAINED_CONFIG_ARCHIVE_MAP", "WhisperConfig", "WhisperOnnxConfig"], "feature_extraction_whisper": ["WhisperFeatureExtractor"], "processing_whisper": ["WhisperProcessor"], "tokenization_whisper": ["WhisperTokenizer"], @@ -55,7 +55,7 @@ ] if TYPE_CHECKING: - from .configuration_whisper import WHISPER_PRETRAINED_CONFIG_ARCHIVE_MAP, WhisperConfig + from .configuration_whisper import WHISPER_PRETRAINED_CONFIG_ARCHIVE_MAP, WhisperConfig, WhisperOnnxConfig from .feature_extraction_whisper import WhisperFeatureExtractor from .processing_whisper import WhisperProcessor from .tokenization_whisper import WhisperTokenizer diff --git a/src/transformers/models/whisper/configuration_whisper.py b/src/transformers/models/whisper/configuration_whisper.py index 6ee5ee90576c..0ef48e795b3f 100644 --- a/src/transformers/models/whisper/configuration_whisper.py +++ b/src/transformers/models/whisper/configuration_whisper.py @@ -14,10 +14,18 @@ # limitations under the License. """ Whisper model configuration""" +from collections import OrderedDict +from typing import TYPE_CHECKING, Any, Mapping, Optional + +from transformers.onnx.config import OnnxConfig, OnnxConfigWithPast, OnnxSeq2SeqConfigWithPast + from ...configuration_utils import PretrainedConfig from ...utils import logging +if TYPE_CHECKING: + from ... 
import PreTrainedTokenizerBase, TensorType + logger = logging.get_logger(__name__) WHISPER_PRETRAINED_CONFIG_ARCHIVE_MAP = { @@ -214,3 +222,115 @@ def __init__( begin_suppress_tokens=begin_suppress_tokens, **kwargs, ) + + +class WhisperEncoderOnnxConfig(OnnxConfig): + @property + def inputs(self) -> Mapping[str, Mapping[int, str]]: + return OrderedDict( + [ + ("input_features", {0: "batch", 1: "feature_size", 2: "encoder_sequence"}), + ] + ) + + @property + def outputs(self) -> Mapping[str, Mapping[int, str]]: + return OrderedDict({"last_hidden_state": {0: "batch", 1: "encoder_sequence"}}) + + +class WhisperDecoderOnnxConfig(OnnxSeq2SeqConfigWithPast): + @property + def inputs(self) -> Mapping[str, Mapping[int, str]]: + common_inputs = OrderedDict( + [ + ("input_ids", {0: "batch", 1: "past_decoder_sequence + sequence"}), + ("encoder_hidden_states", {0: "batch", 1: "encoder_sequence"}), + ] + ) + if self.use_past: + self.fill_with_past_key_values_(common_inputs, direction="inputs") + + return common_inputs + + def generate_dummy_inputs( + self, + tokenizer: "PreTrainedTokenizerBase", + batch_size: int = -1, + seq_length: int = 1, + is_pair: bool = False, + framework: Optional["TensorType"] = None, + ) -> Mapping[str, Any]: + import torch + + common_inputs = {} + dummy_input = super().generate_dummy_inputs( + tokenizer, batch_size=batch_size, seq_length=seq_length, is_pair=is_pair, framework=framework + ) + batch, encoder_seq_length = dummy_input["input_ids"].shape + encoder_hidden_states_shape = (batch, encoder_seq_length, self._config.encoder_hidden_size) + common_inputs["input_ids"] = dummy_input.pop("decoder_input_ids") + common_inputs["encoder_hidden_states"] = torch.zeros(encoder_hidden_states_shape) + + if "past_key_values" in dummy_input: + common_inputs["past_key_values"] = dummy_input.pop("past_key_values") + + return common_inputs + + @property + def outputs(self) -> Mapping[str, Mapping[int, str]]: + common_outputs = super(OnnxConfigWithPast, self).outputs + self.fill_with_past_key_values_(common_outputs, direction="outputs") + return common_outputs + + def fill_with_past_key_values_(self, inputs_or_outputs: Mapping[str, Mapping[int, str]], direction: str): + num_pkv_per_layer = 4 + _, num_decoder_layers = self.num_layers + name = "past" if direction == "inputs" else "present" + decoder_sequence = "past_decoder_sequence" if direction == "inputs" else "past_decoder_sequence + sequence" + for i in range(num_decoder_layers * num_pkv_per_layer): + inputs_or_outputs[f"{name}_key_values_{i}"] = {0: "batch", 2: decoder_sequence} + + +class WhisperOnnxConfig(OnnxSeq2SeqConfigWithPast): + @property + def inputs(self) -> None: + pass + + def get_encoder_config(self, encoder_config: PretrainedConfig) -> OnnxConfig: + r""" + Returns ONNX encoder config for `Whisper` model. + + Args: + encoder_config (`PretrainedConfig`): + The encoder model's configuration to use when exporting to ONNX. + + Returns: + [`WhisperOnnxConfig`]: An instance of the ONNX configuration object + """ + return WhisperEncoderOnnxConfig(encoder_config) + + def get_decoder_config( + self, + encoder_config: PretrainedConfig, + decoder_config: PretrainedConfig, + feature: str = "default", + use_past: bool = False, + ) -> OnnxConfig: + r""" + Returns ONNX decoder config for `Whisper` model. + + Args: + encoder_config (`PretrainedConfig`): + The encoder model's configuration to use when exporting to ONNX. 
+ decoder_config (`PretrainedConfig`): + The decoder model's configuration to use when exporting to ONNX + feature (`str`, *optional*): + The type of feature to export the model with. + use_past (bool, *optional*): + Leverages the precomputed key/values hiddenstates when True + + Returns: + [`WhisperDecoderOnnxConfig`]: An instance of the ONNX configuration object. + """ + decoder_config.encoder_hidden_size = encoder_config.hidden_size + return WhisperDecoderOnnxConfig(decoder_config, feature, use_past=use_past) diff --git a/src/transformers/onnx/config.py b/src/transformers/onnx/config.py index 5a1c3e6eede5..fcfd33f4564f 100644 --- a/src/transformers/onnx/config.py +++ b/src/transformers/onnx/config.py @@ -104,6 +104,7 @@ class OnnxConfig(ABC): "sequence-classification": OrderedDict({"logits": {0: "batch"}}), "token-classification": OrderedDict({"logits": {0: "batch", 1: "sequence"}}), "vision2seq-lm": OrderedDict({"logits": {0: "batch", 1: "sequence"}}), + "speech2seq-lm": OrderedDict({"logits": {0: "batch", 1: "sequence"}}), } def __init__(self, config: "PretrainedConfig", task: str = "default", patching_specs: List[PatchingSpec] = None): @@ -325,7 +326,8 @@ def generate_dummy_inputs( seq_length, fixed_dimension=OnnxConfig.default_fixed_sequence, num_token_to_add=token_to_add ) # Generate dummy inputs according to compute batch and sequence - dummy_input = [" ".join([preprocessor.unk_token]) * seq_length] * batch_size + input_token = preprocessor.unk_token if preprocessor.unk_token else "0" + dummy_input = [" ".join([input_token]) * seq_length] * batch_size if self.task == "multiple-choice": # If dynamic axis (-1) we forward with a fixed dimension of 4 candidate answers to avoid optimizations # made by ONNX @@ -345,6 +347,14 @@ def generate_dummy_inputs( batch_size = compute_effective_axis_dimension(batch_size, fixed_dimension=OnnxConfig.default_fixed_batch) dummy_input = self._generate_dummy_images(batch_size, num_channels, image_height, image_width) return dict(preprocessor(images=dummy_input, return_tensors=framework)) + elif ( + isinstance(preprocessor, FeatureExtractionMixin) and preprocessor.model_input_names[0] == "input_features" + ): + # If dynamic axis (-1) we forward with a fixed dimension of 2 samples to avoid optimizations made by ONNX + batch_size = compute_effective_axis_dimension(batch_size, fixed_dimension=OnnxConfig.default_fixed_batch) + # 80000 random samples between -1 and 1 + dummy_input = np.random.uniform(-1, 1, 80000) + return dict(preprocessor(dummy_input, return_tensors=framework)) else: raise ValueError( "Unable to generate dummy inputs for the model. Please provide a tokenizer or a preprocessor." 
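The new `input_features` branch above draws 80000 uniform samples and hands them to the model's feature extractor. A minimal sketch of what that produces for Whisper, assuming the openai/whisper-tiny.en checkpoint and a 16 kHz sampling rate (neither is mandated by the patch itself):

import numpy as np

from transformers import WhisperFeatureExtractor

# Same dummy waveform as the new branch: 80000 uniform samples in [-1, 1),
# i.e. roughly 5 seconds of mono audio at 16 kHz.
dummy_input = np.random.uniform(-1, 1, 80000)

# Illustrative checkpoint; any Whisper feature extractor behaves the same way.
feature_extractor = WhisperFeatureExtractor.from_pretrained("openai/whisper-tiny.en")
inputs = feature_extractor(dummy_input, sampling_rate=16000, return_tensors="np")

# Whisper pads/trims to 30 seconds of audio and computes 80 log-mel bins, so
# the exported encoder sees a (batch, feature_size, encoder_sequence) tensor.
print(inputs["input_features"].shape)  # (1, 80, 3000)
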
diff --git a/src/transformers/onnx/features.py b/src/transformers/onnx/features.py index 6a0ec0f7c707..99093a453bc1 100644 --- a/src/transformers/onnx/features.py +++ b/src/transformers/onnx/features.py @@ -29,6 +29,7 @@ AutoModelForSemanticSegmentation, AutoModelForSeq2SeqLM, AutoModelForSequenceClassification, + AutoModelForSpeechSeq2Seq, AutoModelForTokenClassification, AutoModelForVision2Seq, ) @@ -100,6 +101,7 @@ class FeaturesManager: "masked-im": AutoModelForMaskedImageModeling, "semantic-segmentation": AutoModelForSemanticSegmentation, "vision2seq-lm": AutoModelForVision2Seq, + "speech2seq-lm": AutoModelForSpeechSeq2Seq, } if is_tf_available(): _TASKS_TO_TF_AUTOMODELS = { @@ -489,6 +491,13 @@ class FeaturesManager: "vit": supported_features_mapping( "default", "image-classification", "masked-im", onnx_config_cls="models.vit.ViTOnnxConfig" ), + "whisper": supported_features_mapping( + "default", + "default-with-past", + "speech2seq-lm", + "speech2seq-lm-with-past", + onnx_config_cls="models.whisper.WhisperOnnxConfig", + ), "xlm": supported_features_mapping( "default", "masked-lm", From da52a4338636a40b7b177e355ca2860c5f86ab1e Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Fri, 14 Oct 2022 09:54:07 +0000 Subject: [PATCH 02/11] added whisper support onnx --- docs/source/en/serialization.mdx | 1 + .../configuration_vision_encoder_decoder.py | 25 ++++++++++--- .../models/whisper/configuration_whisper.py | 37 ++++++++++++------- src/transformers/onnx/__main__.py | 10 +++-- src/transformers/onnx/config.py | 13 +++++++ src/transformers/onnx/convert.py | 5 ++- 6 files changed, 66 insertions(+), 25 deletions(-) diff --git a/docs/source/en/serialization.mdx b/docs/source/en/serialization.mdx index 269a7271fda0..3c61033aed81 100644 --- a/docs/source/en/serialization.mdx +++ b/docs/source/en/serialization.mdx @@ -99,6 +99,7 @@ Ready-made configurations include the following architectures: - Table Transformer - Vision Encoder decoder - ViT +- Whisper - XLM - XLM-RoBERTa - XLM-RoBERTa-XL diff --git a/src/transformers/models/vision_encoder_decoder/configuration_vision_encoder_decoder.py b/src/transformers/models/vision_encoder_decoder/configuration_vision_encoder_decoder.py index 693c41c74691..d188c837576b 100644 --- a/src/transformers/models/vision_encoder_decoder/configuration_vision_encoder_decoder.py +++ b/src/transformers/models/vision_encoder_decoder/configuration_vision_encoder_decoder.py @@ -176,19 +176,26 @@ def generate_dummy_inputs( batch, encoder_sequence = dummy_input["input_ids"].shape encoder_hidden_states_shape = (batch, encoder_sequence, self._config.encoder_hidden_size) - common_inputs["input_ids"] = dummy_input.pop("input_ids") - common_inputs["attention_mask"] = dummy_input.pop("attention_mask") - common_inputs["encoder_hidden_states"] = torch.zeros(encoder_hidden_states_shape) + common_inputs["decoder_input_ids"] = dummy_input.pop("input_ids") + common_inputs["decoder_attention_mask"] = dummy_input.pop("attention_mask") + common_inputs["encoder_outputs"] = (torch.zeros(encoder_hidden_states_shape), None, None) return common_inputs + def generate_dummy_inputs_onnxruntime(self, reference_model_inputs: Mapping[str, Any]) -> Mapping[str, Any]: + reference_model_inputs["input_ids"] = reference_model_inputs.pop("decoder_input_ids") + reference_model_inputs["attention_mask"] = reference_model_inputs.pop("decoder_attention_mask") + reference_model_inputs["encoder_hidden_states"] = reference_model_inputs.pop("encoder_outputs")[0] + + return reference_model_inputs + class 
VisionEncoderDecoderOnnxConfig(OnnxConfig): @property def inputs(self) -> None: pass - def get_encoder_config(self, encoder_config: PretrainedConfig) -> OnnxConfig: + def get_encoder_config(self, encoder_config: PretrainedConfig) -> VisionEncoderDecoderEncoderOnnxConfig: r""" Returns ONNX encoder config for `VisionEncoderDecoder` model. @@ -202,8 +209,12 @@ def get_encoder_config(self, encoder_config: PretrainedConfig) -> OnnxConfig: return VisionEncoderDecoderEncoderOnnxConfig(encoder_config) def get_decoder_config( - self, encoder_config: PretrainedConfig, decoder_config: PretrainedConfig, feature: str = "default" - ) -> OnnxConfig: + self, + encoder_config: PretrainedConfig, + decoder_config: PretrainedConfig, + feature: str = "default", + use_past: bool = False, + ) -> VisionEncoderDecoderDecoderOnnxConfig: r""" Returns ONNX decoder config for `VisionEncoderDecoder` model. @@ -214,6 +225,8 @@ def get_decoder_config( The decoder model's configuration to use when exporting to ONNX feature (`str`, *optional*): The type of feature to export the model with. + use_past (bool, *optional*): + Leverages the precomputed key/values hidden states when True Returns: [`VisionEncoderDecoderDecoderOnnxConfig`]: An instance of the ONNX configuration object. diff --git a/src/transformers/models/whisper/configuration_whisper.py b/src/transformers/models/whisper/configuration_whisper.py index 0ef48e795b3f..23373726649c 100644 --- a/src/transformers/models/whisper/configuration_whisper.py +++ b/src/transformers/models/whisper/configuration_whisper.py @@ -17,9 +17,8 @@ from collections import OrderedDict from typing import TYPE_CHECKING, Any, Mapping, Optional -from transformers.onnx.config import OnnxConfig, OnnxConfigWithPast, OnnxSeq2SeqConfigWithPast - from ...configuration_utils import PretrainedConfig +from ...onnx import OnnxConfig, OnnxConfigWithPast, OnnxSeq2SeqConfigWithPast from ...utils import logging @@ -268,8 +267,8 @@ def generate_dummy_inputs( ) batch, encoder_seq_length = dummy_input["input_ids"].shape encoder_hidden_states_shape = (batch, encoder_seq_length, self._config.encoder_hidden_size) - common_inputs["input_ids"] = dummy_input.pop("decoder_input_ids") - common_inputs["encoder_hidden_states"] = torch.zeros(encoder_hidden_states_shape) + common_inputs["decoder_input_ids"] = dummy_input.pop("decoder_input_ids") + common_inputs["encoder_outputs"] = (torch.zeros(encoder_hidden_states_shape), None, None) if "past_key_values" in dummy_input: common_inputs["past_key_values"] = dummy_input.pop("past_key_values") @@ -280,15 +279,21 @@ def generate_dummy_inputs( def outputs(self) -> Mapping[str, Mapping[int, str]]: common_outputs = super(OnnxConfigWithPast, self).outputs self.fill_with_past_key_values_(common_outputs, direction="outputs") + return common_outputs - def fill_with_past_key_values_(self, inputs_or_outputs: Mapping[str, Mapping[int, str]], direction: str): - num_pkv_per_layer = 4 - _, num_decoder_layers = self.num_layers - name = "past" if direction == "inputs" else "present" - decoder_sequence = "past_decoder_sequence" if direction == "inputs" else "past_decoder_sequence + sequence" - for i in range(num_decoder_layers * num_pkv_per_layer): - inputs_or_outputs[f"{name}_key_values_{i}"] = {0: "batch", 2: decoder_sequence} + def generate_dummy_inputs_onnxruntime(self, reference_model_inputs: Mapping[str, Any]) -> Mapping[str, Any]: + reference_model_inputs["input_ids"] = reference_model_inputs.pop("decoder_input_ids") + reference_model_inputs["encoder_hidden_states"] = 
reference_model_inputs.pop("encoder_outputs")[0] + + return reference_model_inputs + + @property + def values_override(self) -> Optional[Mapping[str, Any]]: + if hasattr(self._config, "use_cache"): + return {"use_cache": True} + + return None class WhisperOnnxConfig(OnnxSeq2SeqConfigWithPast): @@ -296,7 +301,7 @@ class WhisperOnnxConfig(OnnxSeq2SeqConfigWithPast): def inputs(self) -> None: pass - def get_encoder_config(self, encoder_config: PretrainedConfig) -> OnnxConfig: + def get_encoder_config(self, encoder_config: PretrainedConfig) -> WhisperEncoderOnnxConfig: r""" Returns ONNX encoder config for `Whisper` model. @@ -315,7 +320,7 @@ def get_decoder_config( decoder_config: PretrainedConfig, feature: str = "default", use_past: bool = False, - ) -> OnnxConfig: + ) -> WhisperDecoderOnnxConfig: r""" Returns ONNX decoder config for `Whisper` model. @@ -327,10 +332,14 @@ def get_decoder_config( feature (`str`, *optional*): The type of feature to export the model with. use_past (bool, *optional*): - Leverages the precomputed key/values hiddenstates when True + Leverages the precomputed key/values hidden states when True Returns: [`WhisperDecoderOnnxConfig`]: An instance of the ONNX configuration object. """ decoder_config.encoder_hidden_size = encoder_config.hidden_size + + if "-with-past" in feature: + feature = feature.replace("-with-past", "") + return WhisperDecoderOnnxConfig(decoder_config, feature, use_past=use_past) diff --git a/src/transformers/onnx/__main__.py b/src/transformers/onnx/__main__.py index b84e12edbb24..d979b74b6855 100644 --- a/src/transformers/onnx/__main__.py +++ b/src/transformers/onnx/__main__.py @@ -22,7 +22,7 @@ from .features import FeaturesManager -ENCODER_DECODER_MODELS = ["vision-encoder-decoder"] +ENCODER_DECODER_MODELS = ["vision-encoder-decoder", "whisper"] def main(): @@ -65,6 +65,8 @@ def main(): args = parser.parse_args() args.output = args.output if args.output.is_file() else args.output.joinpath("model.onnx") + use_past = True if "-with-past" in args.feature else False + if not args.output.parent.exists(): args.output.parent.mkdir(parents=True) @@ -82,7 +84,7 @@ def main(): encoder_onnx_config = onnx_config.get_encoder_config(encoder_model.config) decoder_onnx_config = onnx_config.get_decoder_config( - encoder_model.config, decoder_model.config, feature=args.feature + encoder_model.config, decoder_model.config, feature=args.feature, use_past=use_past ) if args.opset is None: @@ -117,7 +119,7 @@ def main(): onnx_inputs, onnx_outputs = export( preprocessor, - decoder_model, + model, decoder_onnx_config, args.opset, args.output.parent.joinpath("decoder_model.onnx"), @@ -126,7 +128,7 @@ def main(): validate_model_outputs( decoder_onnx_config, preprocessor, - decoder_model, + model, args.output.parent.joinpath("decoder_model.onnx"), onnx_outputs, args.atol if args.atol else decoder_onnx_config.atol_for_validation, diff --git a/src/transformers/onnx/config.py b/src/transformers/onnx/config.py index fcfd33f4564f..7292887e4681 100644 --- a/src/transformers/onnx/config.py +++ b/src/transformers/onnx/config.py @@ -360,6 +360,19 @@ def generate_dummy_inputs( "Unable to generate dummy inputs for the model. Please provide a tokenizer or a preprocessor." ) + def generate_dummy_inputs_onnxruntime(self, reference_model_inputs: Mapping[str, Any]) -> Mapping[str, Any]: + """ + Generate inputs for onnxruntime using the reference model inputs. + + Args: + reference_model_inputs: ([`Mapping[str, Tensor]`): + Reference inputs for the model. 
+ + Returns: + Mapping[str, Tensor] holding the kwargs to provide to the model's forward function + """ + return reference_model_inputs + def patch_ops(self): for spec in self._patching_specs: custom_op = spec.custom_op if spec.op_wrapper is None else spec.op_wrapper(spec.custom_op) diff --git a/src/transformers/onnx/convert.py b/src/transformers/onnx/convert.py index 234724699e82..7c44e41c169a 100644 --- a/src/transformers/onnx/convert.py +++ b/src/transformers/onnx/convert.py @@ -404,9 +404,12 @@ def validate_model_outputs( else: ref_outputs_dict[name] = value + # Create onnxruntime inputs from the reference model inputs + reference_model_inputs_onnxruntime = config.generate_dummy_inputs_onnxruntime(reference_model_inputs) + # We flatten potential collection of inputs (i.e. past_keys) onnx_inputs = {} - for name, value in reference_model_inputs.items(): + for name, value in reference_model_inputs_onnxruntime.items(): if isinstance(value, (list, tuple)): value = config.flatten_output_collection_property(name, value) onnx_inputs.update({tensor_name: pt_tensor.numpy() for tensor_name, pt_tensor in value.items()}) From f61ae968d2ff7a5969cc7555176e105e42bacafc Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Tue, 18 Oct 2022 09:09:59 +0000 Subject: [PATCH 03/11] add audio input data --- .../models/whisper/configuration_whisper.py | 12 +++++++-- src/transformers/onnx/config.py | 25 +++++++++++++++++-- src/transformers/onnx/convert.py | 16 +++++++++++- tests/onnx/test_onnx_v2.py | 15 +++++++---- 4 files changed, 58 insertions(+), 10 deletions(-) diff --git a/src/transformers/models/whisper/configuration_whisper.py b/src/transformers/models/whisper/configuration_whisper.py index 23373726649c..ce95b1989e5a 100644 --- a/src/transformers/models/whisper/configuration_whisper.py +++ b/src/transformers/models/whisper/configuration_whisper.py @@ -236,6 +236,10 @@ def inputs(self) -> Mapping[str, Mapping[int, str]]: def outputs(self) -> Mapping[str, Mapping[int, str]]: return OrderedDict({"last_hidden_state": {0: "batch", 1: "encoder_sequence"}}) + @property + def atol_for_validation(self) -> float: + return 1e-4 + class WhisperDecoderOnnxConfig(OnnxSeq2SeqConfigWithPast): @property @@ -295,6 +299,10 @@ def values_override(self) -> Optional[Mapping[str, Any]]: return None + @property + def atol_for_validation(self) -> float: + return 1e-4 + class WhisperOnnxConfig(OnnxSeq2SeqConfigWithPast): @property @@ -329,9 +337,9 @@ def get_decoder_config( The encoder model's configuration to use when exporting to ONNX. decoder_config (`PretrainedConfig`): The decoder model's configuration to use when exporting to ONNX - feature (`str`, *optional*): + feature (`str`, *optional*, defaults to `default`): The type of feature to export the model with. 
- use_past (bool, *optional*): + use_past (bool, *optional*, defaults to `False`): Leverages the precomputed key/values hidden states when True Returns: diff --git a/src/transformers/onnx/config.py b/src/transformers/onnx/config.py index 7292887e4681..b94a9bcb22c9 100644 --- a/src/transformers/onnx/config.py +++ b/src/transformers/onnx/config.py @@ -263,6 +263,19 @@ def _generate_dummy_images( images.append(Image.fromarray(data.astype("uint8")).convert("RGB")) return images + def _generate_dummy_audio( + self, batch_size: int = 2, sampling_rate: int = 22050, time_duration: float = 5.0, frequency: int = 220 + ): + audio_data = [] + for _ in range(batch_size): + # time variable + t = np.linspace(0, time_duration, int(time_duration * sampling_rate), endpoint=False) + + # generate pure sine wave at `frequency` Hz + audio_data.append(0.5 * np.sin(2 * np.pi * frequency * t)) + + return audio_data + def generate_dummy_inputs( self, preprocessor: Union["PreTrainedTokenizerBase", "FeatureExtractionMixin"], @@ -274,6 +287,9 @@ def generate_dummy_inputs( num_channels: int = 3, image_width: int = 40, image_height: int = 40, + sampling_rate: int = 22050, + time_duration: float = 5.0, + frequency: int = 220, tokenizer: "PreTrainedTokenizerBase" = None, ) -> Mapping[str, Any]: """ @@ -298,6 +314,12 @@ def generate_dummy_inputs( The width of the generated images. image_height (`int`, *optional*, defaults to 40): The height of the generated images. + sampling_rate (`int`, *optional* defaults to 22050) + The sampling rate for audio data generation. + time_duration (`int`, *optional* defaults to 5 sec) + Total seconds of sampling for audio data generation. + frequency (`int`, *optional* defaults to 220) + The desired natural frequency of generated audio. Returns: Mapping[str, Tensor] holding the kwargs to provide to the model's forward function @@ -352,8 +374,7 @@ def generate_dummy_inputs( ): # If dynamic axis (-1) we forward with a fixed dimension of 2 samples to avoid optimizations made by ONNX batch_size = compute_effective_axis_dimension(batch_size, fixed_dimension=OnnxConfig.default_fixed_batch) - # 80000 random samples between -1 and 1 - dummy_input = np.random.uniform(-1, 1, 80000) + dummy_input = self._generate_dummy_audio(batch_size, sampling_rate, time_duration, frequency) return dict(preprocessor(dummy_input, return_tensors=framework)) else: raise ValueError( diff --git a/src/transformers/onnx/convert.py b/src/transformers/onnx/convert.py index 7c44e41c169a..e953207b3a59 100644 --- a/src/transformers/onnx/convert.py +++ b/src/transformers/onnx/convert.py @@ -145,7 +145,21 @@ def export_pytorch( device = torch.device(device) if device.type == "cuda" and torch.cuda.is_available(): model.to(device) - model_inputs = dict((k, v.to(device)) for k, v in model_inputs.items()) + model_inputs_device = dict() + for k, v in model_inputs.items(): + if isinstance(v, Tuple): + model_inputs_device[k] = tuple( + x.to(device) if isinstance(x, torch.Tensor) else None for x in v + ) + elif isinstance(v, List): + model_inputs_device[k] = [ + tuple(x.to(device) if isinstance(x, torch.Tensor) else None for x in t) for t in v + ] + else: + model_inputs_device[k] = v.to(device) + + model_inputs = model_inputs_device + inputs_match, matched_inputs = ensure_model_and_config_inputs_match(model, model_inputs.keys()) onnx_outputs = list(config.outputs.keys()) diff --git a/tests/onnx/test_onnx_v2.py b/tests/onnx/test_onnx_v2.py index 81cd55d3bb5a..be9a06824aa8 100644 --- a/tests/onnx/test_onnx_v2.py +++ 
b/tests/onnx/test_onnx_v2.py @@ -221,6 +221,7 @@ def test_values_override(self): PYTORCH_EXPORT_ENCODER_DECODER_MODELS = { ("vision-encoder-decoder", "nlpconnect/vit-gpt2-image-captioning"), + ("whisper", "openai/whisper-tiny.en"), } PYTORCH_EXPORT_WITH_PAST_MODELS = { @@ -374,15 +375,19 @@ def _onnx_export_encoder_decoder_models( encoder_model = model.get_encoder() decoder_model = model.get_decoder() + use_past = True if "-with-past" in feature else False + encoder_onnx_config = onnx_config.get_encoder_config(encoder_model.config) - decoder_onnx_config = onnx_config.get_decoder_config(encoder_model.config, decoder_model.config, feature) + decoder_onnx_config = onnx_config.get_decoder_config( + encoder_model.config, decoder_model.config, feature, use_past + ) preprocessor = AutoFeatureExtractor.from_pretrained(model_name) onnx_opset = max(encoder_onnx_config.default_onnx_opset, decoder_onnx_config.default_onnx_opset) with NamedTemporaryFile("w") as encoder_output: - onnx_inputs, onnx_outputs = export( + _, onnx_outputs = export( preprocessor, encoder_model, encoder_onnx_config, onnx_opset, Path(encoder_output.name), device=device ) validate_model_outputs( @@ -397,9 +402,9 @@ def _onnx_export_encoder_decoder_models( preprocessor = AutoTokenizer.from_pretrained(model_name) with NamedTemporaryFile("w") as decoder_output: - onnx_inputs, onnx_outputs = export( + _, onnx_outputs = export( preprocessor, - decoder_model, + model, decoder_onnx_config, onnx_config.default_onnx_opset, Path(decoder_output.name), @@ -408,7 +413,7 @@ def _onnx_export_encoder_decoder_models( validate_model_outputs( decoder_onnx_config, preprocessor, - decoder_model, + model, Path(decoder_output.name), onnx_outputs, decoder_onnx_config.atol_for_validation, From 79ad032bf6e22cbe1122f86f2e210e5b9a25160d Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Fri, 14 Oct 2022 09:54:07 +0000 Subject: [PATCH 04/11] added whisper support onnx --- src/transformers/models/whisper/configuration_whisper.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/transformers/models/whisper/configuration_whisper.py b/src/transformers/models/whisper/configuration_whisper.py index ce95b1989e5a..f5e8996ae15b 100644 --- a/src/transformers/models/whisper/configuration_whisper.py +++ b/src/transformers/models/whisper/configuration_whisper.py @@ -299,10 +299,6 @@ def values_override(self) -> Optional[Mapping[str, Any]]: return None - @property - def atol_for_validation(self) -> float: - return 1e-4 - class WhisperOnnxConfig(OnnxSeq2SeqConfigWithPast): @property @@ -339,7 +335,7 @@ def get_decoder_config( The decoder model's configuration to use when exporting to ONNX feature (`str`, *optional*, defaults to `default`): The type of feature to export the model with. 
- use_past (bool, *optional*, defaults to `False`): + use_past (bool, *optional*): Leverages the precomputed key/values hidden states when True Returns: From 8bd740b482f9dff975a9317c1fb32671bf6b08c8 Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Tue, 18 Oct 2022 12:15:06 +0000 Subject: [PATCH 05/11] fixed the seqlength value --- src/transformers/models/whisper/configuration_whisper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transformers/models/whisper/configuration_whisper.py b/src/transformers/models/whisper/configuration_whisper.py index f5e8996ae15b..d6de25b261ca 100644 --- a/src/transformers/models/whisper/configuration_whisper.py +++ b/src/transformers/models/whisper/configuration_whisper.py @@ -259,7 +259,7 @@ def generate_dummy_inputs( self, tokenizer: "PreTrainedTokenizerBase", batch_size: int = -1, - seq_length: int = 1, + seq_length: int = -1, is_pair: bool = False, framework: Optional["TensorType"] = None, ) -> Mapping[str, Any]: From ba50fa32c56f2a69180a0ed12a4e4fc26c5d3491 Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Fri, 21 Oct 2022 12:07:50 +0000 Subject: [PATCH 06/11] Updated the whisper onnx ocnfig --- .../configuration_vision_encoder_decoder.py | 33 ++--- .../models/whisper/configuration_whisper.py | 136 +++++------------- src/transformers/onnx/__main__.py | 6 +- tests/onnx/test_onnx_v2.py | 8 +- 4 files changed, 47 insertions(+), 136 deletions(-) diff --git a/src/transformers/models/vision_encoder_decoder/configuration_vision_encoder_decoder.py b/src/transformers/models/vision_encoder_decoder/configuration_vision_encoder_decoder.py index d188c837576b..168b33d6d297 100644 --- a/src/transformers/models/vision_encoder_decoder/configuration_vision_encoder_decoder.py +++ b/src/transformers/models/vision_encoder_decoder/configuration_vision_encoder_decoder.py @@ -176,58 +176,41 @@ def generate_dummy_inputs( batch, encoder_sequence = dummy_input["input_ids"].shape encoder_hidden_states_shape = (batch, encoder_sequence, self._config.encoder_hidden_size) - common_inputs["decoder_input_ids"] = dummy_input.pop("input_ids") - common_inputs["decoder_attention_mask"] = dummy_input.pop("attention_mask") - common_inputs["encoder_outputs"] = (torch.zeros(encoder_hidden_states_shape), None, None) + common_inputs["input_ids"] = dummy_input.pop("input_ids") + common_inputs["attention_mask"] = dummy_input.pop("attention_mask") + common_inputs["encoder_hidden_states"] = torch.zeros(encoder_hidden_states_shape) return common_inputs - def generate_dummy_inputs_onnxruntime(self, reference_model_inputs: Mapping[str, Any]) -> Mapping[str, Any]: - reference_model_inputs["input_ids"] = reference_model_inputs.pop("decoder_input_ids") - reference_model_inputs["attention_mask"] = reference_model_inputs.pop("decoder_attention_mask") - reference_model_inputs["encoder_hidden_states"] = reference_model_inputs.pop("encoder_outputs")[0] - - return reference_model_inputs - class VisionEncoderDecoderOnnxConfig(OnnxConfig): @property def inputs(self) -> None: pass - def get_encoder_config(self, encoder_config: PretrainedConfig) -> VisionEncoderDecoderEncoderOnnxConfig: + def get_encoder_config(self, encoder_config: PretrainedConfig) -> OnnxConfig: r""" - Returns ONNX encoder config for `VisionEncoderDecoder` model. - Args: + Returns ONNX encoder config for `VisionEncoderDecoder` model. encoder_config (`PretrainedConfig`): The encoder model's configuration to use when exporting to ONNX. 
- Returns: [`VisionEncoderDecoderEncoderOnnxConfig`]: An instance of the ONNX configuration object """ return VisionEncoderDecoderEncoderOnnxConfig(encoder_config) def get_decoder_config( - self, - encoder_config: PretrainedConfig, - decoder_config: PretrainedConfig, - feature: str = "default", - use_past: bool = False, - ) -> VisionEncoderDecoderDecoderOnnxConfig: + self, encoder_config: PretrainedConfig, decoder_config: PretrainedConfig, feature: str = "default" + ) -> OnnxConfig: r""" - Returns ONNX decoder config for `VisionEncoderDecoder` model. - Args: + Returns ONNX decoder config for `VisionEncoderDecoder` model. encoder_config (`PretrainedConfig`): The encoder model's configuration to use when exporting to ONNX. decoder_config (`PretrainedConfig`): The decoder model's configuration to use when exporting to ONNX feature (`str`, *optional*): The type of feature to export the model with. - use_past (bool, *optional*): - Leverages the precomputed key/values hidden states when True - Returns: [`VisionEncoderDecoderDecoderOnnxConfig`]: An instance of the ONNX configuration object. """ diff --git a/src/transformers/models/whisper/configuration_whisper.py b/src/transformers/models/whisper/configuration_whisper.py index d6de25b261ca..7ebf65ac4eb0 100644 --- a/src/transformers/models/whisper/configuration_whisper.py +++ b/src/transformers/models/whisper/configuration_whisper.py @@ -15,15 +15,16 @@ """ Whisper model configuration""" from collections import OrderedDict -from typing import TYPE_CHECKING, Any, Mapping, Optional +from typing import TYPE_CHECKING, Any, Mapping, Optional, Union from ...configuration_utils import PretrainedConfig -from ...onnx import OnnxConfig, OnnxConfigWithPast, OnnxSeq2SeqConfigWithPast +from ...onnx import OnnxConfig, OnnxSeq2SeqConfigWithPast from ...utils import logging if TYPE_CHECKING: from ... 
import PreTrainedTokenizerBase, TensorType + from ...feature_extraction_utils import FeatureExtractionMixin logger = logging.get_logger(__name__) @@ -223,33 +224,21 @@ def __init__( ) -class WhisperEncoderOnnxConfig(OnnxConfig): +class WhisperOnnxConfig(OnnxSeq2SeqConfigWithPast): @property def inputs(self) -> Mapping[str, Mapping[int, str]]: - return OrderedDict( + common_inputs = OrderedDict( [ ("input_features", {0: "batch", 1: "feature_size", 2: "encoder_sequence"}), ] ) + if self.use_past: + common_inputs["decoder_input_ids"] = {0: "batch"} + common_inputs["decoder_attention_mask"] = {0: "batch", 1: "past_decoder_sequence + sequence"} + else: + common_inputs["decoder_input_ids"] = {0: "batch", 1: "decoder_sequence"} + common_inputs["decoder_attention_mask"] = {0: "batch", 1: "decoder_sequence"} - @property - def outputs(self) -> Mapping[str, Mapping[int, str]]: - return OrderedDict({"last_hidden_state": {0: "batch", 1: "encoder_sequence"}}) - - @property - def atol_for_validation(self) -> float: - return 1e-4 - - -class WhisperDecoderOnnxConfig(OnnxSeq2SeqConfigWithPast): - @property - def inputs(self) -> Mapping[str, Mapping[int, str]]: - common_inputs = OrderedDict( - [ - ("input_ids", {0: "batch", 1: "past_decoder_sequence + sequence"}), - ("encoder_hidden_states", {0: "batch", 1: "encoder_sequence"}), - ] - ) if self.use_past: self.fill_with_past_key_values_(common_inputs, direction="inputs") @@ -257,93 +246,38 @@ def inputs(self) -> Mapping[str, Mapping[int, str]]: def generate_dummy_inputs( self, - tokenizer: "PreTrainedTokenizerBase", + preprocessor: Union["PreTrainedTokenizerBase", "FeatureExtractionMixin"], batch_size: int = -1, seq_length: int = -1, is_pair: bool = False, framework: Optional["TensorType"] = None, + sampling_rate: int = 22050, + time_duration: float = 5.0, + frequency: int = 220, ) -> Mapping[str, Any]: - import torch - - common_inputs = {} - dummy_input = super().generate_dummy_inputs( - tokenizer, batch_size=batch_size, seq_length=seq_length, is_pair=is_pair, framework=framework + dummy_inputs = OrderedDict() + encoder_inputs = OnnxConfig.generate_dummy_inputs( + self, + preprocessor=preprocessor.feature_extractor, + batch_size=batch_size, + framework=framework, + sampling_rate=sampling_rate, + time_duration=time_duration, + frequency=frequency, + ) + decoder_inputs = super().generate_dummy_inputs( + preprocessor.tokenizer, batch_size, seq_length, is_pair, framework ) - batch, encoder_seq_length = dummy_input["input_ids"].shape - encoder_hidden_states_shape = (batch, encoder_seq_length, self._config.encoder_hidden_size) - common_inputs["decoder_input_ids"] = dummy_input.pop("decoder_input_ids") - common_inputs["encoder_outputs"] = (torch.zeros(encoder_hidden_states_shape), None, None) - - if "past_key_values" in dummy_input: - common_inputs["past_key_values"] = dummy_input.pop("past_key_values") - - return common_inputs - - @property - def outputs(self) -> Mapping[str, Mapping[int, str]]: - common_outputs = super(OnnxConfigWithPast, self).outputs - self.fill_with_past_key_values_(common_outputs, direction="outputs") - - return common_outputs - - def generate_dummy_inputs_onnxruntime(self, reference_model_inputs: Mapping[str, Any]) -> Mapping[str, Any]: - reference_model_inputs["input_ids"] = reference_model_inputs.pop("decoder_input_ids") - reference_model_inputs["encoder_hidden_states"] = reference_model_inputs.pop("encoder_outputs")[0] - - return reference_model_inputs - @property - def values_override(self) -> Optional[Mapping[str, Any]]: - if 
hasattr(self._config, "use_cache"): - return {"use_cache": True} + dummy_inputs["input_features"] = encoder_inputs.pop("input_features") + dummy_inputs["decoder_input_ids"] = decoder_inputs.pop("decoder_input_ids") + dummy_inputs["decoder_attention_mask"] = decoder_inputs.pop("attention_mask") - return None + if "past_key_values" in decoder_inputs: + dummy_inputs["past_key_values"] = decoder_inputs.pop("past_key_values") + return dummy_inputs -class WhisperOnnxConfig(OnnxSeq2SeqConfigWithPast): @property - def inputs(self) -> None: - pass - - def get_encoder_config(self, encoder_config: PretrainedConfig) -> WhisperEncoderOnnxConfig: - r""" - Returns ONNX encoder config for `Whisper` model. - - Args: - encoder_config (`PretrainedConfig`): - The encoder model's configuration to use when exporting to ONNX. - - Returns: - [`WhisperOnnxConfig`]: An instance of the ONNX configuration object - """ - return WhisperEncoderOnnxConfig(encoder_config) - - def get_decoder_config( - self, - encoder_config: PretrainedConfig, - decoder_config: PretrainedConfig, - feature: str = "default", - use_past: bool = False, - ) -> WhisperDecoderOnnxConfig: - r""" - Returns ONNX decoder config for `Whisper` model. - - Args: - encoder_config (`PretrainedConfig`): - The encoder model's configuration to use when exporting to ONNX. - decoder_config (`PretrainedConfig`): - The decoder model's configuration to use when exporting to ONNX - feature (`str`, *optional*, defaults to `default`): - The type of feature to export the model with. - use_past (bool, *optional*): - Leverages the precomputed key/values hidden states when True - - Returns: - [`WhisperDecoderOnnxConfig`]: An instance of the ONNX configuration object. - """ - decoder_config.encoder_hidden_size = encoder_config.hidden_size - - if "-with-past" in feature: - feature = feature.replace("-with-past", "") - - return WhisperDecoderOnnxConfig(decoder_config, feature, use_past=use_past) + def atol_for_validation(self) -> float: + return 1e-3 diff --git a/src/transformers/onnx/__main__.py b/src/transformers/onnx/__main__.py index d979b74b6855..6fdfb99c11ce 100644 --- a/src/transformers/onnx/__main__.py +++ b/src/transformers/onnx/__main__.py @@ -22,7 +22,7 @@ from .features import FeaturesManager -ENCODER_DECODER_MODELS = ["vision-encoder-decoder", "whisper"] +ENCODER_DECODER_MODELS = ["vision-encoder-decoder"] def main(): @@ -65,8 +65,6 @@ def main(): args = parser.parse_args() args.output = args.output if args.output.is_file() else args.output.joinpath("model.onnx") - use_past = True if "-with-past" in args.feature else False - if not args.output.parent.exists(): args.output.parent.mkdir(parents=True) @@ -84,7 +82,7 @@ def main(): encoder_onnx_config = onnx_config.get_encoder_config(encoder_model.config) decoder_onnx_config = onnx_config.get_decoder_config( - encoder_model.config, decoder_model.config, feature=args.feature, use_past=use_past + encoder_model.config, decoder_model.config, feature=args.feature ) if args.opset is None: diff --git a/tests/onnx/test_onnx_v2.py b/tests/onnx/test_onnx_v2.py index be9a06824aa8..2b7f558eddc7 100644 --- a/tests/onnx/test_onnx_v2.py +++ b/tests/onnx/test_onnx_v2.py @@ -217,11 +217,11 @@ def test_values_override(self): ("yolos", "hustvl/yolos-tiny"), ("segformer", "nvidia/segformer-b0-finetuned-ade-512-512"), ("swin", "microsoft/swin-tiny-patch4-window7-224"), + ("whisper", "openai/whisper-tiny.en"), } PYTORCH_EXPORT_ENCODER_DECODER_MODELS = { ("vision-encoder-decoder", "nlpconnect/vit-gpt2-image-captioning"), - ("whisper", 
"openai/whisper-tiny.en"), } PYTORCH_EXPORT_WITH_PAST_MODELS = { @@ -375,12 +375,8 @@ def _onnx_export_encoder_decoder_models( encoder_model = model.get_encoder() decoder_model = model.get_decoder() - use_past = True if "-with-past" in feature else False - encoder_onnx_config = onnx_config.get_encoder_config(encoder_model.config) - decoder_onnx_config = onnx_config.get_decoder_config( - encoder_model.config, decoder_model.config, feature, use_past - ) + decoder_onnx_config = onnx_config.get_decoder_config(encoder_model.config, decoder_model.config, feature) preprocessor = AutoFeatureExtractor.from_pretrained(model_name) From f9557ddec70851f64479a69c5dadd340c830e445 Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Fri, 21 Oct 2022 12:47:30 +0000 Subject: [PATCH 07/11] restore files to old version --- .../configuration_vision_encoder_decoder.py | 6 ++++-- src/transformers/onnx/__main__.py | 4 ++-- tests/onnx/test_onnx_v2.py | 6 +++--- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/src/transformers/models/vision_encoder_decoder/configuration_vision_encoder_decoder.py b/src/transformers/models/vision_encoder_decoder/configuration_vision_encoder_decoder.py index 168b33d6d297..4690662766fe 100644 --- a/src/transformers/models/vision_encoder_decoder/configuration_vision_encoder_decoder.py +++ b/src/transformers/models/vision_encoder_decoder/configuration_vision_encoder_decoder.py @@ -190,8 +190,9 @@ def inputs(self) -> None: def get_encoder_config(self, encoder_config: PretrainedConfig) -> OnnxConfig: r""" - Args: Returns ONNX encoder config for `VisionEncoderDecoder` model. + + Args: encoder_config (`PretrainedConfig`): The encoder model's configuration to use when exporting to ONNX. Returns: @@ -203,8 +204,9 @@ def get_decoder_config( self, encoder_config: PretrainedConfig, decoder_config: PretrainedConfig, feature: str = "default" ) -> OnnxConfig: r""" - Args: Returns ONNX decoder config for `VisionEncoderDecoder` model. + + Args: encoder_config (`PretrainedConfig`): The encoder model's configuration to use when exporting to ONNX. 
decoder_config (`PretrainedConfig`): diff --git a/src/transformers/onnx/__main__.py b/src/transformers/onnx/__main__.py index 6fdfb99c11ce..b84e12edbb24 100644 --- a/src/transformers/onnx/__main__.py +++ b/src/transformers/onnx/__main__.py @@ -117,7 +117,7 @@ def main(): onnx_inputs, onnx_outputs = export( preprocessor, - model, + decoder_model, decoder_onnx_config, args.opset, args.output.parent.joinpath("decoder_model.onnx"), @@ -126,7 +126,7 @@ def main(): validate_model_outputs( decoder_onnx_config, preprocessor, - model, + decoder_model, args.output.parent.joinpath("decoder_model.onnx"), onnx_outputs, args.atol if args.atol else decoder_onnx_config.atol_for_validation, diff --git a/tests/onnx/test_onnx_v2.py b/tests/onnx/test_onnx_v2.py index 2b7f558eddc7..b0928d1af59a 100644 --- a/tests/onnx/test_onnx_v2.py +++ b/tests/onnx/test_onnx_v2.py @@ -383,7 +383,7 @@ def _onnx_export_encoder_decoder_models( onnx_opset = max(encoder_onnx_config.default_onnx_opset, decoder_onnx_config.default_onnx_opset) with NamedTemporaryFile("w") as encoder_output: - _, onnx_outputs = export( + onnx_inputs, onnx_outputs = export( preprocessor, encoder_model, encoder_onnx_config, onnx_opset, Path(encoder_output.name), device=device ) validate_model_outputs( @@ -400,7 +400,7 @@ def _onnx_export_encoder_decoder_models( with NamedTemporaryFile("w") as decoder_output: _, onnx_outputs = export( preprocessor, - model, + decoder_model, decoder_onnx_config, onnx_config.default_onnx_opset, Path(decoder_output.name), @@ -409,7 +409,7 @@ def _onnx_export_encoder_decoder_models( validate_model_outputs( decoder_onnx_config, preprocessor, - model, + decoder_model, Path(decoder_output.name), onnx_outputs, decoder_onnx_config.atol_for_validation, From 85fdb6825acb0a3b8cbdb7c3bc7d52061d762b7a Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Fri, 21 Oct 2022 13:14:44 +0000 Subject: [PATCH 08/11] removed attention mask from inputs --- .../configuration_vision_encoder_decoder.py | 2 ++ src/transformers/models/whisper/configuration_whisper.py | 3 --- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/transformers/models/vision_encoder_decoder/configuration_vision_encoder_decoder.py b/src/transformers/models/vision_encoder_decoder/configuration_vision_encoder_decoder.py index 4690662766fe..693c41c74691 100644 --- a/src/transformers/models/vision_encoder_decoder/configuration_vision_encoder_decoder.py +++ b/src/transformers/models/vision_encoder_decoder/configuration_vision_encoder_decoder.py @@ -195,6 +195,7 @@ def get_encoder_config(self, encoder_config: PretrainedConfig) -> OnnxConfig: Args: encoder_config (`PretrainedConfig`): The encoder model's configuration to use when exporting to ONNX. + Returns: [`VisionEncoderDecoderEncoderOnnxConfig`]: An instance of the ONNX configuration object """ @@ -213,6 +214,7 @@ def get_decoder_config( The decoder model's configuration to use when exporting to ONNX feature (`str`, *optional*): The type of feature to export the model with. + Returns: [`VisionEncoderDecoderDecoderOnnxConfig`]: An instance of the ONNX configuration object. 
""" diff --git a/src/transformers/models/whisper/configuration_whisper.py b/src/transformers/models/whisper/configuration_whisper.py index 7ebf65ac4eb0..ff71b0f7101d 100644 --- a/src/transformers/models/whisper/configuration_whisper.py +++ b/src/transformers/models/whisper/configuration_whisper.py @@ -234,10 +234,8 @@ def inputs(self) -> Mapping[str, Mapping[int, str]]: ) if self.use_past: common_inputs["decoder_input_ids"] = {0: "batch"} - common_inputs["decoder_attention_mask"] = {0: "batch", 1: "past_decoder_sequence + sequence"} else: common_inputs["decoder_input_ids"] = {0: "batch", 1: "decoder_sequence"} - common_inputs["decoder_attention_mask"] = {0: "batch", 1: "decoder_sequence"} if self.use_past: self.fill_with_past_key_values_(common_inputs, direction="inputs") @@ -271,7 +269,6 @@ def generate_dummy_inputs( dummy_inputs["input_features"] = encoder_inputs.pop("input_features") dummy_inputs["decoder_input_ids"] = decoder_inputs.pop("decoder_input_ids") - dummy_inputs["decoder_attention_mask"] = decoder_inputs.pop("attention_mask") if "past_key_values" in decoder_inputs: dummy_inputs["past_key_values"] = decoder_inputs.pop("past_key_values") From b1a2e4a0c8c76e2f76152e66f4210e205c475817 Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Mon, 31 Oct 2022 09:28:44 +0000 Subject: [PATCH 09/11] Updated get_dummy_input_onnxruntime docstring --- src/transformers/onnx/config.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/transformers/onnx/config.py b/src/transformers/onnx/config.py index b94a9bcb22c9..3189070faee8 100644 --- a/src/transformers/onnx/config.py +++ b/src/transformers/onnx/config.py @@ -383,14 +383,15 @@ def generate_dummy_inputs( def generate_dummy_inputs_onnxruntime(self, reference_model_inputs: Mapping[str, Any]) -> Mapping[str, Any]: """ - Generate inputs for onnxruntime using the reference model inputs. + Generate inputs for ONNX Runtime using the reference model inputs. Override this to run inference with seq2seq + models which have the encoder and decoder exported as separate ONNX files. Args: - reference_model_inputs: ([`Mapping[str, Tensor]`): + reference_model_inputs ([`Mapping[str, Tensor]`): Reference inputs for the model. Returns: - Mapping[str, Tensor] holding the kwargs to provide to the model's forward function + `Mapping[str, Tensor]`: The mapping holding the kwargs to provide to the model's forward function """ return reference_model_inputs From 59a596582e80552ce7aa16cc708c37f02edafff0 Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Tue, 1 Nov 2022 06:22:27 +0000 Subject: [PATCH 10/11] Updated relative imports and token generation --- src/transformers/models/whisper/configuration_whisper.py | 3 ++- src/transformers/onnx/config.py | 8 ++++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/transformers/models/whisper/configuration_whisper.py b/src/transformers/models/whisper/configuration_whisper.py index ff71b0f7101d..c25dab667d47 100644 --- a/src/transformers/models/whisper/configuration_whisper.py +++ b/src/transformers/models/whisper/configuration_whisper.py @@ -23,8 +23,9 @@ if TYPE_CHECKING: - from ... 
import PreTrainedTokenizerBase, TensorType from ...feature_extraction_utils import FeatureExtractionMixin + from ...tokenization_utils_base import PreTrainedTokenizerBase + from ...utils import TensorType logger = logging.get_logger(__name__) diff --git a/src/transformers/onnx/config.py b/src/transformers/onnx/config.py index 3189070faee8..a9753f29fbc2 100644 --- a/src/transformers/onnx/config.py +++ b/src/transformers/onnx/config.py @@ -316,7 +316,7 @@ def generate_dummy_inputs( The height of the generated images. sampling_rate (`int`, *optional* defaults to 22050) The sampling rate for audio data generation. - time_duration (`int`, *optional* defaults to 5 sec) + time_duration (`float`, *optional* defaults to 5.0 sec) Total seconds of sampling for audio data generation. frequency (`int`, *optional* defaults to 220) The desired natural frequency of generated audio. @@ -348,7 +348,11 @@ def generate_dummy_inputs( seq_length, fixed_dimension=OnnxConfig.default_fixed_sequence, num_token_to_add=token_to_add ) # Generate dummy inputs according to compute batch and sequence - input_token = preprocessor.unk_token if preprocessor.unk_token else "0" + input_token = ( + preprocessor.unk_token + if (preprocessor.unk_token is not None and len(preprocessor.unk_token) > 0) + else "0" + ) dummy_input = [" ".join([input_token]) * seq_length] * batch_size if self.task == "multiple-choice": # If dynamic axis (-1) we forward with a fixed dimension of 4 candidate answers to avoid optimizations From 0d1904ca9b0dc0012faf74dc2d6545793dd8c7b6 Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Tue, 1 Nov 2022 06:28:00 +0000 Subject: [PATCH 11/11] update docstring --- src/transformers/onnx/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transformers/onnx/config.py b/src/transformers/onnx/config.py index a9753f29fbc2..1c8d10939a14 100644 --- a/src/transformers/onnx/config.py +++ b/src/transformers/onnx/config.py @@ -316,7 +316,7 @@ def generate_dummy_inputs( The height of the generated images. sampling_rate (`int`, *optional* defaults to 22050) The sampling rate for audio data generation. - time_duration (`float`, *optional* defaults to 5.0 sec) + time_duration (`float`, *optional* defaults to 5.0) Total seconds of sampling for audio data generation. frequency (`int`, *optional* defaults to 220) The desired natural frequency of generated audio.
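
With the series applied end to end, Whisper goes through the standard single-model export path using the speech2seq-lm features registered in features.py, so an export along the lines of `python -m transformers.onnx --model=openai/whisper-tiny.en --feature=speech2seq-lm whisper_onnx/` should work. The final WhisperOnnxConfig can also be exercised directly; a sketch, assuming the openai/whisper-tiny.en checkpoint and an installed torch backend (the checkpoint and printed shapes are illustrative, not part of the patches):

from transformers import AutoConfig, WhisperProcessor
from transformers.models.whisper import WhisperOnnxConfig
from transformers.utils import TensorType

# Illustrative checkpoint; the processor bundles the feature extractor
# (for input_features) and the tokenizer (for decoder_input_ids).
model_name = "openai/whisper-tiny.en"
processor = WhisperProcessor.from_pretrained(model_name)
config = AutoConfig.from_pretrained(model_name)

onnx_config = WhisperOnnxConfig(config, task="default")

# The encoder input comes from a synthetic sine wave fed through the feature
# extractor; the decoder input comes from the tokenizer, exactly as in the
# final generate_dummy_inputs above.
dummy_inputs = onnx_config.generate_dummy_inputs(processor, framework=TensorType.PYTORCH)
for name, tensor in dummy_inputs.items():
    print(name, tuple(tensor.shape))  # input_features and decoder_input_ids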