From 246f2d26c4e6193d6be39ca53029f3034411aa58 Mon Sep 17 00:00:00 2001 From: adit299 Date: Sat, 11 Mar 2023 11:58:29 -0500 Subject: [PATCH 01/20] commenced work on supporting audio classification task for whisper model in tensorflow --- src/transformers/models/whisper/modeling_tf_whisper.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/transformers/models/whisper/modeling_tf_whisper.py b/src/transformers/models/whisper/modeling_tf_whisper.py index 7a76d42fd526..35427f5d25a2 100644 --- a/src/transformers/models/whisper/modeling_tf_whisper.py +++ b/src/transformers/models/whisper/modeling_tf_whisper.py @@ -28,6 +28,7 @@ TFBaseModelOutputWithPastAndCrossAttentions, TFSeq2SeqLMOutput, TFSeq2SeqModelOutput, + TFSequenceClassifierOutput ) from ...modeling_tf_utils import ( TFCausalLanguageModelingLoss, From 6c4d26996d933bb7d9c39a457df601b0d7209b5a Mon Sep 17 00:00:00 2001 From: adit299 Date: Fri, 31 Mar 2023 11:16:16 -0400 Subject: [PATCH 02/20] initial implementation of Whisper Audio Classification in tf finished --- .../models/whisper/modeling_tf_whisper.py | 85 +++++++++++++++++++ 1 file changed, 85 insertions(+) diff --git a/src/transformers/models/whisper/modeling_tf_whisper.py b/src/transformers/models/whisper/modeling_tf_whisper.py index c5c25e315a88..95b94a8ab16d 100644 --- a/src/transformers/models/whisper/modeling_tf_whisper.py +++ b/src/transformers/models/whisper/modeling_tf_whisper.py @@ -1387,3 +1387,88 @@ def prepare_inputs_for_generation( "decoder_attention_mask": decoder_attention_mask, "decoder_position_ids": decoder_position_ids, } + +class TFWhisperForAudioClassification(TFWhisperPreTrainedModel): + def __init__(self, config): + super().__init__(config) + + self.encoder = TFWhisperEncoder(config) + num_layers = config.num_hidden_layers + 1 + if config.use_weighted_layer_sum: + self.layer_weights = tf.Variable(tf.ones(shape=(num_layers,)) / num_layers) + self.projector = tf.keras.layers.Dense(units=config.classifier_proj_size, input_shape=(config.hidden_size,)) + self.classifier = tf.keras.layers.Dense(units=config.num_labels, input_shape=(config.classifier_proj_size,), + activation=None) + + @unpack_inputs + def call( + self, + input_features: Optional[tf.Tensor] = None, + head_mask: Optional[tf.Tensor] = None, + encoder_outputs: Optional[Tuple[Tuple[tf.Tensor]]] = None, + labels: Optional[tf.Tensor] = None, + output_attentions: Optional[bool] = None, + output_hidden_states: Optional[bool] = None, + return_dict: Optional[bool] = None + ): + return_dict = return_dict if return_dict is not None else self.config.use_return_dict + output_hidden_states = True if self.config.use_weighted_layer_sum else output_hidden_states + + outputs = self.encoder( + input_features, + head_mask=head_mask, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=return_dict, + ) + if self.config.use_weighted_layer_sum: + hidden_states = tf.stack(encoder_outputs, axis=1) + norm_weights = tf.nn.softmax(self.layer_weights, axis=-1) + hidden_states = tf.reduce_sum(hidden_states * tf.reshape(norm_weights, [-1, 1, 1]), axis=1) + else: + hidden_states = encoder_outputs[0] + + hidden_states = self.projector(hidden_states) + pooled_output = tf.reduce_mean(hidden_states, axis=1) + + logits = self.classifier(pooled_output) + + loss = None + + if labels is not None: + loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True) + loss = loss_fn(tf.reshape(labels, [-1]), tf.reshape(logits, [-1, self.config.num_labels])) + + if not return_dict: + output = (logits,) + encoder_outputs[1:] + return ((loss,) + output) if loss is not None else output + + return TFSequenceClassifierOutput( + loss=loss, + logits=logits, + hidden_states=outputs.hidden_states, + attentions=outputs.attentions, + ) + + + + + + + + + + + + + + + + + + + + + + + From 5e61c4cecc15dd1c389679443169a44caa51e7ad Mon Sep 17 00:00:00 2001 From: adit299 Date: Tue, 4 Apr 2023 10:44:18 -0400 Subject: [PATCH 03/20] registering whisper audio classification in tf --- docs/source/en/model_doc/whisper.mdx | 4 ++++ src/transformers/models/whisper/__init__.py | 2 ++ 2 files changed, 6 insertions(+) diff --git a/docs/source/en/model_doc/whisper.mdx b/docs/source/en/model_doc/whisper.mdx index 22b08e4e61bc..1d4226cf8dab 100644 --- a/docs/source/en/model_doc/whisper.mdx +++ b/docs/source/en/model_doc/whisper.mdx @@ -95,6 +95,10 @@ The original code can be found [here](https://github.com/openai/whisper). [[autodoc]] TFWhisperForConditionalGeneration - call +## TFWhisperForAudioClassification + +[[autodoc]] TFWhisperForAudioClassification + - call ## FlaxWhisperModel diff --git a/src/transformers/models/whisper/__init__.py b/src/transformers/models/whisper/__init__.py index 3b6015a56f6f..34df1c8a7d32 100644 --- a/src/transformers/models/whisper/__init__.py +++ b/src/transformers/models/whisper/__init__.py @@ -63,6 +63,7 @@ "TFWhisperForConditionalGeneration", "TFWhisperModel", "TFWhisperPreTrainedModel", + "TFWhisperForAudioClassification" ] try: @@ -114,6 +115,7 @@ else: from .modeling_tf_whisper import ( TF_WHISPER_PRETRAINED_MODEL_ARCHIVE_LIST, + TFWhisperForAudioClassification, TFWhisperForConditionalGeneration, TFWhisperModel, TFWhisperPreTrainedModel, From d204aea7314217fa8b47e7418ead0d9973f50ccd Mon Sep 17 00:00:00 2001 From: adit299 Date: Wed, 26 Apr 2023 20:44:09 -0400 Subject: [PATCH 04/20] commencing work on writing tests --- .../models/auto/modeling_tf_auto.py | 9 + .../models/whisper/modeling_tf_whisper.py | 24 +-- src/transformers/utils/dummy_tf_objects.py | 7 +- .../whisper/test_modeling_tf_whisper.py | 190 +++++++++++++++++- 4 files changed, 205 insertions(+), 25 deletions(-) diff --git a/src/transformers/models/auto/modeling_tf_auto.py b/src/transformers/models/auto/modeling_tf_auto.py index 8d7d72711ec2..ee61590abaff 100644 --- a/src/transformers/models/auto/modeling_tf_auto.py +++ b/src/transformers/models/auto/modeling_tf_auto.py @@ -164,6 +164,13 @@ ] ) +TF_MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING_NAMES = OrderedDict( + [ + ("whisper", "TFWhisperForAudioClassification") + ] +) + + TF_MODEL_FOR_CAUSAL_LM_MAPPING_NAMES = OrderedDict( [ # Model for Causal LM mapping @@ -427,6 +434,8 @@ ) TF_MODEL_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, TF_MODEL_MAPPING_NAMES) +TF_MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING_NAMES = _LazyAutoMapping(CONFIG_MAPPING_NAMES, TF_MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING_NAMES) + TF_MODEL_FOR_PRETRAINING_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, TF_MODEL_FOR_PRETRAINING_MAPPING_NAMES) TF_MODEL_WITH_LM_HEAD_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, TF_MODEL_WITH_LM_HEAD_MAPPING_NAMES) TF_MODEL_FOR_CAUSAL_LM_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, TF_MODEL_FOR_CAUSAL_LM_MAPPING_NAMES) diff --git a/src/transformers/models/whisper/modeling_tf_whisper.py b/src/transformers/models/whisper/modeling_tf_whisper.py index 95b94a8ab16d..e08562059857 100644 --- a/src/transformers/models/whisper/modeling_tf_whisper.py +++ b/src/transformers/models/whisper/modeling_tf_whisper.py @@ -1449,26 +1449,6 @@ def call( hidden_states=outputs.hidden_states, attentions=outputs.attentions, ) - - - - - - - - - - - - - - - - - - - - - - + + \ No newline at end of file diff --git a/src/transformers/utils/dummy_tf_objects.py b/src/transformers/utils/dummy_tf_objects.py index 166b02854d54..9239f742eea0 100644 --- a/src/transformers/utils/dummy_tf_objects.py +++ b/src/transformers/utils/dummy_tf_objects.py @@ -2607,12 +2607,17 @@ def __init__(self, *args, **kwargs): TF_WHISPER_PRETRAINED_MODEL_ARCHIVE_LIST = None -class TFWhisperForConditionalGeneration(metaclass=DummyObject): +class TFWhisperForAudioClassification(metaclass=DummyObject): _backends = ["tf"] def __init__(self, *args, **kwargs): requires_backends(self, ["tf"]) +class TFWhisperForConditionalGeneration(metaclass=DummyObject): + _backends = ["tf"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["tf"]) class TFWhisperModel(metaclass=DummyObject): _backends = ["tf"] diff --git a/tests/models/whisper/test_modeling_tf_whisper.py b/tests/models/whisper/test_modeling_tf_whisper.py index 2ef3cdcee02a..10687549158e 100644 --- a/tests/models/whisper/test_modeling_tf_whisper.py +++ b/tests/models/whisper/test_modeling_tf_whisper.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. """ Testing suite for the TensorFlow Whisper model. """ - +import copy import inspect import tempfile import traceback @@ -39,7 +39,7 @@ if is_tf_available(): import tensorflow as tf - from transformers import TFWhisperForConditionalGeneration, TFWhisperModel, set_seed + from transformers import TFWhisperForConditionalGeneration, TFWhisperModel, TFWhisperForAudioClassification, set_seed from transformers.models.whisper.modeling_tf_whisper import TFWhisperDecoder, TFWhisperEncoder @@ -803,6 +803,192 @@ def _test_large_batched_generation(in_queue, out_queue, timeout): out_queue.put(results, timeout=timeout) out_queue.join() +@require_tf +class TFWhisperEncoderModelTester: + def __init__( + self, + parent, + batch_size=13, + seq_length=60, + is_training=True, + use_labels=True, + hidden_size=16, + num_hidden_layers=2, + num_attention_heads=4, + input_channels=1, + hidden_act="gelu", + hidden_dropout_prob=0.1, + attention_probs_dropout_prob=0.1, + max_position_embeddings=20, + max_source_positions=30, + num_mel_bins=80, + num_conv_layers=1, + suppress_tokens=None, + begin_suppress_tokens=None, + classifier_proj_size=4, + num_labels=2, + is_encoder_decoder=False, + is_decoder=False, + ): + self.parent = parent + self.batch_size = batch_size + self.seq_length = seq_length + self.is_training = is_training + self.use_labels = use_labels + self.hidden_size = hidden_size + self.num_hidden_layers = num_hidden_layers + self.num_attention_heads = num_attention_heads + self.input_channels = input_channels + self.hidden_act = hidden_act + self.hidden_dropout_prob = hidden_dropout_prob + self.attention_probs_dropout_prob = attention_probs_dropout_prob + self.num_mel_bins = num_mel_bins + self.max_position_embeddings = max_position_embeddings + self.max_source_positions = max_source_positions + self.num_conv_layers = num_conv_layers + self.suppress_tokens = suppress_tokens + self.begin_suppress_tokens = begin_suppress_tokens + self.classifier_proj_size = classifier_proj_size + self.num_labels = num_labels + self.is_encoder_decoder = is_encoder_decoder + self.is_decoder = is_decoder + + def get_config(self): + return WhisperConfig( + d_model=self.hidden_size, + encoder_layers=self.num_hidden_layers, + decoder_layers=self.num_hidden_layers, + encoder_attention_heads=self.num_attention_heads, + decoder_attention_heads=self.num_attention_heads, + input_channels=self.input_channels, + dropout=self.hidden_dropout_prob, + attention_dropout=self.attention_probs_dropout_prob, + max_position_embeddings=self.max_position_embeddings, + max_source_positions=self.max_source_positions, + decoder_ffn_dim=self.hidden_size, + encoder_ffn_dim=self.hidden_size, + suppress_tokens=self.suppress_tokens, + begin_suppress_tokens=self.begin_suppress_tokens, + classifier_proj_size=self.classifier_proj_size, + num_labels=self.num_labels, + is_encoder_decoder=self.is_encoder_decoder, + is_decoder=self.is_decoder, + ) + + def prepare_config_and_inputs(self): + input_features = floats_tensor([self.batch_size, self.num_mel_bins, self.seq_length]) + + config = self.get_config() + inputs_dict = prepare_whisper_encoder_inputs_dict( + config, + input_features=input_features, + ) + return config, inputs_dict + + def prepare_config_and_inputs_for_common(self): + config, inputs_dict = self.prepare_config_and_inputs() + return config, inputs_dict + + def get_subsampled_output_lengths(self, input_lengths): + """ + Computes the output length of the convolutional layers + """ + + for i in range(self.num_conv_layers): + input_lengths = (input_lengths - 1) // 2 + 1 + + return input_lengths + + @property + def encoder_seq_length(self): + return self.get_subsampled_output_lengths(self.seq_length) + + def create_and_check_model_forward(self, config, inputs_dict, freeze_encoder=False): + model = TFWhisperForAudioClassification(config=config) + + if freeze_encoder: + model.freeze_encoder() + + input_features = inputs_dict["input_features"] + + # first forward pass + last_hidden_state = model(input_features).logits + + self.parent.assertTrue(last_hidden_state.shape, (13, 2)) + + +def prepare_whisper_encoder_inputs_dict(config, input_features, head_mask=None): + if head_mask is None: + head_mask = tf.ones([config.encoder_layers, config.encoder_attention_heads]) + return {"input_features": input_features, "head_mask": head_mask} + +@require_tf +class TFWhisperEncoderModelTest(TFModelTesterMixin, TFGenerationTesterMixin, unittest.TestCase): + all_model_classes = (TFWhisperForAudioClassification,) if is_tf_available() else () + is_encoder_decoder = False + fx_compatible = False + test_pruning = False + test_missing_keys = False + + input_name = "input_features" + + def setUp(self): + self.model_tester = TFWhisperEncoderModelTester(self) + self.config_tester = ConfigTester(self, config_class=WhisperConfig) + self.maxDiff = 3000 + + def test_config(self): + self.config_tester.run_common_tests() + + def test_forward_signature(self): + config, _ = self.model_tester.prepare_config_and_inputs_for_common() + + for model_class in self.all_model_classes: + model = model_class(config) + signature = inspect.signature(model.forward) + # signature.parameters is an OrderedDict => so arg_names order is deterministic + arg_names = [*signature.parameters.keys()] + + expected_arg_names = ["input_features", "head_mask", "encoder_outputs"] + self.assertListEqual(arg_names[: len(expected_arg_names)], expected_arg_names) + + # input embeds is meaningless for an encoder-only acoustic model + def test_inputs_embeds(self): + pass + + # the equivalent test is passing the encoder outputs directly to the model + def test_encoder_outputs(self): + config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() + + for model_class in self.all_model_classes: + model = model_class(config) + model.to(torch_device) + model.eval() + + inputs = copy.deepcopy(self._prepare_for_class(inputs_dict, model_class)) + + with tf.stop_gradient(inputs): + outputs = model(**inputs)[0] + + input_ids = inputs["input_features"] + del inputs["input_features"] + + encoder = model.encoder + + with tf.stop_gradient(inputs): + inputs["encoder_outputs"] = encoder(input_ids) + outputs_embeds = model(**inputs)[0] + + self.assertTrue((outputs_embeds == outputs).all()) + + # WhisperEncoder has no inputs_embeds and thus the `get_input_embeddings` fn is not implemented + def test_model_common_attributes(self): + pass + + # WhisperEncoder cannot resize token embeddings since it has no tokens embeddings + def test_resize_tokens_embeddings(self): + pass + @require_tf @require_tokenizers From 7fe04e667df1b625fb57b66e3639dd439e68bf9a Mon Sep 17 00:00:00 2001 From: adit299 Date: Mon, 1 May 2023 12:11:24 -0400 Subject: [PATCH 05/20] modified tests --- .../models/auto/modeling_tf_auto.py | 10 ++++---- src/transformers/models/whisper/__init__.py | 2 +- .../models/whisper/modeling_tf_whisper.py | 23 +++++++++--------- src/transformers/utils/dummy_tf_objects.py | 7 +----- .../whisper/test_modeling_tf_whisper.py | 24 +++++++++---------- 5 files changed, 29 insertions(+), 37 deletions(-) diff --git a/src/transformers/models/auto/modeling_tf_auto.py b/src/transformers/models/auto/modeling_tf_auto.py index ee61590abaff..db65a4c5e341 100644 --- a/src/transformers/models/auto/modeling_tf_auto.py +++ b/src/transformers/models/auto/modeling_tf_auto.py @@ -164,11 +164,7 @@ ] ) -TF_MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING_NAMES = OrderedDict( - [ - ("whisper", "TFWhisperForAudioClassification") - ] -) +TF_MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING_NAMES = OrderedDict([("whisper", "TFWhisperForAudioClassification")]) TF_MODEL_FOR_CAUSAL_LM_MAPPING_NAMES = OrderedDict( @@ -434,7 +430,9 @@ ) TF_MODEL_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, TF_MODEL_MAPPING_NAMES) -TF_MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING_NAMES = _LazyAutoMapping(CONFIG_MAPPING_NAMES, TF_MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING_NAMES) +TF_MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING_NAMES = _LazyAutoMapping( + CONFIG_MAPPING_NAMES, TF_MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING_NAMES +) TF_MODEL_FOR_PRETRAINING_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, TF_MODEL_FOR_PRETRAINING_MAPPING_NAMES) TF_MODEL_WITH_LM_HEAD_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, TF_MODEL_WITH_LM_HEAD_MAPPING_NAMES) diff --git a/src/transformers/models/whisper/__init__.py b/src/transformers/models/whisper/__init__.py index 34df1c8a7d32..8aa0749cdcc7 100644 --- a/src/transformers/models/whisper/__init__.py +++ b/src/transformers/models/whisper/__init__.py @@ -63,7 +63,7 @@ "TFWhisperForConditionalGeneration", "TFWhisperModel", "TFWhisperPreTrainedModel", - "TFWhisperForAudioClassification" + "TFWhisperForAudioClassification", ] try: diff --git a/src/transformers/models/whisper/modeling_tf_whisper.py b/src/transformers/models/whisper/modeling_tf_whisper.py index e08562059857..4e479a84f415 100644 --- a/src/transformers/models/whisper/modeling_tf_whisper.py +++ b/src/transformers/models/whisper/modeling_tf_whisper.py @@ -28,7 +28,7 @@ TFBaseModelOutputWithPastAndCrossAttentions, TFSeq2SeqLMOutput, TFSeq2SeqModelOutput, - TFSequenceClassifierOutput + TFSequenceClassifierOutput, ) from ...modeling_tf_utils import ( TFCausalLanguageModelingLoss, @@ -1388,18 +1388,20 @@ def prepare_inputs_for_generation( "decoder_position_ids": decoder_position_ids, } + class TFWhisperForAudioClassification(TFWhisperPreTrainedModel): def __init__(self, config): super().__init__(config) - + self.encoder = TFWhisperEncoder(config) num_layers = config.num_hidden_layers + 1 if config.use_weighted_layer_sum: self.layer_weights = tf.Variable(tf.ones(shape=(num_layers,)) / num_layers) self.projector = tf.keras.layers.Dense(units=config.classifier_proj_size, input_shape=(config.hidden_size,)) - self.classifier = tf.keras.layers.Dense(units=config.num_labels, input_shape=(config.classifier_proj_size,), - activation=None) - + self.classifier = tf.keras.layers.Dense( + units=config.num_labels, input_shape=(config.classifier_proj_size,), activation=None + ) + @unpack_inputs def call( self, @@ -1409,7 +1411,7 @@ def call( labels: Optional[tf.Tensor] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, - return_dict: Optional[bool] = None + return_dict: Optional[bool] = None, ): return_dict = return_dict if return_dict is not None else self.config.use_return_dict output_hidden_states = True if self.config.use_weighted_layer_sum else output_hidden_states @@ -1432,13 +1434,13 @@ def call( pooled_output = tf.reduce_mean(hidden_states, axis=1) logits = self.classifier(pooled_output) - + loss = None - + if labels is not None: loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True) loss = loss_fn(tf.reshape(labels, [-1]), tf.reshape(logits, [-1, self.config.num_labels])) - + if not return_dict: output = (logits,) + encoder_outputs[1:] return ((loss,) + output) if loss is not None else output @@ -1449,6 +1451,3 @@ def call( hidden_states=outputs.hidden_states, attentions=outputs.attentions, ) - - - \ No newline at end of file diff --git a/src/transformers/utils/dummy_tf_objects.py b/src/transformers/utils/dummy_tf_objects.py index 9239f742eea0..166b02854d54 100644 --- a/src/transformers/utils/dummy_tf_objects.py +++ b/src/transformers/utils/dummy_tf_objects.py @@ -2607,18 +2607,13 @@ def __init__(self, *args, **kwargs): TF_WHISPER_PRETRAINED_MODEL_ARCHIVE_LIST = None -class TFWhisperForAudioClassification(metaclass=DummyObject): - _backends = ["tf"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["tf"]) - class TFWhisperForConditionalGeneration(metaclass=DummyObject): _backends = ["tf"] def __init__(self, *args, **kwargs): requires_backends(self, ["tf"]) + class TFWhisperModel(metaclass=DummyObject): _backends = ["tf"] diff --git a/tests/models/whisper/test_modeling_tf_whisper.py b/tests/models/whisper/test_modeling_tf_whisper.py index 10687549158e..9bec453af741 100644 --- a/tests/models/whisper/test_modeling_tf_whisper.py +++ b/tests/models/whisper/test_modeling_tf_whisper.py @@ -39,7 +39,12 @@ if is_tf_available(): import tensorflow as tf - from transformers import TFWhisperForConditionalGeneration, TFWhisperModel, TFWhisperForAudioClassification, set_seed + from transformers import ( + TFWhisperForAudioClassification, + TFWhisperForConditionalGeneration, + TFWhisperModel, + set_seed, + ) from transformers.models.whisper.modeling_tf_whisper import TFWhisperDecoder, TFWhisperEncoder @@ -803,6 +808,7 @@ def _test_large_batched_generation(in_queue, out_queue, timeout): out_queue.put(results, timeout=timeout) out_queue.join() + @require_tf class TFWhisperEncoderModelTester: def __init__( @@ -922,8 +928,9 @@ def prepare_whisper_encoder_inputs_dict(config, input_features, head_mask=None): head_mask = tf.ones([config.encoder_layers, config.encoder_attention_heads]) return {"input_features": input_features, "head_mask": head_mask} + @require_tf -class TFWhisperEncoderModelTest(TFModelTesterMixin, TFGenerationTesterMixin, unittest.TestCase): +class TFWhisperEncoderModelTest(TFModelTesterMixin, unittest.TestCase): all_model_classes = (TFWhisperForAudioClassification,) if is_tf_available() else () is_encoder_decoder = False fx_compatible = False @@ -962,22 +969,15 @@ def test_encoder_outputs(self): for model_class in self.all_model_classes: model = model_class(config) - model.to(torch_device) - model.eval() inputs = copy.deepcopy(self._prepare_for_class(inputs_dict, model_class)) - - with tf.stop_gradient(inputs): - outputs = model(**inputs)[0] - + outputs = model(**inputs)[0] input_ids = inputs["input_features"] del inputs["input_features"] encoder = model.encoder - - with tf.stop_gradient(inputs): - inputs["encoder_outputs"] = encoder(input_ids) - outputs_embeds = model(**inputs)[0] + inputs["encoder_outputs"] = encoder(input_ids) + outputs_embeds = model(**inputs)[0] self.assertTrue((outputs_embeds == outputs).all()) From 6afd3a4e4a96bdac849a77bab7c1968a219e6883 Mon Sep 17 00:00:00 2001 From: adit299 Date: Fri, 5 May 2023 10:54:52 -0400 Subject: [PATCH 06/20] attempting to fix issues with tests --- src/transformers/__init__.py | 2 ++ src/transformers/models/auto/modeling_tf_auto.py | 11 +++++++---- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/transformers/__init__.py b/src/transformers/__init__.py index 7bf322ca8e1e..464f72adfe05 100644 --- a/src/transformers/__init__.py +++ b/src/transformers/__init__.py @@ -3462,6 +3462,7 @@ _import_structure["models.whisper"].extend( [ "TF_WHISPER_PRETRAINED_MODEL_ARCHIVE_LIST", + "TFWhisperForAudioClassification", "TFWhisperForConditionalGeneration", "TFWhisperModel", "TFWhisperPreTrainedModel", @@ -6658,6 +6659,7 @@ ) from .models.whisper import ( TF_WHISPER_PRETRAINED_MODEL_ARCHIVE_LIST, + TFWhisperForAudioClassification, TFWhisperForConditionalGeneration, TFWhisperModel, TFWhisperPreTrainedModel, diff --git a/src/transformers/models/auto/modeling_tf_auto.py b/src/transformers/models/auto/modeling_tf_auto.py index a2078da33b86..0890c079ae00 100644 --- a/src/transformers/models/auto/modeling_tf_auto.py +++ b/src/transformers/models/auto/modeling_tf_auto.py @@ -164,9 +164,6 @@ ] ) -TF_MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING_NAMES = OrderedDict([("whisper", "TFWhisperForAudioClassification")]) - - TF_MODEL_FOR_CAUSAL_LM_MAPPING_NAMES = OrderedDict( [ # Model for Causal LM mapping @@ -354,7 +351,13 @@ ("xlnet", "TFXLNetForQuestionAnsweringSimple"), ] ) -TF_MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING_NAMES = OrderedDict([("wav2vec2", "TFWav2Vec2ForSequenceClassification")]) + +TF_MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING_NAMES = OrderedDict( + [ + ("wav2vec2", "TFWav2Vec2ForSequenceClassification"), + ("whisper", "TFWhisperForAudioClassification"), + ] +) TF_MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING_NAMES = OrderedDict( [ From 874af91128601611d4052f7706ad28a075238218 Mon Sep 17 00:00:00 2001 From: adit299 Date: Fri, 5 May 2023 11:28:17 -0400 Subject: [PATCH 07/20] adding dummy_tf_object for whisper model --- src/transformers/utils/dummy_tf_objects.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/transformers/utils/dummy_tf_objects.py b/src/transformers/utils/dummy_tf_objects.py index 2a043f50f350..80890cc14be5 100644 --- a/src/transformers/utils/dummy_tf_objects.py +++ b/src/transformers/utils/dummy_tf_objects.py @@ -2614,6 +2614,13 @@ def __init__(self, *args, **kwargs): TF_WHISPER_PRETRAINED_MODEL_ARCHIVE_LIST = None +class TFWhisperForAudioClassification(metaclass=DummyObject): + _backends = ["tf"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["tf"]) + + class TFWhisperForConditionalGeneration(metaclass=DummyObject): _backends = ["tf"] From 469e1ef4d3bb5214f3dff9a9ea3b273dae6fe568 Mon Sep 17 00:00:00 2001 From: adit299 Date: Mon, 8 May 2023 10:16:20 -0400 Subject: [PATCH 08/20] attempting to fix circleci tests --- .../models/whisper/modeling_tf_whisper.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/transformers/models/whisper/modeling_tf_whisper.py b/src/transformers/models/whisper/modeling_tf_whisper.py index 4e479a84f415..544fda62870f 100644 --- a/src/transformers/models/whisper/modeling_tf_whisper.py +++ b/src/transformers/models/whisper/modeling_tf_whisper.py @@ -1416,13 +1416,14 @@ def call( return_dict = return_dict if return_dict is not None else self.config.use_return_dict output_hidden_states = True if self.config.use_weighted_layer_sum else output_hidden_states - outputs = self.encoder( - input_features, - head_mask=head_mask, - output_attentions=output_attentions, - output_hidden_states=output_hidden_states, - return_dict=return_dict, - ) + if encoder_outputs is None: + outputs = self.encoder( + input_features, + head_mask=head_mask, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=return_dict, + ) if self.config.use_weighted_layer_sum: hidden_states = tf.stack(encoder_outputs, axis=1) norm_weights = tf.nn.softmax(self.layer_weights, axis=-1) From 9e7415f803f7cc967133fd926a1487adde4903b1 Mon Sep 17 00:00:00 2001 From: adit299 Date: Mon, 8 May 2023 10:30:36 -0400 Subject: [PATCH 09/20] correcting mistake in previous commit --- src/transformers/models/whisper/modeling_tf_whisper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transformers/models/whisper/modeling_tf_whisper.py b/src/transformers/models/whisper/modeling_tf_whisper.py index 544fda62870f..5553e816e89f 100644 --- a/src/transformers/models/whisper/modeling_tf_whisper.py +++ b/src/transformers/models/whisper/modeling_tf_whisper.py @@ -1417,7 +1417,7 @@ def call( output_hidden_states = True if self.config.use_weighted_layer_sum else output_hidden_states if encoder_outputs is None: - outputs = self.encoder( + encoer_outputs = self.encoder( input_features, head_mask=head_mask, output_attentions=output_attentions, From 67eaf018d2c06b0470191af088147ebfb1832ad9 Mon Sep 17 00:00:00 2001 From: adit299 Date: Mon, 8 May 2023 10:57:35 -0400 Subject: [PATCH 10/20] correcting mistakes --- src/transformers/models/whisper/modeling_tf_whisper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transformers/models/whisper/modeling_tf_whisper.py b/src/transformers/models/whisper/modeling_tf_whisper.py index 5553e816e89f..2f713bcc69a1 100644 --- a/src/transformers/models/whisper/modeling_tf_whisper.py +++ b/src/transformers/models/whisper/modeling_tf_whisper.py @@ -1417,7 +1417,7 @@ def call( output_hidden_states = True if self.config.use_weighted_layer_sum else output_hidden_states if encoder_outputs is None: - encoer_outputs = self.encoder( + encoder_outputs = self.encoder( input_features, head_mask=head_mask, output_attentions=output_attentions, From d545856d09c3d2e756bcb63adf6de02535913c06 Mon Sep 17 00:00:00 2001 From: adit299 Date: Mon, 8 May 2023 11:11:57 -0400 Subject: [PATCH 11/20] correcting more mistakes --- src/transformers/models/whisper/modeling_tf_whisper.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/transformers/models/whisper/modeling_tf_whisper.py b/src/transformers/models/whisper/modeling_tf_whisper.py index 2f713bcc69a1..24019c158967 100644 --- a/src/transformers/models/whisper/modeling_tf_whisper.py +++ b/src/transformers/models/whisper/modeling_tf_whisper.py @@ -1449,6 +1449,6 @@ def call( return TFSequenceClassifierOutput( loss=loss, logits=logits, - hidden_states=outputs.hidden_states, - attentions=outputs.attentions, + hidden_states=encoder_outputs.hidden_states, + attentions=encoder_outputs.attentions, ) From 4907a9a76c4867397aeb10d84407180ad651b467 Mon Sep 17 00:00:00 2001 From: adit299 Date: Thu, 11 May 2023 18:07:23 -0400 Subject: [PATCH 12/20] renamed call function to forward to resolve test error --- src/transformers/models/whisper/modeling_tf_whisper.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/transformers/models/whisper/modeling_tf_whisper.py b/src/transformers/models/whisper/modeling_tf_whisper.py index 78a197608068..e682e86b2e00 100644 --- a/src/transformers/models/whisper/modeling_tf_whisper.py +++ b/src/transformers/models/whisper/modeling_tf_whisper.py @@ -129,7 +129,7 @@ def build(self, input_shape): def call(self, input_ids, past_key_values_length=0): past_key_values_length = tf.cast(past_key_values_length, tf.int32) - gather_indices = tf.range(tf.shape(input_ids)[1], delta=1) + past_key_values_length + gather_indices = tf.range(tf.shape(input_ids)[-1], delta=1) + past_key_values_length return tf.gather(self.weight, gather_indices) @@ -1403,7 +1403,7 @@ def __init__(self, config): ) @unpack_inputs - def call( + def forward( self, input_features: Optional[tf.Tensor] = None, head_mask: Optional[tf.Tensor] = None, @@ -1452,3 +1452,4 @@ def call( hidden_states=encoder_outputs.hidden_states, attentions=encoder_outputs.attentions, ) + From 94cdcd5f8db0a1f82aae973e8ea700856db0c4a3 Mon Sep 17 00:00:00 2001 From: adit299 Date: Thu, 11 May 2023 18:52:12 -0400 Subject: [PATCH 13/20] attempting to resolve more test errors --- .../models/whisper/modeling_tf_whisper.py | 2 +- tests/models/whisper/test_modeling_tf_whisper.py | 13 +++++++++---- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/src/transformers/models/whisper/modeling_tf_whisper.py b/src/transformers/models/whisper/modeling_tf_whisper.py index e682e86b2e00..4c5d9614d3ae 100644 --- a/src/transformers/models/whisper/modeling_tf_whisper.py +++ b/src/transformers/models/whisper/modeling_tf_whisper.py @@ -1403,7 +1403,7 @@ def __init__(self, config): ) @unpack_inputs - def forward( + def call( self, input_features: Optional[tf.Tensor] = None, head_mask: Optional[tf.Tensor] = None, diff --git a/tests/models/whisper/test_modeling_tf_whisper.py b/tests/models/whisper/test_modeling_tf_whisper.py index e01d2a981ed4..7fea670fb9ee 100644 --- a/tests/models/whisper/test_modeling_tf_whisper.py +++ b/tests/models/whisper/test_modeling_tf_whisper.py @@ -952,7 +952,7 @@ def test_forward_signature(self): for model_class in self.all_model_classes: model = model_class(config) - signature = inspect.signature(model.forward) + signature = inspect.signature(model.call) # signature.parameters is an OrderedDict => so arg_names order is deterministic arg_names = [*signature.parameters.keys()] @@ -971,13 +971,18 @@ def test_encoder_outputs(self): model = model_class(config) inputs = copy.deepcopy(self._prepare_for_class(inputs_dict, model_class)) - outputs = model(**inputs)[0] + + with tf.stop_gradient: + outputs = model(**inputs)[0] + input_ids = inputs["input_features"] del inputs["input_features"] encoder = model.encoder - inputs["encoder_outputs"] = encoder(input_ids) - outputs_embeds = model(**inputs)[0] + + with tf.stop_gradient: + inputs["encoder_outputs"] = encoder(input_ids) + outputs_embeds = model(**inputs)[0] self.assertTrue((outputs_embeds == outputs).all()) From 24a42fe00f861f8468c72cdfe09c9b0d59d8fb95 Mon Sep 17 00:00:00 2001 From: adit299 Date: Tue, 16 May 2023 13:02:05 -0400 Subject: [PATCH 14/20] addressing review comments --- tests/models/whisper/test_modeling_tf_whisper.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tests/models/whisper/test_modeling_tf_whisper.py b/tests/models/whisper/test_modeling_tf_whisper.py index 7fea670fb9ee..1bf8d4ecc982 100644 --- a/tests/models/whisper/test_modeling_tf_whisper.py +++ b/tests/models/whisper/test_modeling_tf_whisper.py @@ -972,17 +972,15 @@ def test_encoder_outputs(self): inputs = copy.deepcopy(self._prepare_for_class(inputs_dict, model_class)) - with tf.stop_gradient: - outputs = model(**inputs)[0] + outputs = model(**inputs)[0] input_ids = inputs["input_features"] del inputs["input_features"] encoder = model.encoder - with tf.stop_gradient: - inputs["encoder_outputs"] = encoder(input_ids) - outputs_embeds = model(**inputs)[0] + inputs["encoder_outputs"] = encoder(input_ids) + outputs_embeds = model(**inputs)[0] self.assertTrue((outputs_embeds == outputs).all()) From 6f4101d8a0000e8aeb956151364c082d8e7ba51b Mon Sep 17 00:00:00 2001 From: adit299 Date: Wed, 17 May 2023 11:41:11 -0400 Subject: [PATCH 15/20] Addressing review comments and fixing code quality --- src/transformers/models/whisper/modeling_tf_whisper.py | 1 - tests/models/whisper/test_modeling_tf_whisper.py | 9 ++++----- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/src/transformers/models/whisper/modeling_tf_whisper.py b/src/transformers/models/whisper/modeling_tf_whisper.py index 4c5d9614d3ae..24019c158967 100644 --- a/src/transformers/models/whisper/modeling_tf_whisper.py +++ b/src/transformers/models/whisper/modeling_tf_whisper.py @@ -1452,4 +1452,3 @@ def call( hidden_states=encoder_outputs.hidden_states, attentions=encoder_outputs.attentions, ) - diff --git a/tests/models/whisper/test_modeling_tf_whisper.py b/tests/models/whisper/test_modeling_tf_whisper.py index 68b53fdce3c0..9765bbd54936 100644 --- a/tests/models/whisper/test_modeling_tf_whisper.py +++ b/tests/models/whisper/test_modeling_tf_whisper.py @@ -975,18 +975,17 @@ def test_encoder_outputs(self): model = model_class(config) inputs = copy.deepcopy(self._prepare_for_class(inputs_dict, model_class)) - + outputs = model(**inputs)[0] - + input_ids = inputs["input_features"] - del inputs["input_features"] encoder = model.encoder - + inputs["encoder_outputs"] = encoder(input_ids) outputs_embeds = model(**inputs)[0] - self.assertTrue((outputs_embeds == outputs).all()) + self.assertTrue(tf.experimental.numpy.all(outputs_embeds == outputs)) # WhisperEncoder has no inputs_embeds and thus the `get_input_embeddings` fn is not implemented def test_model_common_attributes(self): From 54b6eddf68c84fbf7ccd2c0fedeb1fe6d9656384 Mon Sep 17 00:00:00 2001 From: adit299 Date: Sun, 18 Jun 2023 16:20:47 -0400 Subject: [PATCH 16/20] solving onnx test error --- tests/models/whisper/test_modeling_tf_whisper.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/models/whisper/test_modeling_tf_whisper.py b/tests/models/whisper/test_modeling_tf_whisper.py index 9765bbd54936..7d099dc26ba2 100644 --- a/tests/models/whisper/test_modeling_tf_whisper.py +++ b/tests/models/whisper/test_modeling_tf_whisper.py @@ -940,6 +940,7 @@ class TFWhisperEncoderModelTest(TFModelTesterMixin, unittest.TestCase): fx_compatible = False test_pruning = False test_missing_keys = False + test_onnx = False input_name = "input_features" From d0f7e22326a41a3455e8ef890959f517d1bd718a Mon Sep 17 00:00:00 2001 From: adit299 Date: Sun, 18 Jun 2023 19:38:28 -0400 Subject: [PATCH 17/20] attempting to fix failing quality tests --- tests/models/whisper/test_modeling_tf_whisper.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/models/whisper/test_modeling_tf_whisper.py b/tests/models/whisper/test_modeling_tf_whisper.py index c88dc0d074b5..f60cb8a3ee97 100644 --- a/tests/models/whisper/test_modeling_tf_whisper.py +++ b/tests/models/whisper/test_modeling_tf_whisper.py @@ -13,6 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. """ Testing suite for the TensorFlow Whisper model. """ +from __future__ import annotations + import copy import inspect import tempfile @@ -21,7 +23,6 @@ import numpy as np -from __future__ import annotations from transformers import WhisperConfig, WhisperFeatureExtractor, WhisperProcessor from transformers.testing_utils import is_tf_available, require_tf, require_tokenizers, run_test_in_subprocess, slow from transformers.utils import cached_property From 4f02f9e8b2dbeecaf98ac1fbeaadca146f0611a8 Mon Sep 17 00:00:00 2001 From: adit299 Date: Mon, 28 Aug 2023 13:08:06 -0400 Subject: [PATCH 18/20] resolving test_resize_token_embeddings --- .../models/whisper/modeling_tf_whisper.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/src/transformers/models/whisper/modeling_tf_whisper.py b/src/transformers/models/whisper/modeling_tf_whisper.py index f9168eebdd16..2e450c16d190 100644 --- a/src/transformers/models/whisper/modeling_tf_whisper.py +++ b/src/transformers/models/whisper/modeling_tf_whisper.py @@ -487,14 +487,14 @@ def dummy_inputs(self) -> Dict[str, tf.Tensor]: self.main_input_name: tf.random.uniform( [1, self.config.num_mel_bins, self.config.max_source_positions * 2 - 1], dtype=tf.float32 ), - "decoder_input_ids": tf.constant([[1, 3]], dtype=tf.int32), + # "decoder_input_ids": tf.constant([[1, 3]], dtype=tf.int32), } @property def input_signature(self): return { "input_features": tf.TensorSpec((None, self.config.num_mel_bins, None), tf.float32, name="input_features"), - "decoder_input_ids": tf.TensorSpec((None, None), tf.int32, name="decoder_input_ids"), + # "decoder_input_ids": tf.TensorSpec((None, None), tf.int32, name="decoder_input_ids"), "decoder_attention_mask": tf.TensorSpec((None, None), tf.int32, name="decoder_attention_mask"), } @@ -628,6 +628,12 @@ def __init__(self, config: WhisperConfig, **kwargs): self.layer_norm = tf.keras.layers.LayerNormalization(epsilon=1e-5, name="layer_norm") self.dropout = tf.keras.layers.Dropout(config.dropout) + + def get_input_embeddings(self): + return self.conv1 + + def set_input_embeddings(self, value): + self.conv1 = value @unpack_inputs def call( @@ -1614,6 +1620,13 @@ def __init__(self, config): self.classifier = tf.keras.layers.Dense( units=config.num_labels, input_shape=(config.classifier_proj_size,), activation=None ) + + def get_input_embeddings(self): + return self.encoder.get_input_embeddings() + + def set_input_embeddings(self, value): + self.encoder.set_input_embeddings(value) + @unpack_inputs def call( From 468a9bf63519c4e91c63472935f7a1e46cc13b97 Mon Sep 17 00:00:00 2001 From: adit299 Date: Mon, 4 Sep 2023 11:50:25 -0400 Subject: [PATCH 19/20] override dummy_inputs and input_signature methods --- myoutput.txt | 628 ++++++++++++++++++ .../models/auto/modeling_tf_auto.py | 2 +- .../models/whisper/modeling_tf_whisper.py | 23 +- 3 files changed, 650 insertions(+), 3 deletions(-) create mode 100644 myoutput.txt diff --git a/myoutput.txt b/myoutput.txt new file mode 100644 index 000000000000..81bc8eabe5c7 --- /dev/null +++ b/myoutput.txt @@ -0,0 +1,628 @@ +============================= test session starts ============================== +platform linux -- Python 3.8.10, pytest-7.2.1, pluggy-1.0.0 +rootdir: /home/adit299/transformers, configfile: setup.cfg +plugins: hypothesis-6.64.0, timeout-2.1.0, xdist-3.1.0 +collected 40 items + +tests/models/whisper/test_modeling_tf_whisper.py .F...sFF..F............ [ 57%] +....FsFFF.Fs.ss.s [100%] + +=================================== FAILURES =================================== +_______________ TFWhisperEncoderModelTest.test_compile_tf_model ________________ + +self = + + def test_compile_tf_model(self): + config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() + max_input = getattr(self.model_tester, "max_position_embeddings", 512) + optimizer = tf.keras.optimizers.Adam(learning_rate=3e-5, epsilon=1e-08, clipnorm=1.0) + loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True) + metric = tf.keras.metrics.SparseCategoricalAccuracy("accuracy") + + for model_class in self.all_model_classes: + if model_class.__name__ in ["TFSpeech2TextModel", "TFSpeech2TextForConditionalGeneration"]: + inputs = { + "decoder_input_ids": tf.keras.Input( + batch_shape=(2, max_input), + name="decoder_input_ids", + dtype="int32", + ), + "input_features": tf.keras.Input( + batch_shape=( + 2, + max_input, + self.model_tester.input_feat_per_channel * self.model_tester.input_channels, + ), + name="input_features", + dtype="float32", + ), + } + elif model_class.__name__ in ["TFWhisperModel", "TFWhisperForConditionalGeneration"]: + inputs = { + "decoder_input_ids": tf.keras.Input( + batch_shape=(2, max_input), + name="decoder_input_ids", + dtype="int32", + ), + "input_features": tf.keras.Input( + batch_shape=( + 2, + self.model_tester.num_mel_bins, + self.model_tester.seq_length, + ), + name="input_features", + dtype="float32", + ), + } + elif self.is_encoder_decoder: + inputs = { + "decoder_input_ids": tf.keras.Input( + batch_shape=(2, max_input), + name="decoder_input_ids", + dtype="int32", + ), + "input_ids": tf.keras.Input(batch_shape=(2, max_input), name="input_ids", dtype="int32"), + } + # `pixel_values` implies that the input is an image + elif model_class.main_input_name == "pixel_values": + inputs = tf.keras.Input( + batch_shape=( + 3, + self.model_tester.num_channels, + self.model_tester.image_size, + self.model_tester.image_size, + ), + name="pixel_values", + dtype="float32", + ) + elif model_class.__name__ in ["TFCLIPModel", "TFGroupViTModel", "TFBlipModel"]: + inputs = { + "input_ids": tf.keras.Input(batch_shape=(3, max_input), name="input_ids", dtype="int32"), + "pixel_values": tf.keras.Input( + batch_shape=( + 3, + self.model_tester.vision_model_tester.num_channels, + self.model_tester.vision_model_tester.image_size, + self.model_tester.vision_model_tester.image_size, + ), + name="pixel_values", + dtype="float32", + ), + } + elif model_class in get_values(TF_MODEL_FOR_MULTIPLE_CHOICE_MAPPING): + inputs = tf.keras.Input(batch_shape=(4, 2, max_input), name="input_ids", dtype="int32") + else: + inputs = tf.keras.Input(batch_shape=(2, max_input), name="input_ids", dtype="int32") + + # Prepare our model + model = model_class(config) + model(self._prepare_for_class(inputs_dict, model_class)) # Model must be called before saving. + # Let's load it from the disk to be sure we can use pretrained weights + with tempfile.TemporaryDirectory() as tmpdirname: + model.save_pretrained(tmpdirname, saved_model=False) +> model = model_class.from_pretrained(tmpdirname) + +tests/test_modeling_tf_common.py:827: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +src/transformers/modeling_tf_utils.py:2810: in from_pretrained + model(model.dummy_inputs) # build the network with dummy inputs +.env/lib/python3.8/site-packages/keras/utils/traceback_utils.py:70: in error_handler + raise e.with_traceback(filtered_tb) from None +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +self = +args = ({'decoder_input_ids': , 'input_features': <...], + [0.18482292, 0.9640751 , 0.32960486, ..., 0.53566337, + 0.2877066 , 0.8109739 ]]], dtype=float32)>},) +kwargs = {} +fn_args_and_kwargs = {'input_features': {'decoder_input_ids': , '...2292, 0.9640751 , 0.32960486, ..., 0.53566337, + 0.2877066 , 0.8109739 ]]], dtype=float32)>}, 'kwargs_call': {}} +config = WhisperConfig { + "_name_or_path": "/tmp/tmpa3qgulea", + "activation_dropout": 0.0, + "activation_function": "gelu", + ..."transformers_version": "4.30.0.dev0", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} + +unpacked_inputs = {'decoder_input_ids': , 'encoder_outputs': N... [0.18482292, 0.9640751 , 0.32960486, ..., 0.53566337, + 0.2877066 , 0.8109739 ]]], dtype=float32)>, ...} + + @functools.wraps(func) + def run_call_with_unpacked_inputs(self, *args, **kwargs): + # isolates the actual `**kwargs` for the decorated function + kwargs_call = {key: val for key, val in kwargs.items() if key not in dict(original_signature.parameters)} + fn_args_and_kwargs = {key: val for key, val in kwargs.items() if key not in kwargs_call} + fn_args_and_kwargs.update({"kwargs_call": kwargs_call}) + + # move any arg into kwargs, if they exist + fn_args_and_kwargs.update(dict(zip(func.__code__.co_varnames[1:], args))) + + # Encoder Decoder models delegate the application of the configuration options to their inner models. + if "EncoderDecoder" in self.__class__.__name__: + config = None + else: + config = self.config + + unpacked_inputs = input_processing(func, config, **fn_args_and_kwargs) +> return func(self, **unpacked_inputs) +E TypeError: Exception encountered when calling layer 'tf_whisper_for_audio_classification_4' (type TFWhisperForAudioClassification). +E +E call() got an unexpected keyword argument 'decoder_input_ids' +E +E Call arguments received by layer 'tf_whisper_for_audio_classification_4' (type TFWhisperForAudioClassification): +E • input_features={'input_features': 'tf.Tensor(shape=(2, 80, 59), dtype=float32)', 'decoder_input_ids': 'tf.Tensor(shape=(1, 2), dtype=int32)'} +E • head_mask=None +E • encoder_outputs=None +E • labels=None +E • output_attentions=None +E • output_hidden_states=None +E • return_dict=None + +src/transformers/modeling_tf_utils.py:434: TypeError +________________ TFWhisperEncoderModelTest.test_encoder_outputs ________________ + +self = + + def test_encoder_outputs(self): + config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() + + for model_class in self.all_model_classes: + model = model_class(config) + + inputs = copy.deepcopy(self._prepare_for_class(inputs_dict, model_class)) + outputs = model(**inputs)[0] + input_ids = inputs["input_features"] + del inputs["input_features"] + + encoder = model.encoder + inputs["encoder_outputs"] = encoder(input_ids) +> outputs_embeds = model(**inputs)[0] + +tests/models/whisper/test_modeling_tf_whisper.py:980: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +.env/lib/python3.8/site-packages/keras/utils/traceback_utils.py:70: in error_handler + raise e.with_traceback(filtered_tb) from None +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +self = +args = () +kwargs = {'encoder_outputs': TFBaseModelOutput(last_hidden_state=} + + def split_out_first_arg(self, args, kwargs): + """Splits (args, kwargs) into (inputs, args, kwargs).""" + # Grab the argument corresponding to the first argument in the + # layer's `call` method spec. This will either be the first positional + # argument, or it will be provided as a keyword argument. + if args: + inputs = args[0] + args = args[1:] + elif self._arg_names[0] in kwargs: + kwargs = copy.copy(kwargs) + inputs = kwargs.pop(self._arg_names[0]) + else: +> raise ValueError( + "The first argument to `Layer.call` must always be passed." + ) +E ValueError: The first argument to `Layer.call` must always be passed. + +.env/lib/python3.8/site-packages/keras/utils/layer_utils.py:809: ValueError +_______________ TFWhisperEncoderModelTest.test_forward_signature _______________ + +self = + + def test_forward_signature(self): + config, _ = self.model_tester.prepare_config_and_inputs_for_common() + + for model_class in self.all_model_classes: + model = model_class(config) +> signature = inspect.signature(model.forward) +E AttributeError: 'TFWhisperForAudioClassification' object has no attribute 'forward' + +tests/models/whisper/test_modeling_tf_whisper.py:955: AttributeError +_____________ TFWhisperEncoderModelTest.test_hidden_states_output ______________ + +self = + + def test_hidden_states_output(self): + config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() + + def check_hidden_states_output(config, inputs_dict, model_class): + model = model_class(config) + outputs = model(self._prepare_for_class(inputs_dict, model_class)) + expected_num_layers = getattr( + self.model_tester, "expected_num_hidden_layers", self.model_tester.num_hidden_layers + 1 + ) + + if model.config.is_encoder_decoder: + encoder_hidden_states = outputs.encoder_hidden_states + decoder_hidden_states = outputs.decoder_hidden_states + + self.assertEqual(config.output_attentions, False) + self.assertEqual(len(encoder_hidden_states), expected_num_layers) + self.assertListEqual( + list(encoder_hidden_states[0].shape[-2:]), + [self.model_tester.seq_length, self.model_tester.hidden_size], + ) + self.assertEqual(len(decoder_hidden_states), expected_num_layers) + self.assertListEqual( + list(decoder_hidden_states[0].shape[-2:]), + [self.model_tester.seq_length, self.model_tester.hidden_size], + ) + else: + hidden_states = outputs.hidden_states + self.assertEqual(config.output_attentions, False) + self.assertEqual(len(hidden_states), expected_num_layers) + self.assertListEqual( + list(hidden_states[0].shape[-2:]), + [self.model_tester.seq_length, self.model_tester.hidden_size], + ) + + for model_class in self.all_model_classes: + inputs_dict["output_hidden_states"] = True +> check_hidden_states_output(config, inputs_dict, model_class) + +tests/test_modeling_tf_common.py:1028: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +tests/test_modeling_tf_common.py:1021: in check_hidden_states_output + self.assertListEqual( +E AssertionError: Lists differ: [30, 16] != [60, 16] +E +E First differing element 0: +E 30 +E 60 +E +E - [30, 16] +E ? ^ +E +E + [60, 16] +E ? ^ +________________ TFWhisperEncoderModelTest.test_onnx_compliancy ________________ + +self = + + def test_onnx_compliancy(self): +> if not self.test_onnx: +E AttributeError: 'TFWhisperEncoderModelTest' object has no attribute 'test_onnx' + +tests/test_modeling_tf_common.py:343: AttributeError +____________ TFWhisperEncoderModelTest.test_prepare_serving_output _____________ + +self = + + def test_prepare_serving_output(self): + config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() + config.output_hidden_states = True + config.output_attentions = self.has_attentions + + for model_class in self.all_model_classes: + model = model_class(config) + inputs = self._prepare_for_class(inputs_dict, model_class) + outputs = model(inputs) +> serving_outputs = model.serving_output(outputs) + +tests/test_modeling_tf_common.py:300: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +self = +output = TFSequenceClassifierOutput(loss=None, logits=)) + + def serving_output(self, output): + """ + Prepare the output of the saved model. Each model must implement this function. + + Args: + output ([`TFBaseModelOutput`]): + The output returned by the model. + """ +> raise NotImplementedError +E NotImplementedError + +src/transformers/modeling_tf_utils.py:1236: NotImplementedError +____________ TFWhisperEncoderModelTest.test_pt_tf_model_equivalence ____________ + +self = +allow_missing_keys = False + + @is_pt_tf_cross_test + def test_pt_tf_model_equivalence(self, allow_missing_keys=False): + import transformers + + for model_class in self.all_model_classes: + config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() + + # Output all for aggressive testing + config.output_hidden_states = True + config.output_attentions = self.has_attentions + + # Make sure no sequence has all zeros as attention mask, otherwise some tests fail due to the inconsistency + # of the usage `1e-4`, `1e-9`, `1e-30`, `-inf`. + # TODO: Use a uniform value for all models, make sure all tests pass without this processing, and remove it. + self._make_attention_mask_non_null(inputs_dict) + + pt_model_class_name = model_class.__name__[2:] # Skip the "TF" at the beginning + pt_model_class = getattr(transformers, pt_model_class_name) + + tf_model = model_class(config) + pt_model = pt_model_class(config) + + tf_inputs_dict = self._prepare_for_class(inputs_dict, model_class) + tf_inputs_dict_with_labels = self._prepare_for_class( + inputs_dict, + model_class, + # Not all models accept "labels" in the forward pass (yet :) ) + return_labels=True if "labels" in inspect.signature(model_class.call).parameters.keys() else False, + ) + + # For some models (e.g. base models), there is no label returned. + # Set the input dict to `None` to avoid check outputs twice for the same input dicts. + if not set(tf_inputs_dict_with_labels.keys()).symmetric_difference(tf_inputs_dict.keys()): + tf_inputs_dict_with_labels = None + + # Check we can load pt model in tf and vice-versa with model => model functions +> tf_model = transformers.load_pytorch_model_in_tf2_model( + tf_model, pt_model, tf_inputs=tf_inputs_dict, allow_missing_keys=allow_missing_keys + ) + +tests/test_modeling_tf_common.py:706: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +src/transformers/modeling_tf_pytorch_utils.py:204: in load_pytorch_model_in_tf2_model + return load_pytorch_weights_in_tf2_model( +src/transformers/modeling_tf_pytorch_utils.py:230: in load_pytorch_weights_in_tf2_model + return load_pytorch_state_dict_in_tf2_model( +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +tf_model = +pt_state_dict = {'classifier.bias': array([0., 0.], dtype=float32), 'classifier.weight': array([[-0.00916307, 0.00247839, 0.01647531... [ 0.04591416, -0.00516921, 0.03591024], + [-0.01335885, -0.01553431, -0.0269356 ]]], dtype=float32), ...} +tf_inputs = {'head_mask': } +allow_missing_keys = False, output_loading_info = False, _prefix = '' +tf_to_pt_weight_rename = None, ignore_mismatched_sizes = False + + def load_pytorch_state_dict_in_tf2_model( + tf_model, + pt_state_dict, + tf_inputs=None, + allow_missing_keys=False, + output_loading_info=False, + _prefix=None, + tf_to_pt_weight_rename=None, + ignore_mismatched_sizes=False, + ): + """Load a pytorch state_dict in a TF 2.0 model.""" + import tensorflow as tf + from packaging.version import parse + + if parse(tf.__version__) >= parse("2.11.0"): + from keras import backend as K + else: + from tensorflow.python.keras import backend as K + + if tf_inputs is None: + tf_inputs = tf_model.dummy_inputs + + if _prefix is None: + _prefix = "" + if tf_inputs is not None: + with tf.name_scope(_prefix): + tf_model(tf_inputs, training=False) # Make sure model is built + # Adapt state dict - TODO remove this and update the AWS weights files instead + # Convert old format to new format if needed from a PyTorch state_dict + old_keys = [] + new_keys = [] + for key in pt_state_dict.keys(): + new_key = None + if "gamma" in key: + new_key = key.replace("gamma", "weight") + if "beta" in key: + new_key = key.replace("beta", "bias") + if "running_var" in key: + new_key = key.replace("running_var", "moving_variance") + if "running_mean" in key: + new_key = key.replace("running_mean", "moving_mean") + if new_key: + old_keys.append(key) + new_keys.append(new_key) + for old_key, new_key in zip(old_keys, new_keys): + pt_state_dict[new_key] = pt_state_dict.pop(old_key) + + # Matt: All TF models store the actual model stem in a MainLayer class, including the base model. + # In PT, the derived models (with heads) use the base model class as the stem instead, and the base model + # just contains the stem itself, and there is no MainLayer class. This means that TF base classes have one + # extra layer in their weight names, corresponding to the MainLayer class. This code block compensates for that. + start_prefix_to_remove = "" + if not any(s.startswith(tf_model.base_model_prefix) for s in pt_state_dict.keys()): + start_prefix_to_remove = tf_model.base_model_prefix + "." + + symbolic_weights = tf_model.trainable_weights + tf_model.non_trainable_weights + tf_loaded_numel = 0 + weight_value_tuples = [] + all_pytorch_weights = set(pt_state_dict.keys()) + missing_keys = [] + mismatched_keys = [] + for symbolic_weight in symbolic_weights: + sw_name = symbolic_weight.name + name, transpose = convert_tf_weight_name_to_pt_weight_name( + sw_name, + start_prefix_to_remove=start_prefix_to_remove, + tf_weight_shape=symbolic_weight.shape, + name_scope=_prefix, + ) + if tf_to_pt_weight_rename is not None: + name = tf_to_pt_weight_rename(name) + + # Find associated numpy array in pytorch model state dict + if name not in pt_state_dict: + if allow_missing_keys: + missing_keys.append(name) + continue + elif tf_model._keys_to_ignore_on_load_missing is not None: + # authorized missing keys don't have to be loaded + if any(re.search(pat, name) is not None for pat in tf_model._keys_to_ignore_on_load_missing): + continue +> raise AttributeError(f"{name} not found in PyTorch model") +E AttributeError: tf_whisper_encoder_17.conv1.weight not found in PyTorch model + +src/transformers/modeling_tf_pytorch_utils.py:322: AttributeError +____________ TFWhisperEncoderModelTest.test_resize_token_embeddings ____________ + +self = + + def test_resize_token_embeddings(self): + # TODO (joao): after the embeddings refactor is complete, rework this test so as to rely exclusively on + # tf.keras.layers.Embedding + + if not self.test_resize_embeddings: + return + config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() + + def _get_word_embedding_weight(model, embedding_layer): + if isinstance(embedding_layer, tf.keras.layers.Embedding): + # builds the embeddings layer + model(model.dummy_inputs) + return embedding_layer.embeddings + else: + return model._get_word_embedding_weight(embedding_layer) + + for model_class in self.all_model_classes: + for size in [config.vocab_size - 10, config.vocab_size + 10, None]: + # build the embeddings + model = model_class(config=copy.deepcopy(config)) # `resize_token_embeddings` mutates `config` +> old_input_embeddings = _get_word_embedding_weight(model, model.get_input_embeddings()) + +tests/test_modeling_tf_common.py:1211: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +self = + + def get_input_embeddings(self) -> tf.keras.layers.Layer: + """ + Returns the model's input embeddings layer. + + Returns: + `tf.Variable`: The embeddings layer mapping vocabulary to hidden states. + """ + main_layer = getattr(self, self.base_model_prefix, self) + + if main_layer is not self: + return main_layer.get_input_embeddings() + else: +> raise NotImplementedError +E NotImplementedError + +src/transformers/modeling_tf_utils.py:1262: NotImplementedError +___________________ TFWhisperEncoderModelTest.test_save_load ___________________ + +self = + + def test_save_load(self): + config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() + + for model_class in self.all_model_classes: + model = model_class(config) + outputs = model(self._prepare_for_class(inputs_dict, model_class)) + + with tempfile.TemporaryDirectory() as tmpdirname: + model.save_pretrained(tmpdirname, saved_model=False) + + # the config file (and the generation config file, if it can generate) should be saved + self.assertTrue(os.path.exists(os.path.join(tmpdirname, CONFIG_NAME))) + self.assertEqual( + model.can_generate(), os.path.exists(os.path.join(tmpdirname, GENERATION_CONFIG_NAME)) + ) + +> model = model_class.from_pretrained(tmpdirname) + +tests/test_modeling_tf_common.py:247: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +src/transformers/modeling_tf_utils.py:2810: in from_pretrained + model(model.dummy_inputs) # build the network with dummy inputs +.env/lib/python3.8/site-packages/keras/utils/traceback_utils.py:70: in error_handler + raise e.with_traceback(filtered_tb) from None +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +self = +args = ({'decoder_input_ids': , 'input_features': <...], + [0.00129569, 0.1321398 , 0.4477892 , ..., 0.09110773, + 0.8213068 , 0.2709607 ]]], dtype=float32)>},) +kwargs = {} +fn_args_and_kwargs = {'input_features': {'decoder_input_ids': , '...9569, 0.1321398 , 0.4477892 , ..., 0.09110773, + 0.8213068 , 0.2709607 ]]], dtype=float32)>}, 'kwargs_call': {}} +config = WhisperConfig { + "_name_or_path": "/tmp/tmpozgmlb9x", + "activation_dropout": 0.0, + "activation_function": "gelu", + ..."transformers_version": "4.30.0.dev0", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} + +unpacked_inputs = {'decoder_input_ids': , 'encoder_outputs': N... [0.00129569, 0.1321398 , 0.4477892 , ..., 0.09110773, + 0.8213068 , 0.2709607 ]]], dtype=float32)>, ...} + + @functools.wraps(func) + def run_call_with_unpacked_inputs(self, *args, **kwargs): + # isolates the actual `**kwargs` for the decorated function + kwargs_call = {key: val for key, val in kwargs.items() if key not in dict(original_signature.parameters)} + fn_args_and_kwargs = {key: val for key, val in kwargs.items() if key not in kwargs_call} + fn_args_and_kwargs.update({"kwargs_call": kwargs_call}) + + # move any arg into kwargs, if they exist + fn_args_and_kwargs.update(dict(zip(func.__code__.co_varnames[1:], args))) + + # Encoder Decoder models delegate the application of the configuration options to their inner models. + if "EncoderDecoder" in self.__class__.__name__: + config = None + else: + config = self.config + + unpacked_inputs = input_processing(func, config, **fn_args_and_kwargs) +> return func(self, **unpacked_inputs) +E TypeError: Exception encountered when calling layer 'tf_whisper_for_audio_classification_20' (type TFWhisperForAudioClassification). +E +E call() got an unexpected keyword argument 'decoder_input_ids' +E +E Call arguments received by layer 'tf_whisper_for_audio_classification_20' (type TFWhisperForAudioClassification): +E • input_features={'input_features': 'tf.Tensor(shape=(2, 80, 59), dtype=float32)', 'decoder_input_ids': 'tf.Tensor(shape=(1, 2), dtype=int32)'} +E • head_mask=None +E • encoder_outputs=None +E • labels=None +E • output_attentions=None +E • output_hidden_states=None +E • return_dict=None + +src/transformers/modeling_tf_utils.py:434: TypeError +=============================== warnings summary =============================== +.env/lib/python3.8/site-packages/tensorflow/python/framework/dtypes.py:246 + /home/adit299/transformers/.env/lib/python3.8/site-packages/tensorflow/python/framework/dtypes.py:246: DeprecationWarning: `np.bool8` is a deprecated alias for `np.bool_`. (Deprecated NumPy 1.24) + np.bool8: (False, True), + +src/transformers/models/open_llama/modeling_open_llama.py:42 + /home/adit299/transformers/src/transformers/models/open_llama/modeling_open_llama.py:42: DeprecationWarning: The 'warn' method is deprecated, use 'warning' instead + logger.warn( + +-- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html +=========================== short test summary info ============================ +FAILED tests/models/whisper/test_modeling_tf_whisper.py::TFWhisperEncoderModelTest::test_compile_tf_model +FAILED tests/models/whisper/test_modeling_tf_whisper.py::TFWhisperEncoderModelTest::test_encoder_outputs +FAILED tests/models/whisper/test_modeling_tf_whisper.py::TFWhisperEncoderModelTest::test_forward_signature +FAILED tests/models/whisper/test_modeling_tf_whisper.py::TFWhisperEncoderModelTest::test_hidden_states_output +FAILED tests/models/whisper/test_modeling_tf_whisper.py::TFWhisperEncoderModelTest::test_onnx_compliancy +FAILED tests/models/whisper/test_modeling_tf_whisper.py::TFWhisperEncoderModelTest::test_prepare_serving_output +FAILED tests/models/whisper/test_modeling_tf_whisper.py::TFWhisperEncoderModelTest::test_pt_tf_model_equivalence +FAILED tests/models/whisper/test_modeling_tf_whisper.py::TFWhisperEncoderModelTest::test_resize_token_embeddings +FAILED tests/models/whisper/test_modeling_tf_whisper.py::TFWhisperEncoderModelTest::test_save_load +============= 9 failed, 25 passed, 6 skipped, 2 warnings in 39.74s ============= diff --git a/src/transformers/models/auto/modeling_tf_auto.py b/src/transformers/models/auto/modeling_tf_auto.py index 0118db319b87..72156dd6e468 100644 --- a/src/transformers/models/auto/modeling_tf_auto.py +++ b/src/transformers/models/auto/modeling_tf_auto.py @@ -467,7 +467,7 @@ ) TF_MODEL_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, TF_MODEL_MAPPING_NAMES) -TF_MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING_NAMES = _LazyAutoMapping( +TF_MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING = _LazyAutoMapping( CONFIG_MAPPING_NAMES, TF_MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING_NAMES ) diff --git a/src/transformers/models/whisper/modeling_tf_whisper.py b/src/transformers/models/whisper/modeling_tf_whisper.py index 2e450c16d190..2f4e47d15502 100644 --- a/src/transformers/models/whisper/modeling_tf_whisper.py +++ b/src/transformers/models/whisper/modeling_tf_whisper.py @@ -487,14 +487,14 @@ def dummy_inputs(self) -> Dict[str, tf.Tensor]: self.main_input_name: tf.random.uniform( [1, self.config.num_mel_bins, self.config.max_source_positions * 2 - 1], dtype=tf.float32 ), - # "decoder_input_ids": tf.constant([[1, 3]], dtype=tf.int32), + "decoder_input_ids": tf.constant([[1, 3]], dtype=tf.int32), } @property def input_signature(self): return { "input_features": tf.TensorSpec((None, self.config.num_mel_bins, None), tf.float32, name="input_features"), - # "decoder_input_ids": tf.TensorSpec((None, None), tf.int32, name="decoder_input_ids"), + "decoder_input_ids": tf.TensorSpec((None, None), tf.int32, name="decoder_input_ids"), "decoder_attention_mask": tf.TensorSpec((None, None), tf.int32, name="decoder_attention_mask"), } @@ -1626,7 +1626,26 @@ def get_input_embeddings(self): def set_input_embeddings(self, value): self.encoder.set_input_embeddings(value) + + @property + def dummy_inputs(self) -> Dict[str, tf.Tensor]: + """ + Dummy inputs to build the network. + Returns: + `Dict[str, tf.Tensor]`: The dummy inputs. + """ + return { + self.main_input_name: tf.random.uniform( + [1, self.config.num_mel_bins, self.config.max_source_positions * 2 - 1], dtype=tf.float32 + ), + } + + @property + def input_signature(self): + return { + "input_features": tf.TensorSpec((None, self.config.num_mel_bins, None), tf.float32, name="input_features"), + } @unpack_inputs def call( From 66fb2b5805d6b4412e94b542485aa98968dfcb6a Mon Sep 17 00:00:00 2001 From: adit299 Date: Mon, 4 Sep 2023 12:03:42 -0400 Subject: [PATCH 20/20] removing uneeded file --- myoutput.txt | 628 --------------------------------------------------- 1 file changed, 628 deletions(-) delete mode 100644 myoutput.txt diff --git a/myoutput.txt b/myoutput.txt deleted file mode 100644 index 81bc8eabe5c7..000000000000 --- a/myoutput.txt +++ /dev/null @@ -1,628 +0,0 @@ -============================= test session starts ============================== -platform linux -- Python 3.8.10, pytest-7.2.1, pluggy-1.0.0 -rootdir: /home/adit299/transformers, configfile: setup.cfg -plugins: hypothesis-6.64.0, timeout-2.1.0, xdist-3.1.0 -collected 40 items - -tests/models/whisper/test_modeling_tf_whisper.py .F...sFF..F............ [ 57%] -....FsFFF.Fs.ss.s [100%] - -=================================== FAILURES =================================== -_______________ TFWhisperEncoderModelTest.test_compile_tf_model ________________ - -self = - - def test_compile_tf_model(self): - config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() - max_input = getattr(self.model_tester, "max_position_embeddings", 512) - optimizer = tf.keras.optimizers.Adam(learning_rate=3e-5, epsilon=1e-08, clipnorm=1.0) - loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True) - metric = tf.keras.metrics.SparseCategoricalAccuracy("accuracy") - - for model_class in self.all_model_classes: - if model_class.__name__ in ["TFSpeech2TextModel", "TFSpeech2TextForConditionalGeneration"]: - inputs = { - "decoder_input_ids": tf.keras.Input( - batch_shape=(2, max_input), - name="decoder_input_ids", - dtype="int32", - ), - "input_features": tf.keras.Input( - batch_shape=( - 2, - max_input, - self.model_tester.input_feat_per_channel * self.model_tester.input_channels, - ), - name="input_features", - dtype="float32", - ), - } - elif model_class.__name__ in ["TFWhisperModel", "TFWhisperForConditionalGeneration"]: - inputs = { - "decoder_input_ids": tf.keras.Input( - batch_shape=(2, max_input), - name="decoder_input_ids", - dtype="int32", - ), - "input_features": tf.keras.Input( - batch_shape=( - 2, - self.model_tester.num_mel_bins, - self.model_tester.seq_length, - ), - name="input_features", - dtype="float32", - ), - } - elif self.is_encoder_decoder: - inputs = { - "decoder_input_ids": tf.keras.Input( - batch_shape=(2, max_input), - name="decoder_input_ids", - dtype="int32", - ), - "input_ids": tf.keras.Input(batch_shape=(2, max_input), name="input_ids", dtype="int32"), - } - # `pixel_values` implies that the input is an image - elif model_class.main_input_name == "pixel_values": - inputs = tf.keras.Input( - batch_shape=( - 3, - self.model_tester.num_channels, - self.model_tester.image_size, - self.model_tester.image_size, - ), - name="pixel_values", - dtype="float32", - ) - elif model_class.__name__ in ["TFCLIPModel", "TFGroupViTModel", "TFBlipModel"]: - inputs = { - "input_ids": tf.keras.Input(batch_shape=(3, max_input), name="input_ids", dtype="int32"), - "pixel_values": tf.keras.Input( - batch_shape=( - 3, - self.model_tester.vision_model_tester.num_channels, - self.model_tester.vision_model_tester.image_size, - self.model_tester.vision_model_tester.image_size, - ), - name="pixel_values", - dtype="float32", - ), - } - elif model_class in get_values(TF_MODEL_FOR_MULTIPLE_CHOICE_MAPPING): - inputs = tf.keras.Input(batch_shape=(4, 2, max_input), name="input_ids", dtype="int32") - else: - inputs = tf.keras.Input(batch_shape=(2, max_input), name="input_ids", dtype="int32") - - # Prepare our model - model = model_class(config) - model(self._prepare_for_class(inputs_dict, model_class)) # Model must be called before saving. - # Let's load it from the disk to be sure we can use pretrained weights - with tempfile.TemporaryDirectory() as tmpdirname: - model.save_pretrained(tmpdirname, saved_model=False) -> model = model_class.from_pretrained(tmpdirname) - -tests/test_modeling_tf_common.py:827: -_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ -src/transformers/modeling_tf_utils.py:2810: in from_pretrained - model(model.dummy_inputs) # build the network with dummy inputs -.env/lib/python3.8/site-packages/keras/utils/traceback_utils.py:70: in error_handler - raise e.with_traceback(filtered_tb) from None -_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ - -self = -args = ({'decoder_input_ids': , 'input_features': <...], - [0.18482292, 0.9640751 , 0.32960486, ..., 0.53566337, - 0.2877066 , 0.8109739 ]]], dtype=float32)>},) -kwargs = {} -fn_args_and_kwargs = {'input_features': {'decoder_input_ids': , '...2292, 0.9640751 , 0.32960486, ..., 0.53566337, - 0.2877066 , 0.8109739 ]]], dtype=float32)>}, 'kwargs_call': {}} -config = WhisperConfig { - "_name_or_path": "/tmp/tmpa3qgulea", - "activation_dropout": 0.0, - "activation_function": "gelu", - ..."transformers_version": "4.30.0.dev0", - "use_cache": true, - "use_weighted_layer_sum": false, - "vocab_size": 51865 -} - -unpacked_inputs = {'decoder_input_ids': , 'encoder_outputs': N... [0.18482292, 0.9640751 , 0.32960486, ..., 0.53566337, - 0.2877066 , 0.8109739 ]]], dtype=float32)>, ...} - - @functools.wraps(func) - def run_call_with_unpacked_inputs(self, *args, **kwargs): - # isolates the actual `**kwargs` for the decorated function - kwargs_call = {key: val for key, val in kwargs.items() if key not in dict(original_signature.parameters)} - fn_args_and_kwargs = {key: val for key, val in kwargs.items() if key not in kwargs_call} - fn_args_and_kwargs.update({"kwargs_call": kwargs_call}) - - # move any arg into kwargs, if they exist - fn_args_and_kwargs.update(dict(zip(func.__code__.co_varnames[1:], args))) - - # Encoder Decoder models delegate the application of the configuration options to their inner models. - if "EncoderDecoder" in self.__class__.__name__: - config = None - else: - config = self.config - - unpacked_inputs = input_processing(func, config, **fn_args_and_kwargs) -> return func(self, **unpacked_inputs) -E TypeError: Exception encountered when calling layer 'tf_whisper_for_audio_classification_4' (type TFWhisperForAudioClassification). -E -E call() got an unexpected keyword argument 'decoder_input_ids' -E -E Call arguments received by layer 'tf_whisper_for_audio_classification_4' (type TFWhisperForAudioClassification): -E • input_features={'input_features': 'tf.Tensor(shape=(2, 80, 59), dtype=float32)', 'decoder_input_ids': 'tf.Tensor(shape=(1, 2), dtype=int32)'} -E • head_mask=None -E • encoder_outputs=None -E • labels=None -E • output_attentions=None -E • output_hidden_states=None -E • return_dict=None - -src/transformers/modeling_tf_utils.py:434: TypeError -________________ TFWhisperEncoderModelTest.test_encoder_outputs ________________ - -self = - - def test_encoder_outputs(self): - config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() - - for model_class in self.all_model_classes: - model = model_class(config) - - inputs = copy.deepcopy(self._prepare_for_class(inputs_dict, model_class)) - outputs = model(**inputs)[0] - input_ids = inputs["input_features"] - del inputs["input_features"] - - encoder = model.encoder - inputs["encoder_outputs"] = encoder(input_ids) -> outputs_embeds = model(**inputs)[0] - -tests/models/whisper/test_modeling_tf_whisper.py:980: -_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ -.env/lib/python3.8/site-packages/keras/utils/traceback_utils.py:70: in error_handler - raise e.with_traceback(filtered_tb) from None -_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ - -self = -args = () -kwargs = {'encoder_outputs': TFBaseModelOutput(last_hidden_state=} - - def split_out_first_arg(self, args, kwargs): - """Splits (args, kwargs) into (inputs, args, kwargs).""" - # Grab the argument corresponding to the first argument in the - # layer's `call` method spec. This will either be the first positional - # argument, or it will be provided as a keyword argument. - if args: - inputs = args[0] - args = args[1:] - elif self._arg_names[0] in kwargs: - kwargs = copy.copy(kwargs) - inputs = kwargs.pop(self._arg_names[0]) - else: -> raise ValueError( - "The first argument to `Layer.call` must always be passed." - ) -E ValueError: The first argument to `Layer.call` must always be passed. - -.env/lib/python3.8/site-packages/keras/utils/layer_utils.py:809: ValueError -_______________ TFWhisperEncoderModelTest.test_forward_signature _______________ - -self = - - def test_forward_signature(self): - config, _ = self.model_tester.prepare_config_and_inputs_for_common() - - for model_class in self.all_model_classes: - model = model_class(config) -> signature = inspect.signature(model.forward) -E AttributeError: 'TFWhisperForAudioClassification' object has no attribute 'forward' - -tests/models/whisper/test_modeling_tf_whisper.py:955: AttributeError -_____________ TFWhisperEncoderModelTest.test_hidden_states_output ______________ - -self = - - def test_hidden_states_output(self): - config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() - - def check_hidden_states_output(config, inputs_dict, model_class): - model = model_class(config) - outputs = model(self._prepare_for_class(inputs_dict, model_class)) - expected_num_layers = getattr( - self.model_tester, "expected_num_hidden_layers", self.model_tester.num_hidden_layers + 1 - ) - - if model.config.is_encoder_decoder: - encoder_hidden_states = outputs.encoder_hidden_states - decoder_hidden_states = outputs.decoder_hidden_states - - self.assertEqual(config.output_attentions, False) - self.assertEqual(len(encoder_hidden_states), expected_num_layers) - self.assertListEqual( - list(encoder_hidden_states[0].shape[-2:]), - [self.model_tester.seq_length, self.model_tester.hidden_size], - ) - self.assertEqual(len(decoder_hidden_states), expected_num_layers) - self.assertListEqual( - list(decoder_hidden_states[0].shape[-2:]), - [self.model_tester.seq_length, self.model_tester.hidden_size], - ) - else: - hidden_states = outputs.hidden_states - self.assertEqual(config.output_attentions, False) - self.assertEqual(len(hidden_states), expected_num_layers) - self.assertListEqual( - list(hidden_states[0].shape[-2:]), - [self.model_tester.seq_length, self.model_tester.hidden_size], - ) - - for model_class in self.all_model_classes: - inputs_dict["output_hidden_states"] = True -> check_hidden_states_output(config, inputs_dict, model_class) - -tests/test_modeling_tf_common.py:1028: -_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ -tests/test_modeling_tf_common.py:1021: in check_hidden_states_output - self.assertListEqual( -E AssertionError: Lists differ: [30, 16] != [60, 16] -E -E First differing element 0: -E 30 -E 60 -E -E - [30, 16] -E ? ^ -E -E + [60, 16] -E ? ^ -________________ TFWhisperEncoderModelTest.test_onnx_compliancy ________________ - -self = - - def test_onnx_compliancy(self): -> if not self.test_onnx: -E AttributeError: 'TFWhisperEncoderModelTest' object has no attribute 'test_onnx' - -tests/test_modeling_tf_common.py:343: AttributeError -____________ TFWhisperEncoderModelTest.test_prepare_serving_output _____________ - -self = - - def test_prepare_serving_output(self): - config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() - config.output_hidden_states = True - config.output_attentions = self.has_attentions - - for model_class in self.all_model_classes: - model = model_class(config) - inputs = self._prepare_for_class(inputs_dict, model_class) - outputs = model(inputs) -> serving_outputs = model.serving_output(outputs) - -tests/test_modeling_tf_common.py:300: -_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ - -self = -output = TFSequenceClassifierOutput(loss=None, logits=)) - - def serving_output(self, output): - """ - Prepare the output of the saved model. Each model must implement this function. - - Args: - output ([`TFBaseModelOutput`]): - The output returned by the model. - """ -> raise NotImplementedError -E NotImplementedError - -src/transformers/modeling_tf_utils.py:1236: NotImplementedError -____________ TFWhisperEncoderModelTest.test_pt_tf_model_equivalence ____________ - -self = -allow_missing_keys = False - - @is_pt_tf_cross_test - def test_pt_tf_model_equivalence(self, allow_missing_keys=False): - import transformers - - for model_class in self.all_model_classes: - config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() - - # Output all for aggressive testing - config.output_hidden_states = True - config.output_attentions = self.has_attentions - - # Make sure no sequence has all zeros as attention mask, otherwise some tests fail due to the inconsistency - # of the usage `1e-4`, `1e-9`, `1e-30`, `-inf`. - # TODO: Use a uniform value for all models, make sure all tests pass without this processing, and remove it. - self._make_attention_mask_non_null(inputs_dict) - - pt_model_class_name = model_class.__name__[2:] # Skip the "TF" at the beginning - pt_model_class = getattr(transformers, pt_model_class_name) - - tf_model = model_class(config) - pt_model = pt_model_class(config) - - tf_inputs_dict = self._prepare_for_class(inputs_dict, model_class) - tf_inputs_dict_with_labels = self._prepare_for_class( - inputs_dict, - model_class, - # Not all models accept "labels" in the forward pass (yet :) ) - return_labels=True if "labels" in inspect.signature(model_class.call).parameters.keys() else False, - ) - - # For some models (e.g. base models), there is no label returned. - # Set the input dict to `None` to avoid check outputs twice for the same input dicts. - if not set(tf_inputs_dict_with_labels.keys()).symmetric_difference(tf_inputs_dict.keys()): - tf_inputs_dict_with_labels = None - - # Check we can load pt model in tf and vice-versa with model => model functions -> tf_model = transformers.load_pytorch_model_in_tf2_model( - tf_model, pt_model, tf_inputs=tf_inputs_dict, allow_missing_keys=allow_missing_keys - ) - -tests/test_modeling_tf_common.py:706: -_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ -src/transformers/modeling_tf_pytorch_utils.py:204: in load_pytorch_model_in_tf2_model - return load_pytorch_weights_in_tf2_model( -src/transformers/modeling_tf_pytorch_utils.py:230: in load_pytorch_weights_in_tf2_model - return load_pytorch_state_dict_in_tf2_model( -_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ - -tf_model = -pt_state_dict = {'classifier.bias': array([0., 0.], dtype=float32), 'classifier.weight': array([[-0.00916307, 0.00247839, 0.01647531... [ 0.04591416, -0.00516921, 0.03591024], - [-0.01335885, -0.01553431, -0.0269356 ]]], dtype=float32), ...} -tf_inputs = {'head_mask': } -allow_missing_keys = False, output_loading_info = False, _prefix = '' -tf_to_pt_weight_rename = None, ignore_mismatched_sizes = False - - def load_pytorch_state_dict_in_tf2_model( - tf_model, - pt_state_dict, - tf_inputs=None, - allow_missing_keys=False, - output_loading_info=False, - _prefix=None, - tf_to_pt_weight_rename=None, - ignore_mismatched_sizes=False, - ): - """Load a pytorch state_dict in a TF 2.0 model.""" - import tensorflow as tf - from packaging.version import parse - - if parse(tf.__version__) >= parse("2.11.0"): - from keras import backend as K - else: - from tensorflow.python.keras import backend as K - - if tf_inputs is None: - tf_inputs = tf_model.dummy_inputs - - if _prefix is None: - _prefix = "" - if tf_inputs is not None: - with tf.name_scope(_prefix): - tf_model(tf_inputs, training=False) # Make sure model is built - # Adapt state dict - TODO remove this and update the AWS weights files instead - # Convert old format to new format if needed from a PyTorch state_dict - old_keys = [] - new_keys = [] - for key in pt_state_dict.keys(): - new_key = None - if "gamma" in key: - new_key = key.replace("gamma", "weight") - if "beta" in key: - new_key = key.replace("beta", "bias") - if "running_var" in key: - new_key = key.replace("running_var", "moving_variance") - if "running_mean" in key: - new_key = key.replace("running_mean", "moving_mean") - if new_key: - old_keys.append(key) - new_keys.append(new_key) - for old_key, new_key in zip(old_keys, new_keys): - pt_state_dict[new_key] = pt_state_dict.pop(old_key) - - # Matt: All TF models store the actual model stem in a MainLayer class, including the base model. - # In PT, the derived models (with heads) use the base model class as the stem instead, and the base model - # just contains the stem itself, and there is no MainLayer class. This means that TF base classes have one - # extra layer in their weight names, corresponding to the MainLayer class. This code block compensates for that. - start_prefix_to_remove = "" - if not any(s.startswith(tf_model.base_model_prefix) for s in pt_state_dict.keys()): - start_prefix_to_remove = tf_model.base_model_prefix + "." - - symbolic_weights = tf_model.trainable_weights + tf_model.non_trainable_weights - tf_loaded_numel = 0 - weight_value_tuples = [] - all_pytorch_weights = set(pt_state_dict.keys()) - missing_keys = [] - mismatched_keys = [] - for symbolic_weight in symbolic_weights: - sw_name = symbolic_weight.name - name, transpose = convert_tf_weight_name_to_pt_weight_name( - sw_name, - start_prefix_to_remove=start_prefix_to_remove, - tf_weight_shape=symbolic_weight.shape, - name_scope=_prefix, - ) - if tf_to_pt_weight_rename is not None: - name = tf_to_pt_weight_rename(name) - - # Find associated numpy array in pytorch model state dict - if name not in pt_state_dict: - if allow_missing_keys: - missing_keys.append(name) - continue - elif tf_model._keys_to_ignore_on_load_missing is not None: - # authorized missing keys don't have to be loaded - if any(re.search(pat, name) is not None for pat in tf_model._keys_to_ignore_on_load_missing): - continue -> raise AttributeError(f"{name} not found in PyTorch model") -E AttributeError: tf_whisper_encoder_17.conv1.weight not found in PyTorch model - -src/transformers/modeling_tf_pytorch_utils.py:322: AttributeError -____________ TFWhisperEncoderModelTest.test_resize_token_embeddings ____________ - -self = - - def test_resize_token_embeddings(self): - # TODO (joao): after the embeddings refactor is complete, rework this test so as to rely exclusively on - # tf.keras.layers.Embedding - - if not self.test_resize_embeddings: - return - config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() - - def _get_word_embedding_weight(model, embedding_layer): - if isinstance(embedding_layer, tf.keras.layers.Embedding): - # builds the embeddings layer - model(model.dummy_inputs) - return embedding_layer.embeddings - else: - return model._get_word_embedding_weight(embedding_layer) - - for model_class in self.all_model_classes: - for size in [config.vocab_size - 10, config.vocab_size + 10, None]: - # build the embeddings - model = model_class(config=copy.deepcopy(config)) # `resize_token_embeddings` mutates `config` -> old_input_embeddings = _get_word_embedding_weight(model, model.get_input_embeddings()) - -tests/test_modeling_tf_common.py:1211: -_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ - -self = - - def get_input_embeddings(self) -> tf.keras.layers.Layer: - """ - Returns the model's input embeddings layer. - - Returns: - `tf.Variable`: The embeddings layer mapping vocabulary to hidden states. - """ - main_layer = getattr(self, self.base_model_prefix, self) - - if main_layer is not self: - return main_layer.get_input_embeddings() - else: -> raise NotImplementedError -E NotImplementedError - -src/transformers/modeling_tf_utils.py:1262: NotImplementedError -___________________ TFWhisperEncoderModelTest.test_save_load ___________________ - -self = - - def test_save_load(self): - config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() - - for model_class in self.all_model_classes: - model = model_class(config) - outputs = model(self._prepare_for_class(inputs_dict, model_class)) - - with tempfile.TemporaryDirectory() as tmpdirname: - model.save_pretrained(tmpdirname, saved_model=False) - - # the config file (and the generation config file, if it can generate) should be saved - self.assertTrue(os.path.exists(os.path.join(tmpdirname, CONFIG_NAME))) - self.assertEqual( - model.can_generate(), os.path.exists(os.path.join(tmpdirname, GENERATION_CONFIG_NAME)) - ) - -> model = model_class.from_pretrained(tmpdirname) - -tests/test_modeling_tf_common.py:247: -_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ -src/transformers/modeling_tf_utils.py:2810: in from_pretrained - model(model.dummy_inputs) # build the network with dummy inputs -.env/lib/python3.8/site-packages/keras/utils/traceback_utils.py:70: in error_handler - raise e.with_traceback(filtered_tb) from None -_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ - -self = -args = ({'decoder_input_ids': , 'input_features': <...], - [0.00129569, 0.1321398 , 0.4477892 , ..., 0.09110773, - 0.8213068 , 0.2709607 ]]], dtype=float32)>},) -kwargs = {} -fn_args_and_kwargs = {'input_features': {'decoder_input_ids': , '...9569, 0.1321398 , 0.4477892 , ..., 0.09110773, - 0.8213068 , 0.2709607 ]]], dtype=float32)>}, 'kwargs_call': {}} -config = WhisperConfig { - "_name_or_path": "/tmp/tmpozgmlb9x", - "activation_dropout": 0.0, - "activation_function": "gelu", - ..."transformers_version": "4.30.0.dev0", - "use_cache": true, - "use_weighted_layer_sum": false, - "vocab_size": 51865 -} - -unpacked_inputs = {'decoder_input_ids': , 'encoder_outputs': N... [0.00129569, 0.1321398 , 0.4477892 , ..., 0.09110773, - 0.8213068 , 0.2709607 ]]], dtype=float32)>, ...} - - @functools.wraps(func) - def run_call_with_unpacked_inputs(self, *args, **kwargs): - # isolates the actual `**kwargs` for the decorated function - kwargs_call = {key: val for key, val in kwargs.items() if key not in dict(original_signature.parameters)} - fn_args_and_kwargs = {key: val for key, val in kwargs.items() if key not in kwargs_call} - fn_args_and_kwargs.update({"kwargs_call": kwargs_call}) - - # move any arg into kwargs, if they exist - fn_args_and_kwargs.update(dict(zip(func.__code__.co_varnames[1:], args))) - - # Encoder Decoder models delegate the application of the configuration options to their inner models. - if "EncoderDecoder" in self.__class__.__name__: - config = None - else: - config = self.config - - unpacked_inputs = input_processing(func, config, **fn_args_and_kwargs) -> return func(self, **unpacked_inputs) -E TypeError: Exception encountered when calling layer 'tf_whisper_for_audio_classification_20' (type TFWhisperForAudioClassification). -E -E call() got an unexpected keyword argument 'decoder_input_ids' -E -E Call arguments received by layer 'tf_whisper_for_audio_classification_20' (type TFWhisperForAudioClassification): -E • input_features={'input_features': 'tf.Tensor(shape=(2, 80, 59), dtype=float32)', 'decoder_input_ids': 'tf.Tensor(shape=(1, 2), dtype=int32)'} -E • head_mask=None -E • encoder_outputs=None -E • labels=None -E • output_attentions=None -E • output_hidden_states=None -E • return_dict=None - -src/transformers/modeling_tf_utils.py:434: TypeError -=============================== warnings summary =============================== -.env/lib/python3.8/site-packages/tensorflow/python/framework/dtypes.py:246 - /home/adit299/transformers/.env/lib/python3.8/site-packages/tensorflow/python/framework/dtypes.py:246: DeprecationWarning: `np.bool8` is a deprecated alias for `np.bool_`. (Deprecated NumPy 1.24) - np.bool8: (False, True), - -src/transformers/models/open_llama/modeling_open_llama.py:42 - /home/adit299/transformers/src/transformers/models/open_llama/modeling_open_llama.py:42: DeprecationWarning: The 'warn' method is deprecated, use 'warning' instead - logger.warn( - --- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html -=========================== short test summary info ============================ -FAILED tests/models/whisper/test_modeling_tf_whisper.py::TFWhisperEncoderModelTest::test_compile_tf_model -FAILED tests/models/whisper/test_modeling_tf_whisper.py::TFWhisperEncoderModelTest::test_encoder_outputs -FAILED tests/models/whisper/test_modeling_tf_whisper.py::TFWhisperEncoderModelTest::test_forward_signature -FAILED tests/models/whisper/test_modeling_tf_whisper.py::TFWhisperEncoderModelTest::test_hidden_states_output -FAILED tests/models/whisper/test_modeling_tf_whisper.py::TFWhisperEncoderModelTest::test_onnx_compliancy -FAILED tests/models/whisper/test_modeling_tf_whisper.py::TFWhisperEncoderModelTest::test_prepare_serving_output -FAILED tests/models/whisper/test_modeling_tf_whisper.py::TFWhisperEncoderModelTest::test_pt_tf_model_equivalence -FAILED tests/models/whisper/test_modeling_tf_whisper.py::TFWhisperEncoderModelTest::test_resize_token_embeddings -FAILED tests/models/whisper/test_modeling_tf_whisper.py::TFWhisperEncoderModelTest::test_save_load -============= 9 failed, 25 passed, 6 skipped, 2 warnings in 39.74s =============