From 246f2d26c4e6193d6be39ca53029f3034411aa58 Mon Sep 17 00:00:00 2001
From: adit299 <akarishnan@gmail.com>
Date: Sat, 11 Mar 2023 11:58:29 -0500
Subject: [PATCH 01/20] commenced work on supporting audio classification task
 for whisper model in tensorflow

---
 src/transformers/models/whisper/modeling_tf_whisper.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/transformers/models/whisper/modeling_tf_whisper.py b/src/transformers/models/whisper/modeling_tf_whisper.py
index 7a76d42fd526..35427f5d25a2 100644
--- a/src/transformers/models/whisper/modeling_tf_whisper.py
+++ b/src/transformers/models/whisper/modeling_tf_whisper.py
@@ -28,6 +28,7 @@
     TFBaseModelOutputWithPastAndCrossAttentions,
     TFSeq2SeqLMOutput,
     TFSeq2SeqModelOutput,
+    TFSequenceClassifierOutput
 )
 from ...modeling_tf_utils import (
     TFCausalLanguageModelingLoss,

From 6c4d26996d933bb7d9c39a457df601b0d7209b5a Mon Sep 17 00:00:00 2001
From: adit299 <akarishnan@gmail.com>
Date: Fri, 31 Mar 2023 11:16:16 -0400
Subject: [PATCH 02/20] initial implementation of Whisper Audio Classification
 in tf finished

---
 .../models/whisper/modeling_tf_whisper.py     | 85 +++++++++++++++++++
 1 file changed, 85 insertions(+)

diff --git a/src/transformers/models/whisper/modeling_tf_whisper.py b/src/transformers/models/whisper/modeling_tf_whisper.py
index c5c25e315a88..95b94a8ab16d 100644
--- a/src/transformers/models/whisper/modeling_tf_whisper.py
+++ b/src/transformers/models/whisper/modeling_tf_whisper.py
@@ -1387,3 +1387,88 @@ def prepare_inputs_for_generation(
             "decoder_attention_mask": decoder_attention_mask,
             "decoder_position_ids": decoder_position_ids,
         }
+
+class TFWhisperForAudioClassification(TFWhisperPreTrainedModel):
+    def __init__(self, config):
+        super().__init__(config)
+        
+        self.encoder = TFWhisperEncoder(config)
+        num_layers = config.num_hidden_layers + 1
+        if config.use_weighted_layer_sum:
+            self.layer_weights = tf.Variable(tf.ones(shape=(num_layers,)) / num_layers)
+        self.projector = tf.keras.layers.Dense(units=config.classifier_proj_size, input_shape=(config.hidden_size,))
+        self.classifier = tf.keras.layers.Dense(units=config.num_labels, input_shape=(config.classifier_proj_size,), 
+                                                activation=None)
+    
+    @unpack_inputs
+    def call(
+        self,
+        input_features: Optional[tf.Tensor] = None,
+        head_mask: Optional[tf.Tensor] = None,
+        encoder_outputs: Optional[Tuple[Tuple[tf.Tensor]]] = None,
+        labels: Optional[tf.Tensor] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None
+    ):
+        return_dict = return_dict if return_dict is not None else self.config.use_return_dict
+        output_hidden_states = True if self.config.use_weighted_layer_sum else output_hidden_states
+
+        outputs = self.encoder(
+            input_features,
+            head_mask=head_mask,
+            output_attentions=output_attentions,
+            output_hidden_states=output_hidden_states,
+            return_dict=return_dict,
+        )
+        if self.config.use_weighted_layer_sum:
+            hidden_states = tf.stack(encoder_outputs, axis=1)
+            norm_weights = tf.nn.softmax(self.layer_weights, axis=-1)
+            hidden_states = tf.reduce_sum(hidden_states * tf.reshape(norm_weights, [-1, 1, 1]), axis=1)
+        else:
+            hidden_states = encoder_outputs[0]
+
+        hidden_states = self.projector(hidden_states)
+        pooled_output = tf.reduce_mean(hidden_states, axis=1)
+
+        logits = self.classifier(pooled_output)
+        
+        loss = None
+        
+        if labels is not None:
+            loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
+            loss = loss_fn(tf.reshape(labels, [-1]), tf.reshape(logits, [-1, self.config.num_labels]))
+        
+        if not return_dict:
+            output = (logits,) + encoder_outputs[1:]
+            return ((loss,) + output) if loss is not None else output
+
+        return TFSequenceClassifierOutput(
+            loss=loss,
+            logits=logits,
+            hidden_states=outputs.hidden_states,
+            attentions=outputs.attentions,
+        )
+
+
+
+
+
+
+        
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+    
+     

From 5e61c4cecc15dd1c389679443169a44caa51e7ad Mon Sep 17 00:00:00 2001
From: adit299 <akarishnan@gmail.com>
Date: Tue, 4 Apr 2023 10:44:18 -0400
Subject: [PATCH 03/20] registering whisper audio classification in tf

---
 docs/source/en/model_doc/whisper.mdx        | 4 ++++
 src/transformers/models/whisper/__init__.py | 2 ++
 2 files changed, 6 insertions(+)

diff --git a/docs/source/en/model_doc/whisper.mdx b/docs/source/en/model_doc/whisper.mdx
index 22b08e4e61bc..1d4226cf8dab 100644
--- a/docs/source/en/model_doc/whisper.mdx
+++ b/docs/source/en/model_doc/whisper.mdx
@@ -95,6 +95,10 @@ The original code can be found [here](https://github.com/openai/whisper).
 [[autodoc]] TFWhisperForConditionalGeneration
     - call
 
+## TFWhisperForAudioClassification
+
+[[autodoc]] TFWhisperForAudioClassification
+    - call
 
 ## FlaxWhisperModel
 
diff --git a/src/transformers/models/whisper/__init__.py b/src/transformers/models/whisper/__init__.py
index 3b6015a56f6f..34df1c8a7d32 100644
--- a/src/transformers/models/whisper/__init__.py
+++ b/src/transformers/models/whisper/__init__.py
@@ -63,6 +63,7 @@
         "TFWhisperForConditionalGeneration",
         "TFWhisperModel",
         "TFWhisperPreTrainedModel",
+        "TFWhisperForAudioClassification"
     ]
 
 try:
@@ -114,6 +115,7 @@
     else:
         from .modeling_tf_whisper import (
             TF_WHISPER_PRETRAINED_MODEL_ARCHIVE_LIST,
+            TFWhisperForAudioClassification,
             TFWhisperForConditionalGeneration,
             TFWhisperModel,
             TFWhisperPreTrainedModel,

From d204aea7314217fa8b47e7418ead0d9973f50ccd Mon Sep 17 00:00:00 2001
From: adit299 <akarishnan@gmail.com>
Date: Wed, 26 Apr 2023 20:44:09 -0400
Subject: [PATCH 04/20] commencing work on writing tests

---
 .../models/auto/modeling_tf_auto.py           |   9 +
 .../models/whisper/modeling_tf_whisper.py     |  24 +--
 src/transformers/utils/dummy_tf_objects.py    |   7 +-
 .../whisper/test_modeling_tf_whisper.py       | 190 +++++++++++++++++-
 4 files changed, 205 insertions(+), 25 deletions(-)

diff --git a/src/transformers/models/auto/modeling_tf_auto.py b/src/transformers/models/auto/modeling_tf_auto.py
index 8d7d72711ec2..ee61590abaff 100644
--- a/src/transformers/models/auto/modeling_tf_auto.py
+++ b/src/transformers/models/auto/modeling_tf_auto.py
@@ -164,6 +164,13 @@
     ]
 )
 
+TF_MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING_NAMES = OrderedDict(
+    [
+        ("whisper", "TFWhisperForAudioClassification")
+    ]
+)
+
+
 TF_MODEL_FOR_CAUSAL_LM_MAPPING_NAMES = OrderedDict(
     [
         # Model for Causal LM mapping
@@ -427,6 +434,8 @@
 )
 
 TF_MODEL_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, TF_MODEL_MAPPING_NAMES)
+TF_MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING_NAMES = _LazyAutoMapping(CONFIG_MAPPING_NAMES, TF_MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING_NAMES)
+
 TF_MODEL_FOR_PRETRAINING_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, TF_MODEL_FOR_PRETRAINING_MAPPING_NAMES)
 TF_MODEL_WITH_LM_HEAD_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, TF_MODEL_WITH_LM_HEAD_MAPPING_NAMES)
 TF_MODEL_FOR_CAUSAL_LM_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, TF_MODEL_FOR_CAUSAL_LM_MAPPING_NAMES)
diff --git a/src/transformers/models/whisper/modeling_tf_whisper.py b/src/transformers/models/whisper/modeling_tf_whisper.py
index 95b94a8ab16d..e08562059857 100644
--- a/src/transformers/models/whisper/modeling_tf_whisper.py
+++ b/src/transformers/models/whisper/modeling_tf_whisper.py
@@ -1449,26 +1449,6 @@ def call(
             hidden_states=outputs.hidden_states,
             attentions=outputs.attentions,
         )
-
-
-
-
-
-
-        
-
-
-
-
-
-
-
-
-
-
-
-
-
-
     
-     
+
+    
\ No newline at end of file
diff --git a/src/transformers/utils/dummy_tf_objects.py b/src/transformers/utils/dummy_tf_objects.py
index 166b02854d54..9239f742eea0 100644
--- a/src/transformers/utils/dummy_tf_objects.py
+++ b/src/transformers/utils/dummy_tf_objects.py
@@ -2607,12 +2607,17 @@ def __init__(self, *args, **kwargs):
 TF_WHISPER_PRETRAINED_MODEL_ARCHIVE_LIST = None
 
 
-class TFWhisperForConditionalGeneration(metaclass=DummyObject):
+class TFWhisperForAudioClassification(metaclass=DummyObject):
     _backends = ["tf"]
 
     def __init__(self, *args, **kwargs):
         requires_backends(self, ["tf"])
 
+class TFWhisperForConditionalGeneration(metaclass=DummyObject):
+    _backends = ["tf"]
+
+    def __init__(self, *args, **kwargs):
+        requires_backends(self, ["tf"])
 
 class TFWhisperModel(metaclass=DummyObject):
     _backends = ["tf"]
diff --git a/tests/models/whisper/test_modeling_tf_whisper.py b/tests/models/whisper/test_modeling_tf_whisper.py
index 2ef3cdcee02a..10687549158e 100644
--- a/tests/models/whisper/test_modeling_tf_whisper.py
+++ b/tests/models/whisper/test_modeling_tf_whisper.py
@@ -13,7 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """ Testing suite for the TensorFlow Whisper model. """
-
+import copy
 import inspect
 import tempfile
 import traceback
@@ -39,7 +39,7 @@
 if is_tf_available():
     import tensorflow as tf
 
-    from transformers import TFWhisperForConditionalGeneration, TFWhisperModel, set_seed
+    from transformers import TFWhisperForConditionalGeneration, TFWhisperModel, TFWhisperForAudioClassification, set_seed
     from transformers.models.whisper.modeling_tf_whisper import TFWhisperDecoder, TFWhisperEncoder
 
 
@@ -803,6 +803,192 @@ def _test_large_batched_generation(in_queue, out_queue, timeout):
     out_queue.put(results, timeout=timeout)
     out_queue.join()
 
+@require_tf
+class TFWhisperEncoderModelTester:
+    def __init__(
+        self,
+        parent,
+        batch_size=13,
+        seq_length=60,
+        is_training=True,
+        use_labels=True,
+        hidden_size=16,
+        num_hidden_layers=2,
+        num_attention_heads=4,
+        input_channels=1,
+        hidden_act="gelu",
+        hidden_dropout_prob=0.1,
+        attention_probs_dropout_prob=0.1,
+        max_position_embeddings=20,
+        max_source_positions=30,
+        num_mel_bins=80,
+        num_conv_layers=1,
+        suppress_tokens=None,
+        begin_suppress_tokens=None,
+        classifier_proj_size=4,
+        num_labels=2,
+        is_encoder_decoder=False,
+        is_decoder=False,
+    ):
+        self.parent = parent
+        self.batch_size = batch_size
+        self.seq_length = seq_length
+        self.is_training = is_training
+        self.use_labels = use_labels
+        self.hidden_size = hidden_size
+        self.num_hidden_layers = num_hidden_layers
+        self.num_attention_heads = num_attention_heads
+        self.input_channels = input_channels
+        self.hidden_act = hidden_act
+        self.hidden_dropout_prob = hidden_dropout_prob
+        self.attention_probs_dropout_prob = attention_probs_dropout_prob
+        self.num_mel_bins = num_mel_bins
+        self.max_position_embeddings = max_position_embeddings
+        self.max_source_positions = max_source_positions
+        self.num_conv_layers = num_conv_layers
+        self.suppress_tokens = suppress_tokens
+        self.begin_suppress_tokens = begin_suppress_tokens
+        self.classifier_proj_size = classifier_proj_size
+        self.num_labels = num_labels
+        self.is_encoder_decoder = is_encoder_decoder
+        self.is_decoder = is_decoder
+
+    def get_config(self):
+        return WhisperConfig(
+            d_model=self.hidden_size,
+            encoder_layers=self.num_hidden_layers,
+            decoder_layers=self.num_hidden_layers,
+            encoder_attention_heads=self.num_attention_heads,
+            decoder_attention_heads=self.num_attention_heads,
+            input_channels=self.input_channels,
+            dropout=self.hidden_dropout_prob,
+            attention_dropout=self.attention_probs_dropout_prob,
+            max_position_embeddings=self.max_position_embeddings,
+            max_source_positions=self.max_source_positions,
+            decoder_ffn_dim=self.hidden_size,
+            encoder_ffn_dim=self.hidden_size,
+            suppress_tokens=self.suppress_tokens,
+            begin_suppress_tokens=self.begin_suppress_tokens,
+            classifier_proj_size=self.classifier_proj_size,
+            num_labels=self.num_labels,
+            is_encoder_decoder=self.is_encoder_decoder,
+            is_decoder=self.is_decoder,
+        )
+
+    def prepare_config_and_inputs(self):
+        input_features = floats_tensor([self.batch_size, self.num_mel_bins, self.seq_length])
+
+        config = self.get_config()
+        inputs_dict = prepare_whisper_encoder_inputs_dict(
+            config,
+            input_features=input_features,
+        )
+        return config, inputs_dict
+
+    def prepare_config_and_inputs_for_common(self):
+        config, inputs_dict = self.prepare_config_and_inputs()
+        return config, inputs_dict
+
+    def get_subsampled_output_lengths(self, input_lengths):
+        """
+        Computes the output length of the convolutional layers
+        """
+
+        for i in range(self.num_conv_layers):
+            input_lengths = (input_lengths - 1) // 2 + 1
+
+        return input_lengths
+
+    @property
+    def encoder_seq_length(self):
+        return self.get_subsampled_output_lengths(self.seq_length)
+
+    def create_and_check_model_forward(self, config, inputs_dict, freeze_encoder=False):
+        model = TFWhisperForAudioClassification(config=config)
+
+        if freeze_encoder:
+            model.freeze_encoder()
+
+        input_features = inputs_dict["input_features"]
+
+        # first forward pass
+        last_hidden_state = model(input_features).logits
+
+        self.parent.assertTrue(last_hidden_state.shape, (13, 2))
+
+
+def prepare_whisper_encoder_inputs_dict(config, input_features, head_mask=None):
+    if head_mask is None:
+        head_mask = tf.ones([config.encoder_layers, config.encoder_attention_heads])
+    return {"input_features": input_features, "head_mask": head_mask}
+
+@require_tf
+class TFWhisperEncoderModelTest(TFModelTesterMixin, TFGenerationTesterMixin, unittest.TestCase):
+    all_model_classes = (TFWhisperForAudioClassification,) if is_tf_available() else ()
+    is_encoder_decoder = False
+    fx_compatible = False
+    test_pruning = False
+    test_missing_keys = False
+
+    input_name = "input_features"
+
+    def setUp(self):
+        self.model_tester = TFWhisperEncoderModelTester(self)
+        self.config_tester = ConfigTester(self, config_class=WhisperConfig)
+        self.maxDiff = 3000
+
+    def test_config(self):
+        self.config_tester.run_common_tests()
+
+    def test_forward_signature(self):
+        config, _ = self.model_tester.prepare_config_and_inputs_for_common()
+
+        for model_class in self.all_model_classes:
+            model = model_class(config)
+            signature = inspect.signature(model.forward)
+            # signature.parameters is an OrderedDict => so arg_names order is deterministic
+            arg_names = [*signature.parameters.keys()]
+
+            expected_arg_names = ["input_features", "head_mask", "encoder_outputs"]
+            self.assertListEqual(arg_names[: len(expected_arg_names)], expected_arg_names)
+
+    # input embeds is meaningless for an encoder-only acoustic model
+    def test_inputs_embeds(self):
+        pass
+
+    # the equivalent test is passing the encoder outputs directly to the model
+    def test_encoder_outputs(self):
+        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
+
+        for model_class in self.all_model_classes:
+            model = model_class(config)
+            model.to(torch_device)
+            model.eval()
+
+            inputs = copy.deepcopy(self._prepare_for_class(inputs_dict, model_class))
+
+            with tf.stop_gradient(inputs):
+                outputs = model(**inputs)[0]
+
+            input_ids = inputs["input_features"]
+            del inputs["input_features"]
+
+            encoder = model.encoder
+
+            with tf.stop_gradient(inputs):
+                inputs["encoder_outputs"] = encoder(input_ids)
+                outputs_embeds = model(**inputs)[0]
+
+            self.assertTrue((outputs_embeds == outputs).all())
+
+    # WhisperEncoder has no inputs_embeds and thus the `get_input_embeddings` fn is not implemented
+    def test_model_common_attributes(self):
+        pass
+
+    # WhisperEncoder cannot resize token embeddings since it has no tokens embeddings
+    def test_resize_tokens_embeddings(self):
+        pass
+
 
 @require_tf
 @require_tokenizers

From 7fe04e667df1b625fb57b66e3639dd439e68bf9a Mon Sep 17 00:00:00 2001
From: adit299 <akarishnan@gmail.com>
Date: Mon, 1 May 2023 12:11:24 -0400
Subject: [PATCH 05/20] reformat code and drop PyTorch-only calls from TF encoder tests

---
 .../models/auto/modeling_tf_auto.py           | 10 ++++----
 src/transformers/models/whisper/__init__.py   |  2 +-
 .../models/whisper/modeling_tf_whisper.py     | 23 +++++++++---------
 src/transformers/utils/dummy_tf_objects.py    |  7 +-----
 .../whisper/test_modeling_tf_whisper.py       | 24 +++++++++----------
 5 files changed, 29 insertions(+), 37 deletions(-)

diff --git a/src/transformers/models/auto/modeling_tf_auto.py b/src/transformers/models/auto/modeling_tf_auto.py
index ee61590abaff..db65a4c5e341 100644
--- a/src/transformers/models/auto/modeling_tf_auto.py
+++ b/src/transformers/models/auto/modeling_tf_auto.py
@@ -164,11 +164,7 @@
     ]
 )
 
-TF_MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING_NAMES = OrderedDict(
-    [
-        ("whisper", "TFWhisperForAudioClassification")
-    ]
-)
+TF_MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING_NAMES = OrderedDict([("whisper", "TFWhisperForAudioClassification")])
 
 
 TF_MODEL_FOR_CAUSAL_LM_MAPPING_NAMES = OrderedDict(
@@ -434,7 +430,9 @@
 )
 
 TF_MODEL_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, TF_MODEL_MAPPING_NAMES)
-TF_MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING_NAMES = _LazyAutoMapping(CONFIG_MAPPING_NAMES, TF_MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING_NAMES)
+TF_MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING_NAMES = _LazyAutoMapping(
+    CONFIG_MAPPING_NAMES, TF_MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING_NAMES
+)
 
 TF_MODEL_FOR_PRETRAINING_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, TF_MODEL_FOR_PRETRAINING_MAPPING_NAMES)
 TF_MODEL_WITH_LM_HEAD_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, TF_MODEL_WITH_LM_HEAD_MAPPING_NAMES)
diff --git a/src/transformers/models/whisper/__init__.py b/src/transformers/models/whisper/__init__.py
index 34df1c8a7d32..8aa0749cdcc7 100644
--- a/src/transformers/models/whisper/__init__.py
+++ b/src/transformers/models/whisper/__init__.py
@@ -63,7 +63,7 @@
         "TFWhisperForConditionalGeneration",
         "TFWhisperModel",
         "TFWhisperPreTrainedModel",
-        "TFWhisperForAudioClassification"
+        "TFWhisperForAudioClassification",
     ]
 
 try:
diff --git a/src/transformers/models/whisper/modeling_tf_whisper.py b/src/transformers/models/whisper/modeling_tf_whisper.py
index e08562059857..4e479a84f415 100644
--- a/src/transformers/models/whisper/modeling_tf_whisper.py
+++ b/src/transformers/models/whisper/modeling_tf_whisper.py
@@ -28,7 +28,7 @@
     TFBaseModelOutputWithPastAndCrossAttentions,
     TFSeq2SeqLMOutput,
     TFSeq2SeqModelOutput,
-    TFSequenceClassifierOutput
+    TFSequenceClassifierOutput,
 )
 from ...modeling_tf_utils import (
     TFCausalLanguageModelingLoss,
@@ -1388,18 +1388,20 @@ def prepare_inputs_for_generation(
             "decoder_position_ids": decoder_position_ids,
         }
 
+
 class TFWhisperForAudioClassification(TFWhisperPreTrainedModel):
     def __init__(self, config):
         super().__init__(config)
-        
+
         self.encoder = TFWhisperEncoder(config)
         num_layers = config.num_hidden_layers + 1
         if config.use_weighted_layer_sum:
             self.layer_weights = tf.Variable(tf.ones(shape=(num_layers,)) / num_layers)
         self.projector = tf.keras.layers.Dense(units=config.classifier_proj_size, input_shape=(config.hidden_size,))
-        self.classifier = tf.keras.layers.Dense(units=config.num_labels, input_shape=(config.classifier_proj_size,), 
-                                                activation=None)
-    
+        self.classifier = tf.keras.layers.Dense(
+            units=config.num_labels, input_shape=(config.classifier_proj_size,), activation=None
+        )
+
     @unpack_inputs
     def call(
         self,
@@ -1409,7 +1411,7 @@ def call(
         labels: Optional[tf.Tensor] = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
-        return_dict: Optional[bool] = None
+        return_dict: Optional[bool] = None,
     ):
         return_dict = return_dict if return_dict is not None else self.config.use_return_dict
         output_hidden_states = True if self.config.use_weighted_layer_sum else output_hidden_states
@@ -1432,13 +1434,13 @@ def call(
         pooled_output = tf.reduce_mean(hidden_states, axis=1)
 
         logits = self.classifier(pooled_output)
-        
+
         loss = None
-        
+
         if labels is not None:
             loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
             loss = loss_fn(tf.reshape(labels, [-1]), tf.reshape(logits, [-1, self.config.num_labels]))
-        
+
         if not return_dict:
             output = (logits,) + encoder_outputs[1:]
             return ((loss,) + output) if loss is not None else output
@@ -1449,6 +1451,3 @@ def call(
             hidden_states=outputs.hidden_states,
             attentions=outputs.attentions,
         )
-    
-
-    
\ No newline at end of file
diff --git a/src/transformers/utils/dummy_tf_objects.py b/src/transformers/utils/dummy_tf_objects.py
index 9239f742eea0..166b02854d54 100644
--- a/src/transformers/utils/dummy_tf_objects.py
+++ b/src/transformers/utils/dummy_tf_objects.py
@@ -2607,18 +2607,13 @@ def __init__(self, *args, **kwargs):
 TF_WHISPER_PRETRAINED_MODEL_ARCHIVE_LIST = None
 
 
-class TFWhisperForAudioClassification(metaclass=DummyObject):
-    _backends = ["tf"]
-
-    def __init__(self, *args, **kwargs):
-        requires_backends(self, ["tf"])
-
 class TFWhisperForConditionalGeneration(metaclass=DummyObject):
     _backends = ["tf"]
 
     def __init__(self, *args, **kwargs):
         requires_backends(self, ["tf"])
 
+
 class TFWhisperModel(metaclass=DummyObject):
     _backends = ["tf"]
 
diff --git a/tests/models/whisper/test_modeling_tf_whisper.py b/tests/models/whisper/test_modeling_tf_whisper.py
index 10687549158e..9bec453af741 100644
--- a/tests/models/whisper/test_modeling_tf_whisper.py
+++ b/tests/models/whisper/test_modeling_tf_whisper.py
@@ -39,7 +39,12 @@
 if is_tf_available():
     import tensorflow as tf
 
-    from transformers import TFWhisperForConditionalGeneration, TFWhisperModel, TFWhisperForAudioClassification, set_seed
+    from transformers import (
+        TFWhisperForAudioClassification,
+        TFWhisperForConditionalGeneration,
+        TFWhisperModel,
+        set_seed,
+    )
     from transformers.models.whisper.modeling_tf_whisper import TFWhisperDecoder, TFWhisperEncoder
 
 
@@ -803,6 +808,7 @@ def _test_large_batched_generation(in_queue, out_queue, timeout):
     out_queue.put(results, timeout=timeout)
     out_queue.join()
 
+
 @require_tf
 class TFWhisperEncoderModelTester:
     def __init__(
@@ -922,8 +928,9 @@ def prepare_whisper_encoder_inputs_dict(config, input_features, head_mask=None):
         head_mask = tf.ones([config.encoder_layers, config.encoder_attention_heads])
     return {"input_features": input_features, "head_mask": head_mask}
 
+
 @require_tf
-class TFWhisperEncoderModelTest(TFModelTesterMixin, TFGenerationTesterMixin, unittest.TestCase):
+class TFWhisperEncoderModelTest(TFModelTesterMixin, unittest.TestCase):
     all_model_classes = (TFWhisperForAudioClassification,) if is_tf_available() else ()
     is_encoder_decoder = False
     fx_compatible = False
@@ -962,22 +969,15 @@ def test_encoder_outputs(self):
 
         for model_class in self.all_model_classes:
             model = model_class(config)
-            model.to(torch_device)
-            model.eval()
 
             inputs = copy.deepcopy(self._prepare_for_class(inputs_dict, model_class))
-
-            with tf.stop_gradient(inputs):
-                outputs = model(**inputs)[0]
-
+            outputs = model(**inputs)[0]
             input_ids = inputs["input_features"]
             del inputs["input_features"]
 
             encoder = model.encoder
-
-            with tf.stop_gradient(inputs):
-                inputs["encoder_outputs"] = encoder(input_ids)
-                outputs_embeds = model(**inputs)[0]
+            inputs["encoder_outputs"] = encoder(input_ids)
+            outputs_embeds = model(**inputs)[0]
 
             self.assertTrue((outputs_embeds == outputs).all())
 

From 6afd3a4e4a96bdac849a77bab7c1968a219e6883 Mon Sep 17 00:00:00 2001
From: adit299 <akarishnan@gmail.com>
Date: Fri, 5 May 2023 10:54:52 -0400
Subject: [PATCH 06/20] export TFWhisperForAudioClassification and add to auto mapping

---
 src/transformers/__init__.py                     |  2 ++
 src/transformers/models/auto/modeling_tf_auto.py | 11 +++++++----
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/src/transformers/__init__.py b/src/transformers/__init__.py
index 7bf322ca8e1e..464f72adfe05 100644
--- a/src/transformers/__init__.py
+++ b/src/transformers/__init__.py
@@ -3462,6 +3462,7 @@
     _import_structure["models.whisper"].extend(
         [
             "TF_WHISPER_PRETRAINED_MODEL_ARCHIVE_LIST",
+            "TFWhisperForAudioClassification",
             "TFWhisperForConditionalGeneration",
             "TFWhisperModel",
             "TFWhisperPreTrainedModel",
@@ -6658,6 +6659,7 @@
         )
         from .models.whisper import (
             TF_WHISPER_PRETRAINED_MODEL_ARCHIVE_LIST,
+            TFWhisperForAudioClassification,
             TFWhisperForConditionalGeneration,
             TFWhisperModel,
             TFWhisperPreTrainedModel,
diff --git a/src/transformers/models/auto/modeling_tf_auto.py b/src/transformers/models/auto/modeling_tf_auto.py
index a2078da33b86..0890c079ae00 100644
--- a/src/transformers/models/auto/modeling_tf_auto.py
+++ b/src/transformers/models/auto/modeling_tf_auto.py
@@ -164,9 +164,6 @@
     ]
 )
 
-TF_MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING_NAMES = OrderedDict([("whisper", "TFWhisperForAudioClassification")])
-
-
 TF_MODEL_FOR_CAUSAL_LM_MAPPING_NAMES = OrderedDict(
     [
         # Model for Causal LM mapping
@@ -354,7 +351,13 @@
         ("xlnet", "TFXLNetForQuestionAnsweringSimple"),
     ]
 )
-TF_MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING_NAMES = OrderedDict([("wav2vec2", "TFWav2Vec2ForSequenceClassification")])
+
+TF_MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING_NAMES = OrderedDict(
+    [
+        ("wav2vec2", "TFWav2Vec2ForSequenceClassification"),
+        ("whisper", "TFWhisperForAudioClassification"),
+    ]
+)
 
 TF_MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING_NAMES = OrderedDict(
     [

From 874af91128601611d4052f7706ad28a075238218 Mon Sep 17 00:00:00 2001
From: adit299 <akarishnan@gmail.com>
Date: Fri, 5 May 2023 11:28:17 -0400
Subject: [PATCH 07/20] adding dummy_tf_object for whisper model

---
 src/transformers/utils/dummy_tf_objects.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/src/transformers/utils/dummy_tf_objects.py b/src/transformers/utils/dummy_tf_objects.py
index 2a043f50f350..80890cc14be5 100644
--- a/src/transformers/utils/dummy_tf_objects.py
+++ b/src/transformers/utils/dummy_tf_objects.py
@@ -2614,6 +2614,13 @@ def __init__(self, *args, **kwargs):
 TF_WHISPER_PRETRAINED_MODEL_ARCHIVE_LIST = None
 
 
+class TFWhisperForAudioClassification(metaclass=DummyObject):
+    _backends = ["tf"]
+
+    def __init__(self, *args, **kwargs):
+        requires_backends(self, ["tf"])
+
+
 class TFWhisperForConditionalGeneration(metaclass=DummyObject):
     _backends = ["tf"]
 

From 469e1ef4d3bb5214f3dff9a9ea3b273dae6fe568 Mon Sep 17 00:00:00 2001
From: adit299 <akarishnan@gmail.com>
Date: Mon, 8 May 2023 10:16:20 -0400
Subject: [PATCH 08/20] skip encoder call when encoder_outputs is provided

---
 .../models/whisper/modeling_tf_whisper.py         | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/src/transformers/models/whisper/modeling_tf_whisper.py b/src/transformers/models/whisper/modeling_tf_whisper.py
index 4e479a84f415..544fda62870f 100644
--- a/src/transformers/models/whisper/modeling_tf_whisper.py
+++ b/src/transformers/models/whisper/modeling_tf_whisper.py
@@ -1416,13 +1416,14 @@ def call(
         return_dict = return_dict if return_dict is not None else self.config.use_return_dict
         output_hidden_states = True if self.config.use_weighted_layer_sum else output_hidden_states
 
-        outputs = self.encoder(
-            input_features,
-            head_mask=head_mask,
-            output_attentions=output_attentions,
-            output_hidden_states=output_hidden_states,
-            return_dict=return_dict,
-        )
+        if encoder_outputs is None:
+            outputs = self.encoder(
+                input_features,
+                head_mask=head_mask,
+                output_attentions=output_attentions,
+                output_hidden_states=output_hidden_states,
+                return_dict=return_dict,
+            )
         if self.config.use_weighted_layer_sum:
             hidden_states = tf.stack(encoder_outputs, axis=1)
             norm_weights = tf.nn.softmax(self.layer_weights, axis=-1)

From 9e7415f803f7cc967133fd926a1487adde4903b1 Mon Sep 17 00:00:00 2001
From: adit299 <akarishnan@gmail.com>
Date: Mon, 8 May 2023 10:30:36 -0400
Subject: [PATCH 09/20] correcting mistake in previous commit

---
 src/transformers/models/whisper/modeling_tf_whisper.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/transformers/models/whisper/modeling_tf_whisper.py b/src/transformers/models/whisper/modeling_tf_whisper.py
index 544fda62870f..5553e816e89f 100644
--- a/src/transformers/models/whisper/modeling_tf_whisper.py
+++ b/src/transformers/models/whisper/modeling_tf_whisper.py
@@ -1417,7 +1417,7 @@ def call(
         output_hidden_states = True if self.config.use_weighted_layer_sum else output_hidden_states
 
         if encoder_outputs is None:
-            outputs = self.encoder(
+            encoer_outputs = self.encoder(
                 input_features,
                 head_mask=head_mask,
                 output_attentions=output_attentions,

From 67eaf018d2c06b0470191af088147ebfb1832ad9 Mon Sep 17 00:00:00 2001
From: adit299 <akarishnan@gmail.com>
Date: Mon, 8 May 2023 10:57:35 -0400
Subject: [PATCH 10/20] fix variable name typo (encoer_outputs -> encoder_outputs)

---
 src/transformers/models/whisper/modeling_tf_whisper.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/transformers/models/whisper/modeling_tf_whisper.py b/src/transformers/models/whisper/modeling_tf_whisper.py
index 5553e816e89f..2f713bcc69a1 100644
--- a/src/transformers/models/whisper/modeling_tf_whisper.py
+++ b/src/transformers/models/whisper/modeling_tf_whisper.py
@@ -1417,7 +1417,7 @@ def call(
         output_hidden_states = True if self.config.use_weighted_layer_sum else output_hidden_states
 
         if encoder_outputs is None:
-            encoer_outputs = self.encoder(
+            encoder_outputs = self.encoder(
                 input_features,
                 head_mask=head_mask,
                 output_attentions=output_attentions,

From d545856d09c3d2e756bcb63adf6de02535913c06 Mon Sep 17 00:00:00 2001
From: adit299 <akarishnan@gmail.com>
Date: Mon, 8 May 2023 11:11:57 -0400
Subject: [PATCH 11/20] correcting more mistakes

---
 src/transformers/models/whisper/modeling_tf_whisper.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/transformers/models/whisper/modeling_tf_whisper.py b/src/transformers/models/whisper/modeling_tf_whisper.py
index 2f713bcc69a1..24019c158967 100644
--- a/src/transformers/models/whisper/modeling_tf_whisper.py
+++ b/src/transformers/models/whisper/modeling_tf_whisper.py
@@ -1449,6 +1449,6 @@ def call(
         return TFSequenceClassifierOutput(
             loss=loss,
             logits=logits,
-            hidden_states=outputs.hidden_states,
-            attentions=outputs.attentions,
+            hidden_states=encoder_outputs.hidden_states,
+            attentions=encoder_outputs.attentions,
         )

From 4907a9a76c4867397aeb10d84407180ad651b467 Mon Sep 17 00:00:00 2001
From: adit299 <akarishnan@gmail.com>
Date: Thu, 11 May 2023 18:07:23 -0400
Subject: [PATCH 12/20] renamed call function to forward to resolve test error

---
 src/transformers/models/whisper/modeling_tf_whisper.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/transformers/models/whisper/modeling_tf_whisper.py b/src/transformers/models/whisper/modeling_tf_whisper.py
index 78a197608068..e682e86b2e00 100644
--- a/src/transformers/models/whisper/modeling_tf_whisper.py
+++ b/src/transformers/models/whisper/modeling_tf_whisper.py
@@ -129,7 +129,7 @@ def build(self, input_shape):
 
     def call(self, input_ids, past_key_values_length=0):
         past_key_values_length = tf.cast(past_key_values_length, tf.int32)
-        gather_indices = tf.range(tf.shape(input_ids)[1], delta=1) + past_key_values_length
+        gather_indices = tf.range(tf.shape(input_ids)[-1], delta=1) + past_key_values_length
         return tf.gather(self.weight, gather_indices)
 
 
@@ -1403,7 +1403,7 @@ def __init__(self, config):
         )
 
     @unpack_inputs
-    def call(
+    def forward(
         self,
         input_features: Optional[tf.Tensor] = None,
         head_mask: Optional[tf.Tensor] = None,
@@ -1452,3 +1452,4 @@ def call(
             hidden_states=encoder_outputs.hidden_states,
             attentions=encoder_outputs.attentions,
         )
+

From 94cdcd5f8db0a1f82aae973e8ea700856db0c4a3 Mon Sep 17 00:00:00 2001
From: adit299 <akarishnan@gmail.com>
Date: Thu, 11 May 2023 18:52:12 -0400
Subject: [PATCH 13/20] attempting to resolve more test errors

---
 .../models/whisper/modeling_tf_whisper.py           |  2 +-
 tests/models/whisper/test_modeling_tf_whisper.py    | 13 +++++++++----
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/src/transformers/models/whisper/modeling_tf_whisper.py b/src/transformers/models/whisper/modeling_tf_whisper.py
index e682e86b2e00..4c5d9614d3ae 100644
--- a/src/transformers/models/whisper/modeling_tf_whisper.py
+++ b/src/transformers/models/whisper/modeling_tf_whisper.py
@@ -1403,7 +1403,7 @@ def __init__(self, config):
         )
 
     @unpack_inputs
-    def forward(
+    def call(
         self,
         input_features: Optional[tf.Tensor] = None,
         head_mask: Optional[tf.Tensor] = None,
diff --git a/tests/models/whisper/test_modeling_tf_whisper.py b/tests/models/whisper/test_modeling_tf_whisper.py
index e01d2a981ed4..7fea670fb9ee 100644
--- a/tests/models/whisper/test_modeling_tf_whisper.py
+++ b/tests/models/whisper/test_modeling_tf_whisper.py
@@ -952,7 +952,7 @@ def test_forward_signature(self):
 
         for model_class in self.all_model_classes:
             model = model_class(config)
-            signature = inspect.signature(model.forward)
+            signature = inspect.signature(model.call)
             # signature.parameters is an OrderedDict => so arg_names order is deterministic
             arg_names = [*signature.parameters.keys()]
 
@@ -971,13 +971,18 @@ def test_encoder_outputs(self):
             model = model_class(config)
 
             inputs = copy.deepcopy(self._prepare_for_class(inputs_dict, model_class))
-            outputs = model(**inputs)[0]
+            
+            with tf.stop_gradient:
+                outputs = model(**inputs)[0]
+            
             input_ids = inputs["input_features"]
             del inputs["input_features"]
 
             encoder = model.encoder
-            inputs["encoder_outputs"] = encoder(input_ids)
-            outputs_embeds = model(**inputs)[0]
+            
+            with tf.stop_gradient:
+                inputs["encoder_outputs"] = encoder(input_ids)
+                outputs_embeds = model(**inputs)[0]
 
             self.assertTrue((outputs_embeds == outputs).all())
 

From 24a42fe00f861f8468c72cdfe09c9b0d59d8fb95 Mon Sep 17 00:00:00 2001
From: adit299 <akarishnan@gmail.com>
Date: Tue, 16 May 2023 13:02:05 -0400
Subject: [PATCH 14/20] addressing review comments

---
 tests/models/whisper/test_modeling_tf_whisper.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/tests/models/whisper/test_modeling_tf_whisper.py b/tests/models/whisper/test_modeling_tf_whisper.py
index 7fea670fb9ee..1bf8d4ecc982 100644
--- a/tests/models/whisper/test_modeling_tf_whisper.py
+++ b/tests/models/whisper/test_modeling_tf_whisper.py
@@ -972,17 +972,15 @@ def test_encoder_outputs(self):
 
             inputs = copy.deepcopy(self._prepare_for_class(inputs_dict, model_class))
             
-            with tf.stop_gradient:
-                outputs = model(**inputs)[0]
+            outputs = model(**inputs)[0]
             
             input_ids = inputs["input_features"]
             del inputs["input_features"]
 
             encoder = model.encoder
             
-            with tf.stop_gradient:
-                inputs["encoder_outputs"] = encoder(input_ids)
-                outputs_embeds = model(**inputs)[0]
+            inputs["encoder_outputs"] = encoder(input_ids)
+            outputs_embeds = model(**inputs)[0]
 
             self.assertTrue((outputs_embeds == outputs).all())
 

From 6f4101d8a0000e8aeb956151364c082d8e7ba51b Mon Sep 17 00:00:00 2001
From: adit299 <akarishnan@gmail.com>
Date: Wed, 17 May 2023 11:41:11 -0400
Subject: [PATCH 15/20] Addressing review comments and fixing code quality

---
 src/transformers/models/whisper/modeling_tf_whisper.py | 1 -
 tests/models/whisper/test_modeling_tf_whisper.py       | 9 ++++-----
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/src/transformers/models/whisper/modeling_tf_whisper.py b/src/transformers/models/whisper/modeling_tf_whisper.py
index 4c5d9614d3ae..24019c158967 100644
--- a/src/transformers/models/whisper/modeling_tf_whisper.py
+++ b/src/transformers/models/whisper/modeling_tf_whisper.py
@@ -1452,4 +1452,3 @@ def call(
             hidden_states=encoder_outputs.hidden_states,
             attentions=encoder_outputs.attentions,
         )
-
diff --git a/tests/models/whisper/test_modeling_tf_whisper.py b/tests/models/whisper/test_modeling_tf_whisper.py
index 68b53fdce3c0..9765bbd54936 100644
--- a/tests/models/whisper/test_modeling_tf_whisper.py
+++ b/tests/models/whisper/test_modeling_tf_whisper.py
@@ -975,18 +975,17 @@ def test_encoder_outputs(self):
             model = model_class(config)
 
             inputs = copy.deepcopy(self._prepare_for_class(inputs_dict, model_class))
-            
+
             outputs = model(**inputs)[0]
-            
+
             input_ids = inputs["input_features"]
-            del inputs["input_features"]
 
             encoder = model.encoder
-            
+
             inputs["encoder_outputs"] = encoder(input_ids)
             outputs_embeds = model(**inputs)[0]
 
-            self.assertTrue((outputs_embeds == outputs).all())
+            self.assertTrue(tf.experimental.numpy.all(outputs_embeds == outputs))
 
     # WhisperEncoder has no inputs_embeds and thus the `get_input_embeddings` fn is not implemented
     def test_model_common_attributes(self):

From 54b6eddf68c84fbf7ccd2c0fedeb1fe6d9656384 Mon Sep 17 00:00:00 2001
From: adit299 <akarishnan@gmail.com>
Date: Sun, 18 Jun 2023 16:20:47 -0400
Subject: [PATCH 16/20] solving onnx test error

---
 tests/models/whisper/test_modeling_tf_whisper.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/models/whisper/test_modeling_tf_whisper.py b/tests/models/whisper/test_modeling_tf_whisper.py
index 9765bbd54936..7d099dc26ba2 100644
--- a/tests/models/whisper/test_modeling_tf_whisper.py
+++ b/tests/models/whisper/test_modeling_tf_whisper.py
@@ -940,6 +940,7 @@ class TFWhisperEncoderModelTest(TFModelTesterMixin, unittest.TestCase):
     fx_compatible = False
     test_pruning = False
     test_missing_keys = False
+    test_onnx = False
 
     input_name = "input_features"
 

From d0f7e22326a41a3455e8ef890959f517d1bd718a Mon Sep 17 00:00:00 2001
From: adit299 <akarishnan@gmail.com>
Date: Sun, 18 Jun 2023 19:38:28 -0400
Subject: [PATCH 17/20] attempting to fix failing quality tests

---
 tests/models/whisper/test_modeling_tf_whisper.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/models/whisper/test_modeling_tf_whisper.py b/tests/models/whisper/test_modeling_tf_whisper.py
index c88dc0d074b5..f60cb8a3ee97 100644
--- a/tests/models/whisper/test_modeling_tf_whisper.py
+++ b/tests/models/whisper/test_modeling_tf_whisper.py
@@ -13,6 +13,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """ Testing suite for the TensorFlow Whisper model. """
+from __future__ import annotations
+
 import copy
 import inspect
 import tempfile
@@ -21,7 +23,6 @@
 
 import numpy as np
 
-from __future__ import annotations
 from transformers import WhisperConfig, WhisperFeatureExtractor, WhisperProcessor
 from transformers.testing_utils import is_tf_available, require_tf, require_tokenizers, run_test_in_subprocess, slow
 from transformers.utils import cached_property

From 4f02f9e8b2dbeecaf98ac1fbeaadca146f0611a8 Mon Sep 17 00:00:00 2001
From: adit299 <akarishnan@gmail.com>
Date: Mon, 28 Aug 2023 13:08:06 -0400
Subject: [PATCH 18/20] resolving test_resize_token_embeddings

---
 .../models/whisper/modeling_tf_whisper.py       | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/src/transformers/models/whisper/modeling_tf_whisper.py b/src/transformers/models/whisper/modeling_tf_whisper.py
index f9168eebdd16..2e450c16d190 100644
--- a/src/transformers/models/whisper/modeling_tf_whisper.py
+++ b/src/transformers/models/whisper/modeling_tf_whisper.py
@@ -487,14 +487,14 @@ def dummy_inputs(self) -> Dict[str, tf.Tensor]:
             self.main_input_name: tf.random.uniform(
                 [1, self.config.num_mel_bins, self.config.max_source_positions * 2 - 1], dtype=tf.float32
             ),
-            "decoder_input_ids": tf.constant([[1, 3]], dtype=tf.int32),
+            # "decoder_input_ids": tf.constant([[1, 3]], dtype=tf.int32),
         }
 
     @property
     def input_signature(self):
         return {
             "input_features": tf.TensorSpec((None, self.config.num_mel_bins, None), tf.float32, name="input_features"),
-            "decoder_input_ids": tf.TensorSpec((None, None), tf.int32, name="decoder_input_ids"),
+            # "decoder_input_ids": tf.TensorSpec((None, None), tf.int32, name="decoder_input_ids"),
             "decoder_attention_mask": tf.TensorSpec((None, None), tf.int32, name="decoder_attention_mask"),
         }
 
@@ -628,6 +628,12 @@ def __init__(self, config: WhisperConfig, **kwargs):
         self.layer_norm = tf.keras.layers.LayerNormalization(epsilon=1e-5, name="layer_norm")
 
         self.dropout = tf.keras.layers.Dropout(config.dropout)
+    
+    def get_input_embeddings(self):
+        return self.conv1
+
+    def set_input_embeddings(self, value):
+        self.conv1 = value
 
     @unpack_inputs
     def call(
@@ -1614,6 +1620,13 @@ def __init__(self, config):
         self.classifier = tf.keras.layers.Dense(
             units=config.num_labels, input_shape=(config.classifier_proj_size,), activation=None
         )
+    
+    def get_input_embeddings(self):
+        return self.encoder.get_input_embeddings()
+
+    def set_input_embeddings(self, value):
+        self.encoder.set_input_embeddings(value)
+
 
     @unpack_inputs
     def call(

From 468a9bf63519c4e91c63472935f7a1e46cc13b97 Mon Sep 17 00:00:00 2001
From: adit299 <akarishnan@gmail.com>
Date: Mon, 4 Sep 2023 11:50:25 -0400
Subject: [PATCH 19/20] override dummy_inputs and input_signature methods

---
 myoutput.txt                                  | 628 ++++++++++++++++++
 .../models/auto/modeling_tf_auto.py           |   2 +-
 .../models/whisper/modeling_tf_whisper.py     |  23 +-
 3 files changed, 650 insertions(+), 3 deletions(-)
 create mode 100644 myoutput.txt

diff --git a/myoutput.txt b/myoutput.txt
new file mode 100644
index 000000000000..81bc8eabe5c7
--- /dev/null
+++ b/myoutput.txt
@@ -0,0 +1,628 @@
+============================= test session starts ==============================
+platform linux -- Python 3.8.10, pytest-7.2.1, pluggy-1.0.0
+rootdir: /home/adit299/transformers, configfile: setup.cfg
+plugins: hypothesis-6.64.0, timeout-2.1.0, xdist-3.1.0
+collected 40 items
+
+tests/models/whisper/test_modeling_tf_whisper.py .F...sFF..F............ [ 57%]
+....FsFFF.Fs.ss.s                                                        [100%]
+
+=================================== FAILURES ===================================
+_______________ TFWhisperEncoderModelTest.test_compile_tf_model ________________
+
+self = <tests.models.whisper.test_modeling_tf_whisper.TFWhisperEncoderModelTest testMethod=test_compile_tf_model>
+
+    def test_compile_tf_model(self):
+        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
+        max_input = getattr(self.model_tester, "max_position_embeddings", 512)
+        optimizer = tf.keras.optimizers.Adam(learning_rate=3e-5, epsilon=1e-08, clipnorm=1.0)
+        loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
+        metric = tf.keras.metrics.SparseCategoricalAccuracy("accuracy")
+    
+        for model_class in self.all_model_classes:
+            if model_class.__name__ in ["TFSpeech2TextModel", "TFSpeech2TextForConditionalGeneration"]:
+                inputs = {
+                    "decoder_input_ids": tf.keras.Input(
+                        batch_shape=(2, max_input),
+                        name="decoder_input_ids",
+                        dtype="int32",
+                    ),
+                    "input_features": tf.keras.Input(
+                        batch_shape=(
+                            2,
+                            max_input,
+                            self.model_tester.input_feat_per_channel * self.model_tester.input_channels,
+                        ),
+                        name="input_features",
+                        dtype="float32",
+                    ),
+                }
+            elif model_class.__name__ in ["TFWhisperModel", "TFWhisperForConditionalGeneration"]:
+                inputs = {
+                    "decoder_input_ids": tf.keras.Input(
+                        batch_shape=(2, max_input),
+                        name="decoder_input_ids",
+                        dtype="int32",
+                    ),
+                    "input_features": tf.keras.Input(
+                        batch_shape=(
+                            2,
+                            self.model_tester.num_mel_bins,
+                            self.model_tester.seq_length,
+                        ),
+                        name="input_features",
+                        dtype="float32",
+                    ),
+                }
+            elif self.is_encoder_decoder:
+                inputs = {
+                    "decoder_input_ids": tf.keras.Input(
+                        batch_shape=(2, max_input),
+                        name="decoder_input_ids",
+                        dtype="int32",
+                    ),
+                    "input_ids": tf.keras.Input(batch_shape=(2, max_input), name="input_ids", dtype="int32"),
+                }
+            # `pixel_values` implies that the input is an image
+            elif model_class.main_input_name == "pixel_values":
+                inputs = tf.keras.Input(
+                    batch_shape=(
+                        3,
+                        self.model_tester.num_channels,
+                        self.model_tester.image_size,
+                        self.model_tester.image_size,
+                    ),
+                    name="pixel_values",
+                    dtype="float32",
+                )
+            elif model_class.__name__ in ["TFCLIPModel", "TFGroupViTModel", "TFBlipModel"]:
+                inputs = {
+                    "input_ids": tf.keras.Input(batch_shape=(3, max_input), name="input_ids", dtype="int32"),
+                    "pixel_values": tf.keras.Input(
+                        batch_shape=(
+                            3,
+                            self.model_tester.vision_model_tester.num_channels,
+                            self.model_tester.vision_model_tester.image_size,
+                            self.model_tester.vision_model_tester.image_size,
+                        ),
+                        name="pixel_values",
+                        dtype="float32",
+                    ),
+                }
+            elif model_class in get_values(TF_MODEL_FOR_MULTIPLE_CHOICE_MAPPING):
+                inputs = tf.keras.Input(batch_shape=(4, 2, max_input), name="input_ids", dtype="int32")
+            else:
+                inputs = tf.keras.Input(batch_shape=(2, max_input), name="input_ids", dtype="int32")
+    
+            # Prepare our model
+            model = model_class(config)
+            model(self._prepare_for_class(inputs_dict, model_class))  # Model must be called before saving.
+            # Let's load it from the disk to be sure we can use pretrained weights
+            with tempfile.TemporaryDirectory() as tmpdirname:
+                model.save_pretrained(tmpdirname, saved_model=False)
+>               model = model_class.from_pretrained(tmpdirname)
+
+tests/test_modeling_tf_common.py:827: 
+_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
+src/transformers/modeling_tf_utils.py:2810: in from_pretrained
+    model(model.dummy_inputs)  # build the network with dummy inputs
+.env/lib/python3.8/site-packages/keras/utils/traceback_utils.py:70: in error_handler
+    raise e.with_traceback(filtered_tb) from None
+_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
+
+self = <transformers.models.whisper.modeling_tf_whisper.TFWhisperForAudioClassification object at 0x7f8ab3b5a160>
+args = ({'decoder_input_ids': <tf.Tensor: shape=(1, 2), dtype=int32, numpy=array([[2, 3]], dtype=int32)>, 'input_features': <...],
+        [0.18482292, 0.9640751 , 0.32960486, ..., 0.53566337,
+         0.2877066 , 0.8109739 ]]], dtype=float32)>},)
+kwargs = {}
+fn_args_and_kwargs = {'input_features': {'decoder_input_ids': <tf.Tensor: shape=(1, 2), dtype=int32, numpy=array([[2, 3]], dtype=int32)>, '...2292, 0.9640751 , 0.32960486, ..., 0.53566337,
+         0.2877066 , 0.8109739 ]]], dtype=float32)>}, 'kwargs_call': {}}
+config = WhisperConfig {
+  "_name_or_path": "/tmp/tmpa3qgulea",
+  "activation_dropout": 0.0,
+  "activation_function": "gelu",
+ ..."transformers_version": "4.30.0.dev0",
+  "use_cache": true,
+  "use_weighted_layer_sum": false,
+  "vocab_size": 51865
+}
+
+unpacked_inputs = {'decoder_input_ids': <tf.Tensor: shape=(1, 2), dtype=int32, numpy=array([[2, 3]], dtype=int32)>, 'encoder_outputs': N...        [0.18482292, 0.9640751 , 0.32960486, ..., 0.53566337,
+         0.2877066 , 0.8109739 ]]], dtype=float32)>, ...}
+
+    @functools.wraps(func)
+    def run_call_with_unpacked_inputs(self, *args, **kwargs):
+        # isolates the actual `**kwargs` for the decorated function
+        kwargs_call = {key: val for key, val in kwargs.items() if key not in dict(original_signature.parameters)}
+        fn_args_and_kwargs = {key: val for key, val in kwargs.items() if key not in kwargs_call}
+        fn_args_and_kwargs.update({"kwargs_call": kwargs_call})
+    
+        # move any arg into kwargs, if they exist
+        fn_args_and_kwargs.update(dict(zip(func.__code__.co_varnames[1:], args)))
+    
+        # Encoder Decoder models delegate the application of the configuration options to their inner models.
+        if "EncoderDecoder" in self.__class__.__name__:
+            config = None
+        else:
+            config = self.config
+    
+        unpacked_inputs = input_processing(func, config, **fn_args_and_kwargs)
+>       return func(self, **unpacked_inputs)
+E       TypeError: Exception encountered when calling layer 'tf_whisper_for_audio_classification_4' (type TFWhisperForAudioClassification).
+E       
+E       call() got an unexpected keyword argument 'decoder_input_ids'
+E       
+E       Call arguments received by layer 'tf_whisper_for_audio_classification_4' (type TFWhisperForAudioClassification):
+E         • input_features={'input_features': 'tf.Tensor(shape=(2, 80, 59), dtype=float32)', 'decoder_input_ids': 'tf.Tensor(shape=(1, 2), dtype=int32)'}
+E         • head_mask=None
+E         • encoder_outputs=None
+E         • labels=None
+E         • output_attentions=None
+E         • output_hidden_states=None
+E         • return_dict=None
+
+src/transformers/modeling_tf_utils.py:434: TypeError
+________________ TFWhisperEncoderModelTest.test_encoder_outputs ________________
+
+self = <tests.models.whisper.test_modeling_tf_whisper.TFWhisperEncoderModelTest testMethod=test_encoder_outputs>
+
+    def test_encoder_outputs(self):
+        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
+    
+        for model_class in self.all_model_classes:
+            model = model_class(config)
+    
+            inputs = copy.deepcopy(self._prepare_for_class(inputs_dict, model_class))
+            outputs = model(**inputs)[0]
+            input_ids = inputs["input_features"]
+            del inputs["input_features"]
+    
+            encoder = model.encoder
+            inputs["encoder_outputs"] = encoder(input_ids)
+>           outputs_embeds = model(**inputs)[0]
+
+tests/models/whisper/test_modeling_tf_whisper.py:980: 
+_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
+.env/lib/python3.8/site-packages/keras/utils/traceback_utils.py:70: in error_handler
+    raise e.with_traceback(filtered_tb) from None
+_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
+
+self = <keras.utils.layer_utils.CallFunctionSpec object at 0x7f8ab36e31c0>
+args = ()
+kwargs = {'encoder_outputs': TFBaseModelOutput(last_hidden_state=<tf.Tensor: shape=(13, 30, 16), dtype=float32, numpy=
+array([[...k': <tf.Tensor: shape=(2, 4), dtype=float32, numpy=
+array([[1., 1., 1., 1.],
+       [1., 1., 1., 1.]], dtype=float32)>}
+
+    def split_out_first_arg(self, args, kwargs):
+        """Splits (args, kwargs) into (inputs, args, kwargs)."""
+        # Grab the argument corresponding to the first argument in the
+        # layer's `call` method spec. This will either be the first positional
+        # argument, or it will be provided as a keyword argument.
+        if args:
+            inputs = args[0]
+            args = args[1:]
+        elif self._arg_names[0] in kwargs:
+            kwargs = copy.copy(kwargs)
+            inputs = kwargs.pop(self._arg_names[0])
+        else:
+>           raise ValueError(
+                "The first argument to `Layer.call` must always be passed."
+            )
+E           ValueError: The first argument to `Layer.call` must always be passed.
+
+.env/lib/python3.8/site-packages/keras/utils/layer_utils.py:809: ValueError
+_______________ TFWhisperEncoderModelTest.test_forward_signature _______________
+
+self = <tests.models.whisper.test_modeling_tf_whisper.TFWhisperEncoderModelTest testMethod=test_forward_signature>
+
+    def test_forward_signature(self):
+        config, _ = self.model_tester.prepare_config_and_inputs_for_common()
+    
+        for model_class in self.all_model_classes:
+            model = model_class(config)
+>           signature = inspect.signature(model.forward)
+E           AttributeError: 'TFWhisperForAudioClassification' object has no attribute 'forward'
+
+tests/models/whisper/test_modeling_tf_whisper.py:955: AttributeError
+_____________ TFWhisperEncoderModelTest.test_hidden_states_output ______________
+
+self = <tests.models.whisper.test_modeling_tf_whisper.TFWhisperEncoderModelTest testMethod=test_hidden_states_output>
+
+    def test_hidden_states_output(self):
+        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
+    
+        def check_hidden_states_output(config, inputs_dict, model_class):
+            model = model_class(config)
+            outputs = model(self._prepare_for_class(inputs_dict, model_class))
+            expected_num_layers = getattr(
+                self.model_tester, "expected_num_hidden_layers", self.model_tester.num_hidden_layers + 1
+            )
+    
+            if model.config.is_encoder_decoder:
+                encoder_hidden_states = outputs.encoder_hidden_states
+                decoder_hidden_states = outputs.decoder_hidden_states
+    
+                self.assertEqual(config.output_attentions, False)
+                self.assertEqual(len(encoder_hidden_states), expected_num_layers)
+                self.assertListEqual(
+                    list(encoder_hidden_states[0].shape[-2:]),
+                    [self.model_tester.seq_length, self.model_tester.hidden_size],
+                )
+                self.assertEqual(len(decoder_hidden_states), expected_num_layers)
+                self.assertListEqual(
+                    list(decoder_hidden_states[0].shape[-2:]),
+                    [self.model_tester.seq_length, self.model_tester.hidden_size],
+                )
+            else:
+                hidden_states = outputs.hidden_states
+                self.assertEqual(config.output_attentions, False)
+                self.assertEqual(len(hidden_states), expected_num_layers)
+                self.assertListEqual(
+                    list(hidden_states[0].shape[-2:]),
+                    [self.model_tester.seq_length, self.model_tester.hidden_size],
+                )
+    
+        for model_class in self.all_model_classes:
+            inputs_dict["output_hidden_states"] = True
+>           check_hidden_states_output(config, inputs_dict, model_class)
+
+tests/test_modeling_tf_common.py:1028: 
+_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
+tests/test_modeling_tf_common.py:1021: in check_hidden_states_output
+    self.assertListEqual(
+E   AssertionError: Lists differ: [30, 16] != [60, 16]
+E   
+E   First differing element 0:
+E   30
+E   60
+E   
+E   - [30, 16]
+E   ?  ^
+E   
+E   + [60, 16]
+E   ?  ^
+________________ TFWhisperEncoderModelTest.test_onnx_compliancy ________________
+
+self = <tests.models.whisper.test_modeling_tf_whisper.TFWhisperEncoderModelTest testMethod=test_onnx_compliancy>
+
+    def test_onnx_compliancy(self):
+>       if not self.test_onnx:
+E       AttributeError: 'TFWhisperEncoderModelTest' object has no attribute 'test_onnx'
+
+tests/test_modeling_tf_common.py:343: AttributeError
+____________ TFWhisperEncoderModelTest.test_prepare_serving_output _____________
+
+self = <tests.models.whisper.test_modeling_tf_whisper.TFWhisperEncoderModelTest testMethod=test_prepare_serving_output>
+
+    def test_prepare_serving_output(self):
+        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
+        config.output_hidden_states = True
+        config.output_attentions = self.has_attentions
+    
+        for model_class in self.all_model_classes:
+            model = model_class(config)
+            inputs = self._prepare_for_class(inputs_dict, model_class)
+            outputs = model(inputs)
+>           serving_outputs = model.serving_output(outputs)
+
+tests/test_modeling_tf_common.py:300: 
+_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
+
+self = <transformers.models.whisper.modeling_tf_whisper.TFWhisperForAudioClassification object at 0x7f8ab3909dc0>
+output = TFSequenceClassifierOutput(loss=None, logits=<tf.Tensor: shape=(13, 2), dtype=float32, numpy=
+array([[1.8147802, 1.989...
+         [0.05317271, 0.03583057, 0.03193213, ..., 0.02386571,
+          0.02572016, 0.02103928]]]], dtype=float32)>))
+
+    def serving_output(self, output):
+        """
+        Prepare the output of the saved model. Each model must implement this function.
+    
+        Args:
+            output ([`TFBaseModelOutput`]):
+                The output returned by the model.
+        """
+>       raise NotImplementedError
+E       NotImplementedError
+
+src/transformers/modeling_tf_utils.py:1236: NotImplementedError
+____________ TFWhisperEncoderModelTest.test_pt_tf_model_equivalence ____________
+
+self = <tests.models.whisper.test_modeling_tf_whisper.TFWhisperEncoderModelTest testMethod=test_pt_tf_model_equivalence>
+allow_missing_keys = False
+
+    @is_pt_tf_cross_test
+    def test_pt_tf_model_equivalence(self, allow_missing_keys=False):
+        import transformers
+    
+        for model_class in self.all_model_classes:
+            config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
+    
+            # Output all for aggressive testing
+            config.output_hidden_states = True
+            config.output_attentions = self.has_attentions
+    
+            # Make sure no sequence has all zeros as attention mask, otherwise some tests fail due to the inconsistency
+            # of the usage `1e-4`, `1e-9`, `1e-30`, `-inf`.
+            # TODO: Use a uniform value for all models, make sure all tests pass without this processing, and remove it.
+            self._make_attention_mask_non_null(inputs_dict)
+    
+            pt_model_class_name = model_class.__name__[2:]  # Skip the "TF" at the beginning
+            pt_model_class = getattr(transformers, pt_model_class_name)
+    
+            tf_model = model_class(config)
+            pt_model = pt_model_class(config)
+    
+            tf_inputs_dict = self._prepare_for_class(inputs_dict, model_class)
+            tf_inputs_dict_with_labels = self._prepare_for_class(
+                inputs_dict,
+                model_class,
+                # Not all models accept "labels" in the forward pass (yet :) )
+                return_labels=True if "labels" in inspect.signature(model_class.call).parameters.keys() else False,
+            )
+    
+            # For some models (e.g. base models), there is no label returned.
+            # Set the input dict to `None` to avoid check outputs twice for the same input dicts.
+            if not set(tf_inputs_dict_with_labels.keys()).symmetric_difference(tf_inputs_dict.keys()):
+                tf_inputs_dict_with_labels = None
+    
+            # Check we can load pt model in tf and vice-versa with model => model functions
+>           tf_model = transformers.load_pytorch_model_in_tf2_model(
+                tf_model, pt_model, tf_inputs=tf_inputs_dict, allow_missing_keys=allow_missing_keys
+            )
+
+tests/test_modeling_tf_common.py:706: 
+_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
+src/transformers/modeling_tf_pytorch_utils.py:204: in load_pytorch_model_in_tf2_model
+    return load_pytorch_weights_in_tf2_model(
+src/transformers/modeling_tf_pytorch_utils.py:230: in load_pytorch_weights_in_tf2_model
+    return load_pytorch_state_dict_in_tf2_model(
+_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
+
+tf_model = <transformers.models.whisper.modeling_tf_whisper.TFWhisperForAudioClassification object at 0x7f8ab36988b0>
+pt_state_dict = {'classifier.bias': array([0., 0.], dtype=float32), 'classifier.weight': array([[-0.00916307,  0.00247839,  0.01647531...       [ 0.04591416, -0.00516921,  0.03591024],
+        [-0.01335885, -0.01553431, -0.0269356 ]]], dtype=float32), ...}
+tf_inputs = {'head_mask': <tf.Tensor: shape=(2, 4), dtype=float32, numpy=
+array([[1., 1., 1., 1.],
+       [1., 1., 1., 1.]], dtype...53],
+        [0.87383145, 0.34678572, 0.2766716 , ..., 0.37082306,
+         0.825713  , 0.6365404 ]]], dtype=float32)>}
+allow_missing_keys = False, output_loading_info = False, _prefix = ''
+tf_to_pt_weight_rename = None, ignore_mismatched_sizes = False
+
+    def load_pytorch_state_dict_in_tf2_model(
+        tf_model,
+        pt_state_dict,
+        tf_inputs=None,
+        allow_missing_keys=False,
+        output_loading_info=False,
+        _prefix=None,
+        tf_to_pt_weight_rename=None,
+        ignore_mismatched_sizes=False,
+    ):
+        """Load a pytorch state_dict in a TF 2.0 model."""
+        import tensorflow as tf
+        from packaging.version import parse
+    
+        if parse(tf.__version__) >= parse("2.11.0"):
+            from keras import backend as K
+        else:
+            from tensorflow.python.keras import backend as K
+    
+        if tf_inputs is None:
+            tf_inputs = tf_model.dummy_inputs
+    
+        if _prefix is None:
+            _prefix = ""
+        if tf_inputs is not None:
+            with tf.name_scope(_prefix):
+                tf_model(tf_inputs, training=False)  # Make sure model is built
+        # Adapt state dict - TODO remove this and update the AWS weights files instead
+        # Convert old format to new format if needed from a PyTorch state_dict
+        old_keys = []
+        new_keys = []
+        for key in pt_state_dict.keys():
+            new_key = None
+            if "gamma" in key:
+                new_key = key.replace("gamma", "weight")
+            if "beta" in key:
+                new_key = key.replace("beta", "bias")
+            if "running_var" in key:
+                new_key = key.replace("running_var", "moving_variance")
+            if "running_mean" in key:
+                new_key = key.replace("running_mean", "moving_mean")
+            if new_key:
+                old_keys.append(key)
+                new_keys.append(new_key)
+        for old_key, new_key in zip(old_keys, new_keys):
+            pt_state_dict[new_key] = pt_state_dict.pop(old_key)
+    
+        # Matt: All TF models store the actual model stem in a MainLayer class, including the base model.
+        # In PT, the derived models (with heads) use the base model class as the stem instead, and the base model
+        # just contains the stem itself, and there is no MainLayer class. This means that TF base classes have one
+        # extra layer in their weight names, corresponding to the MainLayer class. This code block compensates for that.
+        start_prefix_to_remove = ""
+        if not any(s.startswith(tf_model.base_model_prefix) for s in pt_state_dict.keys()):
+            start_prefix_to_remove = tf_model.base_model_prefix + "."
+    
+        symbolic_weights = tf_model.trainable_weights + tf_model.non_trainable_weights
+        tf_loaded_numel = 0
+        weight_value_tuples = []
+        all_pytorch_weights = set(pt_state_dict.keys())
+        missing_keys = []
+        mismatched_keys = []
+        for symbolic_weight in symbolic_weights:
+            sw_name = symbolic_weight.name
+            name, transpose = convert_tf_weight_name_to_pt_weight_name(
+                sw_name,
+                start_prefix_to_remove=start_prefix_to_remove,
+                tf_weight_shape=symbolic_weight.shape,
+                name_scope=_prefix,
+            )
+            if tf_to_pt_weight_rename is not None:
+                name = tf_to_pt_weight_rename(name)
+    
+            # Find associated numpy array in pytorch model state dict
+            if name not in pt_state_dict:
+                if allow_missing_keys:
+                    missing_keys.append(name)
+                    continue
+                elif tf_model._keys_to_ignore_on_load_missing is not None:
+                    # authorized missing keys don't have to be loaded
+                    if any(re.search(pat, name) is not None for pat in tf_model._keys_to_ignore_on_load_missing):
+                        continue
+>               raise AttributeError(f"{name} not found in PyTorch model")
+E               AttributeError: tf_whisper_encoder_17.conv1.weight not found in PyTorch model
+
+src/transformers/modeling_tf_pytorch_utils.py:322: AttributeError
+____________ TFWhisperEncoderModelTest.test_resize_token_embeddings ____________
+
+self = <tests.models.whisper.test_modeling_tf_whisper.TFWhisperEncoderModelTest testMethod=test_resize_token_embeddings>
+
+    def test_resize_token_embeddings(self):
+        # TODO (joao): after the embeddings refactor is complete, rework this test so as to rely exclusively on
+        # tf.keras.layers.Embedding
+    
+        if not self.test_resize_embeddings:
+            return
+        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
+    
+        def _get_word_embedding_weight(model, embedding_layer):
+            if isinstance(embedding_layer, tf.keras.layers.Embedding):
+                # builds the embeddings layer
+                model(model.dummy_inputs)
+                return embedding_layer.embeddings
+            else:
+                return model._get_word_embedding_weight(embedding_layer)
+    
+        for model_class in self.all_model_classes:
+            for size in [config.vocab_size - 10, config.vocab_size + 10, None]:
+                # build the embeddings
+                model = model_class(config=copy.deepcopy(config))  # `resize_token_embeddings` mutates `config`
+>               old_input_embeddings = _get_word_embedding_weight(model, model.get_input_embeddings())
+
+tests/test_modeling_tf_common.py:1211: 
+_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
+
+self = <transformers.models.whisper.modeling_tf_whisper.TFWhisperForAudioClassification object at 0x7f8ab3b77d30>
+
+    def get_input_embeddings(self) -> tf.keras.layers.Layer:
+        """
+        Returns the model's input embeddings layer.
+    
+        Returns:
+            `tf.Variable`: The embeddings layer mapping vocabulary to hidden states.
+        """
+        main_layer = getattr(self, self.base_model_prefix, self)
+    
+        if main_layer is not self:
+            return main_layer.get_input_embeddings()
+        else:
+>           raise NotImplementedError
+E           NotImplementedError
+
+src/transformers/modeling_tf_utils.py:1262: NotImplementedError
+___________________ TFWhisperEncoderModelTest.test_save_load ___________________
+
+self = <tests.models.whisper.test_modeling_tf_whisper.TFWhisperEncoderModelTest testMethod=test_save_load>
+
+    def test_save_load(self):
+        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
+    
+        for model_class in self.all_model_classes:
+            model = model_class(config)
+            outputs = model(self._prepare_for_class(inputs_dict, model_class))
+    
+            with tempfile.TemporaryDirectory() as tmpdirname:
+                model.save_pretrained(tmpdirname, saved_model=False)
+    
+                # the config file (and the generation config file, if it can generate) should be saved
+                self.assertTrue(os.path.exists(os.path.join(tmpdirname, CONFIG_NAME)))
+                self.assertEqual(
+                    model.can_generate(), os.path.exists(os.path.join(tmpdirname, GENERATION_CONFIG_NAME))
+                )
+    
+>               model = model_class.from_pretrained(tmpdirname)
+
+tests/test_modeling_tf_common.py:247: 
+_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
+src/transformers/modeling_tf_utils.py:2810: in from_pretrained
+    model(model.dummy_inputs)  # build the network with dummy inputs
+.env/lib/python3.8/site-packages/keras/utils/traceback_utils.py:70: in error_handler
+    raise e.with_traceback(filtered_tb) from None
+_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
+
+self = <transformers.models.whisper.modeling_tf_whisper.TFWhisperForAudioClassification object at 0x7f8ab352caf0>
+args = ({'decoder_input_ids': <tf.Tensor: shape=(1, 2), dtype=int32, numpy=array([[2, 3]], dtype=int32)>, 'input_features': <...],
+        [0.00129569, 0.1321398 , 0.4477892 , ..., 0.09110773,
+         0.8213068 , 0.2709607 ]]], dtype=float32)>},)
+kwargs = {}
+fn_args_and_kwargs = {'input_features': {'decoder_input_ids': <tf.Tensor: shape=(1, 2), dtype=int32, numpy=array([[2, 3]], dtype=int32)>, '...9569, 0.1321398 , 0.4477892 , ..., 0.09110773,
+         0.8213068 , 0.2709607 ]]], dtype=float32)>}, 'kwargs_call': {}}
+config = WhisperConfig {
+  "_name_or_path": "/tmp/tmpozgmlb9x",
+  "activation_dropout": 0.0,
+  "activation_function": "gelu",
+ ..."transformers_version": "4.30.0.dev0",
+  "use_cache": true,
+  "use_weighted_layer_sum": false,
+  "vocab_size": 51865
+}
+
+unpacked_inputs = {'decoder_input_ids': <tf.Tensor: shape=(1, 2), dtype=int32, numpy=array([[2, 3]], dtype=int32)>, 'encoder_outputs': N...        [0.00129569, 0.1321398 , 0.4477892 , ..., 0.09110773,
+         0.8213068 , 0.2709607 ]]], dtype=float32)>, ...}
+
+    @functools.wraps(func)
+    def run_call_with_unpacked_inputs(self, *args, **kwargs):
+        # isolates the actual `**kwargs` for the decorated function
+        kwargs_call = {key: val for key, val in kwargs.items() if key not in dict(original_signature.parameters)}
+        fn_args_and_kwargs = {key: val for key, val in kwargs.items() if key not in kwargs_call}
+        fn_args_and_kwargs.update({"kwargs_call": kwargs_call})
+    
+        # move any arg into kwargs, if they exist
+        fn_args_and_kwargs.update(dict(zip(func.__code__.co_varnames[1:], args)))
+    
+        # Encoder Decoder models delegate the application of the configuration options to their inner models.
+        if "EncoderDecoder" in self.__class__.__name__:
+            config = None
+        else:
+            config = self.config
+    
+        unpacked_inputs = input_processing(func, config, **fn_args_and_kwargs)
+>       return func(self, **unpacked_inputs)
+E       TypeError: Exception encountered when calling layer 'tf_whisper_for_audio_classification_20' (type TFWhisperForAudioClassification).
+E       
+E       call() got an unexpected keyword argument 'decoder_input_ids'
+E       
+E       Call arguments received by layer 'tf_whisper_for_audio_classification_20' (type TFWhisperForAudioClassification):
+E         • input_features={'input_features': 'tf.Tensor(shape=(2, 80, 59), dtype=float32)', 'decoder_input_ids': 'tf.Tensor(shape=(1, 2), dtype=int32)'}
+E         • head_mask=None
+E         • encoder_outputs=None
+E         • labels=None
+E         • output_attentions=None
+E         • output_hidden_states=None
+E         • return_dict=None
+
+src/transformers/modeling_tf_utils.py:434: TypeError
+=============================== warnings summary ===============================
+.env/lib/python3.8/site-packages/tensorflow/python/framework/dtypes.py:246
+  /home/adit299/transformers/.env/lib/python3.8/site-packages/tensorflow/python/framework/dtypes.py:246: DeprecationWarning: `np.bool8` is a deprecated alias for `np.bool_`.  (Deprecated NumPy 1.24)
+    np.bool8: (False, True),
+
+src/transformers/models/open_llama/modeling_open_llama.py:42
+  /home/adit299/transformers/src/transformers/models/open_llama/modeling_open_llama.py:42: DeprecationWarning: The 'warn' method is deprecated, use 'warning' instead
+    logger.warn(
+
+-- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html
+=========================== short test summary info ============================
+FAILED tests/models/whisper/test_modeling_tf_whisper.py::TFWhisperEncoderModelTest::test_compile_tf_model
+FAILED tests/models/whisper/test_modeling_tf_whisper.py::TFWhisperEncoderModelTest::test_encoder_outputs
+FAILED tests/models/whisper/test_modeling_tf_whisper.py::TFWhisperEncoderModelTest::test_forward_signature
+FAILED tests/models/whisper/test_modeling_tf_whisper.py::TFWhisperEncoderModelTest::test_hidden_states_output
+FAILED tests/models/whisper/test_modeling_tf_whisper.py::TFWhisperEncoderModelTest::test_onnx_compliancy
+FAILED tests/models/whisper/test_modeling_tf_whisper.py::TFWhisperEncoderModelTest::test_prepare_serving_output
+FAILED tests/models/whisper/test_modeling_tf_whisper.py::TFWhisperEncoderModelTest::test_pt_tf_model_equivalence
+FAILED tests/models/whisper/test_modeling_tf_whisper.py::TFWhisperEncoderModelTest::test_resize_token_embeddings
+FAILED tests/models/whisper/test_modeling_tf_whisper.py::TFWhisperEncoderModelTest::test_save_load
+============= 9 failed, 25 passed, 6 skipped, 2 warnings in 39.74s =============
diff --git a/src/transformers/models/auto/modeling_tf_auto.py b/src/transformers/models/auto/modeling_tf_auto.py
index 0118db319b87..72156dd6e468 100644
--- a/src/transformers/models/auto/modeling_tf_auto.py
+++ b/src/transformers/models/auto/modeling_tf_auto.py
@@ -467,7 +467,7 @@
 )
 
 TF_MODEL_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, TF_MODEL_MAPPING_NAMES)
-TF_MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING_NAMES = _LazyAutoMapping(
+TF_MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING = _LazyAutoMapping(
     CONFIG_MAPPING_NAMES, TF_MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING_NAMES
 )
 
diff --git a/src/transformers/models/whisper/modeling_tf_whisper.py b/src/transformers/models/whisper/modeling_tf_whisper.py
index 2e450c16d190..2f4e47d15502 100644
--- a/src/transformers/models/whisper/modeling_tf_whisper.py
+++ b/src/transformers/models/whisper/modeling_tf_whisper.py
@@ -487,14 +487,14 @@ def dummy_inputs(self) -> Dict[str, tf.Tensor]:
             self.main_input_name: tf.random.uniform(
                 [1, self.config.num_mel_bins, self.config.max_source_positions * 2 - 1], dtype=tf.float32
             ),
-            # "decoder_input_ids": tf.constant([[1, 3]], dtype=tf.int32),
+            "decoder_input_ids": tf.constant([[1, 3]], dtype=tf.int32),
         }
 
     @property
     def input_signature(self):
         return {
             "input_features": tf.TensorSpec((None, self.config.num_mel_bins, None), tf.float32, name="input_features"),
-            # "decoder_input_ids": tf.TensorSpec((None, None), tf.int32, name="decoder_input_ids"),
+            "decoder_input_ids": tf.TensorSpec((None, None), tf.int32, name="decoder_input_ids"),
             "decoder_attention_mask": tf.TensorSpec((None, None), tf.int32, name="decoder_attention_mask"),
         }
 
@@ -1626,7 +1626,26 @@ def get_input_embeddings(self):
 
     def set_input_embeddings(self, value):
         self.encoder.set_input_embeddings(value)
+    
+    @property
+    def dummy_inputs(self) -> Dict[str, tf.Tensor]:
+        """
+        Dummy inputs to build the network.
 
+        Returns:
+            `Dict[str, tf.Tensor]`: The dummy inputs.
+        """
+        return {
+            self.main_input_name: tf.random.uniform(
+                [1, self.config.num_mel_bins, self.config.max_source_positions * 2 - 1], dtype=tf.float32
+            ),
+        }
+
+    @property
+    def input_signature(self):
+        return {
+            "input_features": tf.TensorSpec((None, self.config.num_mel_bins, None), tf.float32, name="input_features"),
+        }
 
     @unpack_inputs
     def call(

From 66fb2b5805d6b4412e94b542485aa98968dfcb6a Mon Sep 17 00:00:00 2001
From: adit299 <akarishnan@gmail.com>
Date: Mon, 4 Sep 2023 12:03:42 -0400
Subject: [PATCH 20/20] removing unneeded file

---
 myoutput.txt | 628 ---------------------------------------------------
 1 file changed, 628 deletions(-)
 delete mode 100644 myoutput.txt

diff --git a/myoutput.txt b/myoutput.txt
deleted file mode 100644
index 81bc8eabe5c7..000000000000
--- a/myoutput.txt
+++ /dev/null
@@ -1,628 +0,0 @@
-============================= test session starts ==============================
-platform linux -- Python 3.8.10, pytest-7.2.1, pluggy-1.0.0
-rootdir: /home/adit299/transformers, configfile: setup.cfg
-plugins: hypothesis-6.64.0, timeout-2.1.0, xdist-3.1.0
-collected 40 items
-
-tests/models/whisper/test_modeling_tf_whisper.py .F...sFF..F............ [ 57%]
-....FsFFF.Fs.ss.s                                                        [100%]
-
-=================================== FAILURES ===================================
-_______________ TFWhisperEncoderModelTest.test_compile_tf_model ________________
-
-self = <tests.models.whisper.test_modeling_tf_whisper.TFWhisperEncoderModelTest testMethod=test_compile_tf_model>
-
-    def test_compile_tf_model(self):
-        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
-        max_input = getattr(self.model_tester, "max_position_embeddings", 512)
-        optimizer = tf.keras.optimizers.Adam(learning_rate=3e-5, epsilon=1e-08, clipnorm=1.0)
-        loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
-        metric = tf.keras.metrics.SparseCategoricalAccuracy("accuracy")
-    
-        for model_class in self.all_model_classes:
-            if model_class.__name__ in ["TFSpeech2TextModel", "TFSpeech2TextForConditionalGeneration"]:
-                inputs = {
-                    "decoder_input_ids": tf.keras.Input(
-                        batch_shape=(2, max_input),
-                        name="decoder_input_ids",
-                        dtype="int32",
-                    ),
-                    "input_features": tf.keras.Input(
-                        batch_shape=(
-                            2,
-                            max_input,
-                            self.model_tester.input_feat_per_channel * self.model_tester.input_channels,
-                        ),
-                        name="input_features",
-                        dtype="float32",
-                    ),
-                }
-            elif model_class.__name__ in ["TFWhisperModel", "TFWhisperForConditionalGeneration"]:
-                inputs = {
-                    "decoder_input_ids": tf.keras.Input(
-                        batch_shape=(2, max_input),
-                        name="decoder_input_ids",
-                        dtype="int32",
-                    ),
-                    "input_features": tf.keras.Input(
-                        batch_shape=(
-                            2,
-                            self.model_tester.num_mel_bins,
-                            self.model_tester.seq_length,
-                        ),
-                        name="input_features",
-                        dtype="float32",
-                    ),
-                }
-            elif self.is_encoder_decoder:
-                inputs = {
-                    "decoder_input_ids": tf.keras.Input(
-                        batch_shape=(2, max_input),
-                        name="decoder_input_ids",
-                        dtype="int32",
-                    ),
-                    "input_ids": tf.keras.Input(batch_shape=(2, max_input), name="input_ids", dtype="int32"),
-                }
-            # `pixel_values` implies that the input is an image
-            elif model_class.main_input_name == "pixel_values":
-                inputs = tf.keras.Input(
-                    batch_shape=(
-                        3,
-                        self.model_tester.num_channels,
-                        self.model_tester.image_size,
-                        self.model_tester.image_size,
-                    ),
-                    name="pixel_values",
-                    dtype="float32",
-                )
-            elif model_class.__name__ in ["TFCLIPModel", "TFGroupViTModel", "TFBlipModel"]:
-                inputs = {
-                    "input_ids": tf.keras.Input(batch_shape=(3, max_input), name="input_ids", dtype="int32"),
-                    "pixel_values": tf.keras.Input(
-                        batch_shape=(
-                            3,
-                            self.model_tester.vision_model_tester.num_channels,
-                            self.model_tester.vision_model_tester.image_size,
-                            self.model_tester.vision_model_tester.image_size,
-                        ),
-                        name="pixel_values",
-                        dtype="float32",
-                    ),
-                }
-            elif model_class in get_values(TF_MODEL_FOR_MULTIPLE_CHOICE_MAPPING):
-                inputs = tf.keras.Input(batch_shape=(4, 2, max_input), name="input_ids", dtype="int32")
-            else:
-                inputs = tf.keras.Input(batch_shape=(2, max_input), name="input_ids", dtype="int32")
-    
-            # Prepare our model
-            model = model_class(config)
-            model(self._prepare_for_class(inputs_dict, model_class))  # Model must be called before saving.
-            # Let's load it from the disk to be sure we can use pretrained weights
-            with tempfile.TemporaryDirectory() as tmpdirname:
-                model.save_pretrained(tmpdirname, saved_model=False)
->               model = model_class.from_pretrained(tmpdirname)
-
-tests/test_modeling_tf_common.py:827: 
-_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
-src/transformers/modeling_tf_utils.py:2810: in from_pretrained
-    model(model.dummy_inputs)  # build the network with dummy inputs
-.env/lib/python3.8/site-packages/keras/utils/traceback_utils.py:70: in error_handler
-    raise e.with_traceback(filtered_tb) from None
-_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
-
-self = <transformers.models.whisper.modeling_tf_whisper.TFWhisperForAudioClassification object at 0x7f8ab3b5a160>
-args = ({'decoder_input_ids': <tf.Tensor: shape=(1, 2), dtype=int32, numpy=array([[2, 3]], dtype=int32)>, 'input_features': <...],
-        [0.18482292, 0.9640751 , 0.32960486, ..., 0.53566337,
-         0.2877066 , 0.8109739 ]]], dtype=float32)>},)
-kwargs = {}
-fn_args_and_kwargs = {'input_features': {'decoder_input_ids': <tf.Tensor: shape=(1, 2), dtype=int32, numpy=array([[2, 3]], dtype=int32)>, '...2292, 0.9640751 , 0.32960486, ..., 0.53566337,
-         0.2877066 , 0.8109739 ]]], dtype=float32)>}, 'kwargs_call': {}}
-config = WhisperConfig {
-  "_name_or_path": "/tmp/tmpa3qgulea",
-  "activation_dropout": 0.0,
-  "activation_function": "gelu",
- ..."transformers_version": "4.30.0.dev0",
-  "use_cache": true,
-  "use_weighted_layer_sum": false,
-  "vocab_size": 51865
-}
-
-unpacked_inputs = {'decoder_input_ids': <tf.Tensor: shape=(1, 2), dtype=int32, numpy=array([[2, 3]], dtype=int32)>, 'encoder_outputs': N...        [0.18482292, 0.9640751 , 0.32960486, ..., 0.53566337,
-         0.2877066 , 0.8109739 ]]], dtype=float32)>, ...}
-
-    @functools.wraps(func)
-    def run_call_with_unpacked_inputs(self, *args, **kwargs):
-        # isolates the actual `**kwargs` for the decorated function
-        kwargs_call = {key: val for key, val in kwargs.items() if key not in dict(original_signature.parameters)}
-        fn_args_and_kwargs = {key: val for key, val in kwargs.items() if key not in kwargs_call}
-        fn_args_and_kwargs.update({"kwargs_call": kwargs_call})
-    
-        # move any arg into kwargs, if they exist
-        fn_args_and_kwargs.update(dict(zip(func.__code__.co_varnames[1:], args)))
-    
-        # Encoder Decoder models delegate the application of the configuration options to their inner models.
-        if "EncoderDecoder" in self.__class__.__name__:
-            config = None
-        else:
-            config = self.config
-    
-        unpacked_inputs = input_processing(func, config, **fn_args_and_kwargs)
->       return func(self, **unpacked_inputs)
-E       TypeError: Exception encountered when calling layer 'tf_whisper_for_audio_classification_4' (type TFWhisperForAudioClassification).
-E       
-E       call() got an unexpected keyword argument 'decoder_input_ids'
-E       
-E       Call arguments received by layer 'tf_whisper_for_audio_classification_4' (type TFWhisperForAudioClassification):
-E         • input_features={'input_features': 'tf.Tensor(shape=(2, 80, 59), dtype=float32)', 'decoder_input_ids': 'tf.Tensor(shape=(1, 2), dtype=int32)'}
-E         • head_mask=None
-E         • encoder_outputs=None
-E         • labels=None
-E         • output_attentions=None
-E         • output_hidden_states=None
-E         • return_dict=None
-
-src/transformers/modeling_tf_utils.py:434: TypeError
-________________ TFWhisperEncoderModelTest.test_encoder_outputs ________________
-
-self = <tests.models.whisper.test_modeling_tf_whisper.TFWhisperEncoderModelTest testMethod=test_encoder_outputs>
-
-    def test_encoder_outputs(self):
-        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
-    
-        for model_class in self.all_model_classes:
-            model = model_class(config)
-    
-            inputs = copy.deepcopy(self._prepare_for_class(inputs_dict, model_class))
-            outputs = model(**inputs)[0]
-            input_ids = inputs["input_features"]
-            del inputs["input_features"]
-    
-            encoder = model.encoder
-            inputs["encoder_outputs"] = encoder(input_ids)
->           outputs_embeds = model(**inputs)[0]
-
-tests/models/whisper/test_modeling_tf_whisper.py:980: 
-_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
-.env/lib/python3.8/site-packages/keras/utils/traceback_utils.py:70: in error_handler
-    raise e.with_traceback(filtered_tb) from None
-_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
-
-self = <keras.utils.layer_utils.CallFunctionSpec object at 0x7f8ab36e31c0>
-args = ()
-kwargs = {'encoder_outputs': TFBaseModelOutput(last_hidden_state=<tf.Tensor: shape=(13, 30, 16), dtype=float32, numpy=
-array([[...k': <tf.Tensor: shape=(2, 4), dtype=float32, numpy=
-array([[1., 1., 1., 1.],
-       [1., 1., 1., 1.]], dtype=float32)>}
-
-    def split_out_first_arg(self, args, kwargs):
-        """Splits (args, kwargs) into (inputs, args, kwargs)."""
-        # Grab the argument corresponding to the first argument in the
-        # layer's `call` method spec. This will either be the first positional
-        # argument, or it will be provided as a keyword argument.
-        if args:
-            inputs = args[0]
-            args = args[1:]
-        elif self._arg_names[0] in kwargs:
-            kwargs = copy.copy(kwargs)
-            inputs = kwargs.pop(self._arg_names[0])
-        else:
->           raise ValueError(
-                "The first argument to `Layer.call` must always be passed."
-            )
-E           ValueError: The first argument to `Layer.call` must always be passed.
-
-.env/lib/python3.8/site-packages/keras/utils/layer_utils.py:809: ValueError
-_______________ TFWhisperEncoderModelTest.test_forward_signature _______________
-
-self = <tests.models.whisper.test_modeling_tf_whisper.TFWhisperEncoderModelTest testMethod=test_forward_signature>
-
-    def test_forward_signature(self):
-        config, _ = self.model_tester.prepare_config_and_inputs_for_common()
-    
-        for model_class in self.all_model_classes:
-            model = model_class(config)
->           signature = inspect.signature(model.forward)
-E           AttributeError: 'TFWhisperForAudioClassification' object has no attribute 'forward'
-
-tests/models/whisper/test_modeling_tf_whisper.py:955: AttributeError
-_____________ TFWhisperEncoderModelTest.test_hidden_states_output ______________
-
-self = <tests.models.whisper.test_modeling_tf_whisper.TFWhisperEncoderModelTest testMethod=test_hidden_states_output>
-
-    def test_hidden_states_output(self):
-        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
-    
-        def check_hidden_states_output(config, inputs_dict, model_class):
-            model = model_class(config)
-            outputs = model(self._prepare_for_class(inputs_dict, model_class))
-            expected_num_layers = getattr(
-                self.model_tester, "expected_num_hidden_layers", self.model_tester.num_hidden_layers + 1
-            )
-    
-            if model.config.is_encoder_decoder:
-                encoder_hidden_states = outputs.encoder_hidden_states
-                decoder_hidden_states = outputs.decoder_hidden_states
-    
-                self.assertEqual(config.output_attentions, False)
-                self.assertEqual(len(encoder_hidden_states), expected_num_layers)
-                self.assertListEqual(
-                    list(encoder_hidden_states[0].shape[-2:]),
-                    [self.model_tester.seq_length, self.model_tester.hidden_size],
-                )
-                self.assertEqual(len(decoder_hidden_states), expected_num_layers)
-                self.assertListEqual(
-                    list(decoder_hidden_states[0].shape[-2:]),
-                    [self.model_tester.seq_length, self.model_tester.hidden_size],
-                )
-            else:
-                hidden_states = outputs.hidden_states
-                self.assertEqual(config.output_attentions, False)
-                self.assertEqual(len(hidden_states), expected_num_layers)
-                self.assertListEqual(
-                    list(hidden_states[0].shape[-2:]),
-                    [self.model_tester.seq_length, self.model_tester.hidden_size],
-                )
-    
-        for model_class in self.all_model_classes:
-            inputs_dict["output_hidden_states"] = True
->           check_hidden_states_output(config, inputs_dict, model_class)
-
-tests/test_modeling_tf_common.py:1028: 
-_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
-tests/test_modeling_tf_common.py:1021: in check_hidden_states_output
-    self.assertListEqual(
-E   AssertionError: Lists differ: [30, 16] != [60, 16]
-E   
-E   First differing element 0:
-E   30
-E   60
-E   
-E   - [30, 16]
-E   ?  ^
-E   
-E   + [60, 16]
-E   ?  ^
-________________ TFWhisperEncoderModelTest.test_onnx_compliancy ________________
-
-self = <tests.models.whisper.test_modeling_tf_whisper.TFWhisperEncoderModelTest testMethod=test_onnx_compliancy>
-
-    def test_onnx_compliancy(self):
->       if not self.test_onnx:
-E       AttributeError: 'TFWhisperEncoderModelTest' object has no attribute 'test_onnx'
-
-tests/test_modeling_tf_common.py:343: AttributeError
-____________ TFWhisperEncoderModelTest.test_prepare_serving_output _____________
-
-self = <tests.models.whisper.test_modeling_tf_whisper.TFWhisperEncoderModelTest testMethod=test_prepare_serving_output>
-
-    def test_prepare_serving_output(self):
-        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
-        config.output_hidden_states = True
-        config.output_attentions = self.has_attentions
-    
-        for model_class in self.all_model_classes:
-            model = model_class(config)
-            inputs = self._prepare_for_class(inputs_dict, model_class)
-            outputs = model(inputs)
->           serving_outputs = model.serving_output(outputs)
-
-tests/test_modeling_tf_common.py:300: 
-_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
-
-self = <transformers.models.whisper.modeling_tf_whisper.TFWhisperForAudioClassification object at 0x7f8ab3909dc0>
-output = TFSequenceClassifierOutput(loss=None, logits=<tf.Tensor: shape=(13, 2), dtype=float32, numpy=
-array([[1.8147802, 1.989...
-         [0.05317271, 0.03583057, 0.03193213, ..., 0.02386571,
-          0.02572016, 0.02103928]]]], dtype=float32)>))
-
-    def serving_output(self, output):
-        """
-        Prepare the output of the saved model. Each model must implement this function.
-    
-        Args:
-            output ([`TFBaseModelOutput`]):
-                The output returned by the model.
-        """
->       raise NotImplementedError
-E       NotImplementedError
-
-src/transformers/modeling_tf_utils.py:1236: NotImplementedError
-____________ TFWhisperEncoderModelTest.test_pt_tf_model_equivalence ____________
-
-self = <tests.models.whisper.test_modeling_tf_whisper.TFWhisperEncoderModelTest testMethod=test_pt_tf_model_equivalence>
-allow_missing_keys = False
-
-    @is_pt_tf_cross_test
-    def test_pt_tf_model_equivalence(self, allow_missing_keys=False):
-        import transformers
-    
-        for model_class in self.all_model_classes:
-            config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
-    
-            # Output all for aggressive testing
-            config.output_hidden_states = True
-            config.output_attentions = self.has_attentions
-    
-            # Make sure no sequence has all zeros as attention mask, otherwise some tests fail due to the inconsistency
-            # of the usage `1e-4`, `1e-9`, `1e-30`, `-inf`.
-            # TODO: Use a uniform value for all models, make sure all tests pass without this processing, and remove it.
-            self._make_attention_mask_non_null(inputs_dict)
-    
-            pt_model_class_name = model_class.__name__[2:]  # Skip the "TF" at the beginning
-            pt_model_class = getattr(transformers, pt_model_class_name)
-    
-            tf_model = model_class(config)
-            pt_model = pt_model_class(config)
-    
-            tf_inputs_dict = self._prepare_for_class(inputs_dict, model_class)
-            tf_inputs_dict_with_labels = self._prepare_for_class(
-                inputs_dict,
-                model_class,
-                # Not all models accept "labels" in the forward pass (yet :) )
-                return_labels=True if "labels" in inspect.signature(model_class.call).parameters.keys() else False,
-            )
-    
-            # For some models (e.g. base models), there is no label returned.
-            # Set the input dict to `None` to avoid check outputs twice for the same input dicts.
-            if not set(tf_inputs_dict_with_labels.keys()).symmetric_difference(tf_inputs_dict.keys()):
-                tf_inputs_dict_with_labels = None
-    
-            # Check we can load pt model in tf and vice-versa with model => model functions
->           tf_model = transformers.load_pytorch_model_in_tf2_model(
-                tf_model, pt_model, tf_inputs=tf_inputs_dict, allow_missing_keys=allow_missing_keys
-            )
-
-tests/test_modeling_tf_common.py:706: 
-_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
-src/transformers/modeling_tf_pytorch_utils.py:204: in load_pytorch_model_in_tf2_model
-    return load_pytorch_weights_in_tf2_model(
-src/transformers/modeling_tf_pytorch_utils.py:230: in load_pytorch_weights_in_tf2_model
-    return load_pytorch_state_dict_in_tf2_model(
-_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
-
-tf_model = <transformers.models.whisper.modeling_tf_whisper.TFWhisperForAudioClassification object at 0x7f8ab36988b0>
-pt_state_dict = {'classifier.bias': array([0., 0.], dtype=float32), 'classifier.weight': array([[-0.00916307,  0.00247839,  0.01647531...       [ 0.04591416, -0.00516921,  0.03591024],
-        [-0.01335885, -0.01553431, -0.0269356 ]]], dtype=float32), ...}
-tf_inputs = {'head_mask': <tf.Tensor: shape=(2, 4), dtype=float32, numpy=
-array([[1., 1., 1., 1.],
-       [1., 1., 1., 1.]], dtype...53],
-        [0.87383145, 0.34678572, 0.2766716 , ..., 0.37082306,
-         0.825713  , 0.6365404 ]]], dtype=float32)>}
-allow_missing_keys = False, output_loading_info = False, _prefix = ''
-tf_to_pt_weight_rename = None, ignore_mismatched_sizes = False
-
-    def load_pytorch_state_dict_in_tf2_model(
-        tf_model,
-        pt_state_dict,
-        tf_inputs=None,
-        allow_missing_keys=False,
-        output_loading_info=False,
-        _prefix=None,
-        tf_to_pt_weight_rename=None,
-        ignore_mismatched_sizes=False,
-    ):
-        """Load a pytorch state_dict in a TF 2.0 model."""
-        import tensorflow as tf
-        from packaging.version import parse
-    
-        if parse(tf.__version__) >= parse("2.11.0"):
-            from keras import backend as K
-        else:
-            from tensorflow.python.keras import backend as K
-    
-        if tf_inputs is None:
-            tf_inputs = tf_model.dummy_inputs
-    
-        if _prefix is None:
-            _prefix = ""
-        if tf_inputs is not None:
-            with tf.name_scope(_prefix):
-                tf_model(tf_inputs, training=False)  # Make sure model is built
-        # Adapt state dict - TODO remove this and update the AWS weights files instead
-        # Convert old format to new format if needed from a PyTorch state_dict
-        old_keys = []
-        new_keys = []
-        for key in pt_state_dict.keys():
-            new_key = None
-            if "gamma" in key:
-                new_key = key.replace("gamma", "weight")
-            if "beta" in key:
-                new_key = key.replace("beta", "bias")
-            if "running_var" in key:
-                new_key = key.replace("running_var", "moving_variance")
-            if "running_mean" in key:
-                new_key = key.replace("running_mean", "moving_mean")
-            if new_key:
-                old_keys.append(key)
-                new_keys.append(new_key)
-        for old_key, new_key in zip(old_keys, new_keys):
-            pt_state_dict[new_key] = pt_state_dict.pop(old_key)
-    
-        # Matt: All TF models store the actual model stem in a MainLayer class, including the base model.
-        # In PT, the derived models (with heads) use the base model class as the stem instead, and the base model
-        # just contains the stem itself, and there is no MainLayer class. This means that TF base classes have one
-        # extra layer in their weight names, corresponding to the MainLayer class. This code block compensates for that.
-        start_prefix_to_remove = ""
-        if not any(s.startswith(tf_model.base_model_prefix) for s in pt_state_dict.keys()):
-            start_prefix_to_remove = tf_model.base_model_prefix + "."
-    
-        symbolic_weights = tf_model.trainable_weights + tf_model.non_trainable_weights
-        tf_loaded_numel = 0
-        weight_value_tuples = []
-        all_pytorch_weights = set(pt_state_dict.keys())
-        missing_keys = []
-        mismatched_keys = []
-        for symbolic_weight in symbolic_weights:
-            sw_name = symbolic_weight.name
-            name, transpose = convert_tf_weight_name_to_pt_weight_name(
-                sw_name,
-                start_prefix_to_remove=start_prefix_to_remove,
-                tf_weight_shape=symbolic_weight.shape,
-                name_scope=_prefix,
-            )
-            if tf_to_pt_weight_rename is not None:
-                name = tf_to_pt_weight_rename(name)
-    
-            # Find associated numpy array in pytorch model state dict
-            if name not in pt_state_dict:
-                if allow_missing_keys:
-                    missing_keys.append(name)
-                    continue
-                elif tf_model._keys_to_ignore_on_load_missing is not None:
-                    # authorized missing keys don't have to be loaded
-                    if any(re.search(pat, name) is not None for pat in tf_model._keys_to_ignore_on_load_missing):
-                        continue
->               raise AttributeError(f"{name} not found in PyTorch model")
-E               AttributeError: tf_whisper_encoder_17.conv1.weight not found in PyTorch model
-
-src/transformers/modeling_tf_pytorch_utils.py:322: AttributeError
-____________ TFWhisperEncoderModelTest.test_resize_token_embeddings ____________
-
-self = <tests.models.whisper.test_modeling_tf_whisper.TFWhisperEncoderModelTest testMethod=test_resize_token_embeddings>
-
-    def test_resize_token_embeddings(self):
-        # TODO (joao): after the embeddings refactor is complete, rework this test so as to rely exclusively on
-        # tf.keras.layers.Embedding
-    
-        if not self.test_resize_embeddings:
-            return
-        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
-    
-        def _get_word_embedding_weight(model, embedding_layer):
-            if isinstance(embedding_layer, tf.keras.layers.Embedding):
-                # builds the embeddings layer
-                model(model.dummy_inputs)
-                return embedding_layer.embeddings
-            else:
-                return model._get_word_embedding_weight(embedding_layer)
-    
-        for model_class in self.all_model_classes:
-            for size in [config.vocab_size - 10, config.vocab_size + 10, None]:
-                # build the embeddings
-                model = model_class(config=copy.deepcopy(config))  # `resize_token_embeddings` mutates `config`
->               old_input_embeddings = _get_word_embedding_weight(model, model.get_input_embeddings())
-
-tests/test_modeling_tf_common.py:1211: 
-_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
-
-self = <transformers.models.whisper.modeling_tf_whisper.TFWhisperForAudioClassification object at 0x7f8ab3b77d30>
-
-    def get_input_embeddings(self) -> tf.keras.layers.Layer:
-        """
-        Returns the model's input embeddings layer.
-    
-        Returns:
-            `tf.Variable`: The embeddings layer mapping vocabulary to hidden states.
-        """
-        main_layer = getattr(self, self.base_model_prefix, self)
-    
-        if main_layer is not self:
-            return main_layer.get_input_embeddings()
-        else:
->           raise NotImplementedError
-E           NotImplementedError
-
-src/transformers/modeling_tf_utils.py:1262: NotImplementedError
-___________________ TFWhisperEncoderModelTest.test_save_load ___________________
-
-self = <tests.models.whisper.test_modeling_tf_whisper.TFWhisperEncoderModelTest testMethod=test_save_load>
-
-    def test_save_load(self):
-        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
-    
-        for model_class in self.all_model_classes:
-            model = model_class(config)
-            outputs = model(self._prepare_for_class(inputs_dict, model_class))
-    
-            with tempfile.TemporaryDirectory() as tmpdirname:
-                model.save_pretrained(tmpdirname, saved_model=False)
-    
-                # the config file (and the generation config file, if it can generate) should be saved
-                self.assertTrue(os.path.exists(os.path.join(tmpdirname, CONFIG_NAME)))
-                self.assertEqual(
-                    model.can_generate(), os.path.exists(os.path.join(tmpdirname, GENERATION_CONFIG_NAME))
-                )
-    
->               model = model_class.from_pretrained(tmpdirname)
-
-tests/test_modeling_tf_common.py:247: 
-_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
-src/transformers/modeling_tf_utils.py:2810: in from_pretrained
-    model(model.dummy_inputs)  # build the network with dummy inputs
-.env/lib/python3.8/site-packages/keras/utils/traceback_utils.py:70: in error_handler
-    raise e.with_traceback(filtered_tb) from None
-_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
-
-self = <transformers.models.whisper.modeling_tf_whisper.TFWhisperForAudioClassification object at 0x7f8ab352caf0>
-args = ({'decoder_input_ids': <tf.Tensor: shape=(1, 2), dtype=int32, numpy=array([[2, 3]], dtype=int32)>, 'input_features': <...],
-        [0.00129569, 0.1321398 , 0.4477892 , ..., 0.09110773,
-         0.8213068 , 0.2709607 ]]], dtype=float32)>},)
-kwargs = {}
-fn_args_and_kwargs = {'input_features': {'decoder_input_ids': <tf.Tensor: shape=(1, 2), dtype=int32, numpy=array([[2, 3]], dtype=int32)>, '...9569, 0.1321398 , 0.4477892 , ..., 0.09110773,
-         0.8213068 , 0.2709607 ]]], dtype=float32)>}, 'kwargs_call': {}}
-config = WhisperConfig {
-  "_name_or_path": "/tmp/tmpozgmlb9x",
-  "activation_dropout": 0.0,
-  "activation_function": "gelu",
- ..."transformers_version": "4.30.0.dev0",
-  "use_cache": true,
-  "use_weighted_layer_sum": false,
-  "vocab_size": 51865
-}
-
-unpacked_inputs = {'decoder_input_ids': <tf.Tensor: shape=(1, 2), dtype=int32, numpy=array([[2, 3]], dtype=int32)>, 'encoder_outputs': N...        [0.00129569, 0.1321398 , 0.4477892 , ..., 0.09110773,
-         0.8213068 , 0.2709607 ]]], dtype=float32)>, ...}
-
-    @functools.wraps(func)
-    def run_call_with_unpacked_inputs(self, *args, **kwargs):
-        # isolates the actual `**kwargs` for the decorated function
-        kwargs_call = {key: val for key, val in kwargs.items() if key not in dict(original_signature.parameters)}
-        fn_args_and_kwargs = {key: val for key, val in kwargs.items() if key not in kwargs_call}
-        fn_args_and_kwargs.update({"kwargs_call": kwargs_call})
-    
-        # move any arg into kwargs, if they exist
-        fn_args_and_kwargs.update(dict(zip(func.__code__.co_varnames[1:], args)))
-    
-        # Encoder Decoder models delegate the application of the configuration options to their inner models.
-        if "EncoderDecoder" in self.__class__.__name__:
-            config = None
-        else:
-            config = self.config
-    
-        unpacked_inputs = input_processing(func, config, **fn_args_and_kwargs)
->       return func(self, **unpacked_inputs)
-E       TypeError: Exception encountered when calling layer 'tf_whisper_for_audio_classification_20' (type TFWhisperForAudioClassification).
-E       
-E       call() got an unexpected keyword argument 'decoder_input_ids'
-E       
-E       Call arguments received by layer 'tf_whisper_for_audio_classification_20' (type TFWhisperForAudioClassification):
-E         • input_features={'input_features': 'tf.Tensor(shape=(2, 80, 59), dtype=float32)', 'decoder_input_ids': 'tf.Tensor(shape=(1, 2), dtype=int32)'}
-E         • head_mask=None
-E         • encoder_outputs=None
-E         • labels=None
-E         • output_attentions=None
-E         • output_hidden_states=None
-E         • return_dict=None
-
-src/transformers/modeling_tf_utils.py:434: TypeError
-=============================== warnings summary ===============================
-.env/lib/python3.8/site-packages/tensorflow/python/framework/dtypes.py:246
-  /home/adit299/transformers/.env/lib/python3.8/site-packages/tensorflow/python/framework/dtypes.py:246: DeprecationWarning: `np.bool8` is a deprecated alias for `np.bool_`.  (Deprecated NumPy 1.24)
-    np.bool8: (False, True),
-
-src/transformers/models/open_llama/modeling_open_llama.py:42
-  /home/adit299/transformers/src/transformers/models/open_llama/modeling_open_llama.py:42: DeprecationWarning: The 'warn' method is deprecated, use 'warning' instead
-    logger.warn(
-
--- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html
-=========================== short test summary info ============================
-FAILED tests/models/whisper/test_modeling_tf_whisper.py::TFWhisperEncoderModelTest::test_compile_tf_model
-FAILED tests/models/whisper/test_modeling_tf_whisper.py::TFWhisperEncoderModelTest::test_encoder_outputs
-FAILED tests/models/whisper/test_modeling_tf_whisper.py::TFWhisperEncoderModelTest::test_forward_signature
-FAILED tests/models/whisper/test_modeling_tf_whisper.py::TFWhisperEncoderModelTest::test_hidden_states_output
-FAILED tests/models/whisper/test_modeling_tf_whisper.py::TFWhisperEncoderModelTest::test_onnx_compliancy
-FAILED tests/models/whisper/test_modeling_tf_whisper.py::TFWhisperEncoderModelTest::test_prepare_serving_output
-FAILED tests/models/whisper/test_modeling_tf_whisper.py::TFWhisperEncoderModelTest::test_pt_tf_model_equivalence
-FAILED tests/models/whisper/test_modeling_tf_whisper.py::TFWhisperEncoderModelTest::test_resize_token_embeddings
-FAILED tests/models/whisper/test_modeling_tf_whisper.py::TFWhisperEncoderModelTest::test_save_load
-============= 9 failed, 25 passed, 6 skipped, 2 warnings in 39.74s =============