diff --git a/mteb/models/whisper_models.py b/mteb/models/whisper_models.py
index d59e3a4493..64a740b817 100644
--- a/mteb/models/whisper_models.py
+++ b/mteb/models/whisper_models.py
@@ -114,13 +114,14 @@ def get_audio_embeddings(
         with torch.no_grad():
             for i in tqdm(range(0, len(processed_audio), batch_size)):
                 batch = processed_audio[i : i + batch_size]
-                batch = self._pad_audio_batch(batch)
+                batch_arrays = [tensor.numpy() for tensor in batch]
 
-                inputs = self.processor.feature_extractor(
-                    batch,
+                inputs = self.processor(
+                    batch_arrays,
                     sampling_rate=self.sampling_rate,
                     return_tensors="pt",
-                    padding="longest",
+                    padding="max_length",
+                    max_length=None,
                     return_attention_mask=True,
                 ).to(self.device)