diff --git a/mteb/models/whisper_models.py b/mteb/models/whisper_models.py index d59e3a4493..64a740b817 100644 --- a/mteb/models/whisper_models.py +++ b/mteb/models/whisper_models.py @@ -114,13 +114,14 @@ def get_audio_embeddings( with torch.no_grad(): for i in tqdm(range(0, len(processed_audio), batch_size)): batch = processed_audio[i : i + batch_size] - batch = self._pad_audio_batch(batch) + batch_arrays = [tensor.numpy() for tensor in batch] - inputs = self.processor.feature_extractor( - batch, + inputs = self.processor( + batch_arrays, sampling_rate=self.sampling_rate, return_tensors="pt", - padding="longest", + padding="max_length", + max_length=None, return_attention_mask=True, ).to(self.device)