diff --git a/mteb/models/ast_model.py b/mteb/models/ast_model.py
index 966877cc91..48b56585a5 100644
--- a/mteb/models/ast_model.py
+++ b/mteb/models/ast_model.py
@@ -93,17 +93,13 @@ def get_audio_embeddings(
         task_name: str | None = None,
         prompt_type: PromptType | None = None,
         batch_size: int = 4,
-        show_progress_bar: bool = True,
         **kwargs: Any,
     ) -> torch.Tensor:
         processed_audio = self._process_audio(audio)
         all_embeddings = []
 
         with torch.no_grad():
-            for i in tqdm(
-                range(0, len(processed_audio), batch_size),
-                disable=not show_progress_bar,
-            ):
+            for i in tqdm(range(0, len(processed_audio), batch_size)):
                 batch = processed_audio[i : i + batch_size]
 
                 # AST processes raw waveforms directly through its feature extractor
diff --git a/mteb/models/clap_models.py b/mteb/models/clap_models.py
index a0f29d2ff3..6f03c451a6 100644
--- a/mteb/models/clap_models.py
+++ b/mteb/models/clap_models.py
@@ -109,16 +109,13 @@ def get_audio_embeddings(
         task_name: str | None = None,
         prompt_type: PromptType | None = None,
         batch_size: int = 4,
-        show_progress_bar: bool = True,
         **kwargs: Any,
     ) -> np.ndarray:
         all_features = []
         processed_audio = self._process_audio(audio)
 
         for i in tqdm(
-            range(0, len(processed_audio), batch_size),
-            desc="Processing audio batches",
-            disable=not show_progress_bar,
+            range(0, len(processed_audio), batch_size), desc="Processing audio batches"
         ):
             batch = processed_audio[i : i + batch_size]
             batch_arrays = [tensor.numpy() for tensor in batch]
@@ -128,8 +125,6 @@ def get_audio_embeddings(
                 sampling_rate=self.sampling_rate,
                 return_tensors="pt",
                 padding=True,
-                truncation=True,
-                max_length=30 * self.sampling_rate,  # 30 seconds max
             )
 
             inputs = {k: v.to(self.device) for k, v in inputs.items()}
diff --git a/mteb/models/cnn14_model.py b/mteb/models/cnn14_model.py
index 1ac11e2225..4b6035e7ca 100644
--- a/mteb/models/cnn14_model.py
+++ b/mteb/models/cnn14_model.py
@@ -92,14 +92,7 @@ def _handle_batch(
     def _convert_audio(self, audio: AudioData) -> torch.Tensor:
         if isinstance(audio, np.ndarray):
             audio = torch.from_numpy(audio)
-        audio = audio.squeeze()
-
-        # Apply audio truncation (30 seconds max)
-        max_length = 30 * self.sampling_rate  # 30 seconds
-        if audio.shape[-1] > max_length:
-            audio = audio[..., :max_length]
-
-        return audio
+        return audio.squeeze()
 
     def _load_audio_file(self, path: str) -> torch.Tensor:
         waveform, sample_rate = torchaudio.load(path)
@@ -120,17 +113,13 @@ def get_audio_embeddings(
         task_name: str | None = None,
         prompt_type: PromptType | None = None,
         batch_size: int = 4,
-        show_progress_bar: bool = True,
         **kwargs: Any,
     ) -> torch.Tensor:
         processed_audio = self._process_audio(audio)
         all_embeddings = []
 
         with torch.no_grad():
-            for i in tqdm(
-                range(0, len(processed_audio), batch_size),
-                disable=not show_progress_bar,
-            ):
+            for i in tqdm(range(0, len(processed_audio), batch_size)):
                 batch = processed_audio[i : i + batch_size]
 
                 # Convert batch to tensors and move to device
diff --git a/mteb/models/data2vec_models.py b/mteb/models/data2vec_models.py
index 6c5403d197..de2f72e112 100644
--- a/mteb/models/data2vec_models.py
+++ b/mteb/models/data2vec_models.py
@@ -103,17 +103,13 @@ def get_audio_embeddings(
         task_name: str | None = None,
         prompt_type: PromptType | None = None,
         batch_size: int = 4,
-        show_progress_bar: bool = True,
         **kwargs: Any,
     ) -> torch.Tensor:
         processed_audio = self._process_audio(audio)
         all_embeddings = []
 
         with torch.no_grad():
-            for i in tqdm(
-                range(0, len(processed_audio), batch_size),
-                disable=not show_progress_bar,
-            ):
+            for i in tqdm(range(0, len(processed_audio), batch_size)):
                 batch = processed_audio[i : i + batch_size]
 
                 # Pre-process audio
@@ -129,8 +125,6 @@ def get_audio_embeddings(
                     sampling_rate=self.sampling_rate,
                     return_tensors="pt",
                     padding="longest",
-                    truncation=True,
-                    max_length=30 * self.sampling_rate,  # 30 seconds max
                     return_attention_mask=True,
                 ).to(self.device)
 
diff --git a/mteb/models/encodec_model.py b/mteb/models/encodec_model.py
index 8f0c4d43da..28dd2c896c 100644
--- a/mteb/models/encodec_model.py
+++ b/mteb/models/encodec_model.py
@@ -106,17 +106,13 @@ def get_audio_embeddings(
         task_name: str | None = None,
         prompt_type: PromptType | None = None,
         batch_size: int = 4,
-        show_progress_bar: bool = True,
         **kwargs: Any,
     ) -> torch.Tensor:
         processed_audio = self._process_audio(audio)
         all_embeddings = []
 
         with torch.no_grad():
-            for i in tqdm(
-                range(0, len(processed_audio), batch_size),
-                disable=not show_progress_bar,
-            ):
+            for i in tqdm(range(0, len(processed_audio), batch_size)):
                 batch = processed_audio[i : i + batch_size]
 
                 # Process audio through EnCodec's processor
@@ -125,8 +121,6 @@ def get_audio_embeddings(
                     sampling_rate=self.sampling_rate,
                     return_tensors="pt",
                     padding=True,
-                    truncation=True,
-                    max_length=30 * self.sampling_rate,  # 30 seconds max
                 ).to(self.device)
 
                 # Get the latent representations directly from the encoder
diff --git a/mteb/models/hubert_models.py b/mteb/models/hubert_models.py
index cdc5e6bb88..6c1965e940 100644
--- a/mteb/models/hubert_models.py
+++ b/mteb/models/hubert_models.py
@@ -103,17 +103,13 @@ def get_audio_embeddings(
         task_name: str | None = None,
         prompt_type: PromptType | None = None,
         batch_size: int = 4,
-        show_progress_bar: bool = True,
         **kwargs: Any,
     ) -> torch.Tensor:
         processed_audio = self._process_audio(audio)
         all_embeddings = []
 
         with torch.no_grad():
-            for i in tqdm(
-                range(0, len(processed_audio), batch_size),
-                disable=not show_progress_bar,
-            ):
+            for i in tqdm(range(0, len(processed_audio), batch_size)):
                 batch = processed_audio[i : i + batch_size]
 
                 # Pre-process like Wav2Vec2
@@ -129,8 +125,6 @@ def get_audio_embeddings(
                     sampling_rate=self.sampling_rate,
                     return_tensors="pt",
                     padding="longest",
-                    truncation=True,
-                    max_length=30 * self.sampling_rate,  # 30 seconds max
                     return_attention_mask=True,
                 ).to(self.device)
 
diff --git a/mteb/models/mctct_model.py b/mteb/models/mctct_model.py
index ac16b16c61..ec9273fa6d 100644
--- a/mteb/models/mctct_model.py
+++ b/mteb/models/mctct_model.py
@@ -167,17 +167,13 @@ def get_audio_embeddings(
         task_name: str | None = None,
         prompt_type: PromptType | None = None,
         batch_size: int = 4,
-        show_progress_bar: bool = True,
         **kwargs: Any,
     ) -> torch.Tensor:
         processed_audio = self._process_audio(audio)
         all_embeddings = []
 
         with torch.no_grad():
-            for i in tqdm(
-                range(0, len(processed_audio), batch_size),
-                disable=not show_progress_bar,
-            ):
+            for i in tqdm(range(0, len(processed_audio), batch_size)):
                 batch = processed_audio[i : i + batch_size]
 
                 # Process each audio in the batch
@@ -186,8 +182,6 @@ def get_audio_embeddings(
                     sampling_rate=self.sampling_rate,
                     return_tensors="pt",
                     padding=True,
-                    truncation=True,
-                    max_length=30 * self.sampling_rate,  # 30 seconds max
                 ).to(self.device)
 
                 # Get embeddings from the model
diff --git a/mteb/models/mms_models.py b/mteb/models/mms_models.py
index e32be1b065..760c5725a9 100644
--- a/mteb/models/mms_models.py
+++ b/mteb/models/mms_models.py
@@ -121,17 +121,13 @@ def get_audio_embeddings(
         task_name: str | None = None,
         prompt_type: PromptType | None = None,
         batch_size: int = 4,
-        show_progress_bar: bool = True,
         **kwargs: Any,
     ) -> torch.Tensor:
         processed_audio = self._process_audio(audio)
         all_embeddings = []
 
         with torch.no_grad():
-            for i in tqdm(
-                range(0, len(processed_audio), batch_size),
-                disable=not show_progress_bar,
-            ):
+            for i in tqdm(range(0, len(processed_audio), batch_size)):
                 batch = processed_audio[i : i + batch_size]
 
                 batch_tensor = self._pad_audio_batch(batch)
@@ -146,8 +142,6 @@ def get_audio_embeddings(
                     sampling_rate=self.sampling_rate,
                     return_tensors="pt",
                     padding="longest",
-                    truncation=True,
-                    max_length=30 * self.sampling_rate,  # 30 seconds max
                     return_attention_mask=True,
                 ).to(self.device)
 
diff --git a/mteb/models/msclap_models.py b/mteb/models/msclap_models.py
index f9aedc4186..761ddfa98d 100644
--- a/mteb/models/msclap_models.py
+++ b/mteb/models/msclap_models.py
@@ -93,11 +93,6 @@ def _handle_batch(
                 )
                 audio_array = resampler(audio_array)
 
-                # Apply audio truncation (30 seconds max)
-                max_length = 30 * self.sampling_rate  # 30 seconds
-                if audio_array.shape[-1] > max_length:
-                    audio_array = audio_array[..., :max_length]
-
                 # Only squeeze here, don't call _convert_audio again
                 waveforms.append(audio_array.squeeze())
             elif "path" in item:
@@ -112,14 +107,7 @@
     def _convert_audio(self, audio: AudioData) -> torch.Tensor:
         if isinstance(audio, np.ndarray):
             audio = torch.from_numpy(audio)
-        audio = audio.squeeze().float()  # Ensure float32
-
-        # Apply audio truncation (30 seconds max)
-        max_length = 30 * self.sampling_rate  # 30 seconds
-        if audio.shape[-1] > max_length:
-            audio = audio[..., :max_length]
-
-        return audio
+        return audio.squeeze().float()  # Ensure float32
 
     def _load_audio_file(self, path: str) -> torch.Tensor:
         waveform, sample_rate = torchaudio.load(path)
@@ -127,12 +115,6 @@ def _load_audio_file(self, path: str) -> torch.Tensor:
         if sample_rate != self.sampling_rate:
             resampler = torchaudio.transforms.Resample(sample_rate, self.sampling_rate)
             waveform = resampler(waveform)
-
-        # Apply audio truncation (30 seconds max)
-        max_length = 30 * self.sampling_rate  # 30 seconds
-        if waveform.shape[-1] > max_length:
-            waveform = waveform[..., :max_length]
-
         return waveform.squeeze()
 
     def get_audio_embeddings(
@@ -142,16 +124,13 @@
         task_name: str | None = None,
         prompt_type: PromptType | None = None,
         batch_size: int = 4,
-        show_progress_bar: bool = True,
         **kwargs: Any,
     ) -> np.ndarray:
         all_features = []
         processed_audio = self._process_audio(audio)
 
         for i in tqdm(
-            range(0, len(processed_audio), batch_size),
-            desc="Processing audio batches",
-            disable=not show_progress_bar,
+            range(0, len(processed_audio), batch_size), desc="Processing audio batches"
         ):
             batch = processed_audio[i : i + batch_size]
 
diff --git a/mteb/models/muq_mulan_model.py b/mteb/models/muq_mulan_model.py
index d11a965e98..ae61b968da 100644
--- a/mteb/models/muq_mulan_model.py
+++ b/mteb/models/muq_mulan_model.py
@@ -90,14 +90,7 @@ def _convert_audio(self, audio: AudioData) -> torch.Tensor:
         """Convert audio data to torch tensor."""
         if isinstance(audio, np.ndarray):
             audio = torch.from_numpy(audio)
-        audio = audio.squeeze().float()  # Ensure float32
-
-        # Apply audio truncation (30 seconds max)
-        max_length = 30 * self.target_sampling_rate  # 30 seconds
-        if audio.shape[-1] > max_length:
-            audio = audio[..., :max_length]
-
-        return audio
+        return audio.squeeze().float()  # Ensure float32
 
     def _load_audio_file(self, path: str) -> torch.Tensor:
         """Load audio file and resample to target sampling rate."""
@@ -116,7 +109,6 @@ def get_audio_embeddings(
         self,
         audio: AudioBatch,
         *,
-        show_progress_bar: bool = True,
         task_name: str | None = None,
         prompt_type: PromptType | None = None,
         batch_size: int = 4,
@@ -124,13 +116,10 @@
     ) -> np.ndarray:
         """Get audio embeddings using MuQ-MuLan."""
         all_features = []
-
         processed_audio = self._process_audio(audio)
 
         for i in tqdm(
-            range(0, len(processed_audio), batch_size),
-            desc="Processing audio batches",
-            disable=not show_progress_bar,
+            range(0, len(processed_audio), batch_size), desc="Processing audio batches"
         ):
             batch = processed_audio[i : i + batch_size]
             batch_features = self._process_audio_batch(batch)
diff --git a/mteb/models/qwen2_models.py b/mteb/models/qwen2_models.py
index e176d4f31f..fbe660df3c 100644
--- a/mteb/models/qwen2_models.py
+++ b/mteb/models/qwen2_models.py
@@ -8,7 +8,6 @@
 import torch
 import torchaudio
 from torch.utils.data import DataLoader
-from tqdm import tqdm
 from transformers import AutoProcessor, Qwen2AudioForConditionalGeneration
 
 from mteb.encoder_interface import AudioBatch, AudioData, PromptType
@@ -82,14 +81,7 @@ def _convert_audio_from_numpy(self, audio: AudioData) -> torch.Tensor:
         audio = torch.from_numpy(audio)
         if audio.ndim == 2:
             audio = audio.mean(dim=0)
-        audio = audio.squeeze()
-
-        # Apply audio truncation (30 seconds max)
-        max_length = 30 * self.sampling_rate  # 30 seconds
-        if audio.shape[-1] > max_length:
-            audio = audio[..., :max_length]
-
-        return audio
+        return audio.squeeze()
 
     def _load_audio_file(self, path: str) -> torch.Tensor:
         waveform, sr = torchaudio.load(path)
@@ -98,14 +90,7 @@ def _load_audio_file(self, path: str) -> torch.Tensor:
         if sr != self.sampling_rate:
             resampler = torchaudio.transforms.Resample(sr, self.sampling_rate)
             waveform = resampler(waveform)
-        waveform = waveform.squeeze()
-
-        # Apply audio truncation (30 seconds max)
-        max_length = 30 * self.sampling_rate  # 30 seconds
-        if waveform.shape[-1] > max_length:
-            waveform = waveform[..., :max_length]
-
-        return waveform
+        return waveform.squeeze()
 
     def _pad_audio_batch(self, batch: list[torch.Tensor]) -> torch.Tensor:
         max_len = max(w.shape[0] for w in batch)
@@ -119,16 +104,13 @@ def get_audio_embeddings(
         task_name: str | None = None,
         prompt_type: PromptType | None = None,
         batch_size: int = 4,
-        show_progress_bar: bool = True,
         **kwargs: Any,
     ) -> torch.Tensor:
         processed = self._process_audio(audio)
         embeddings_list: list[torch.Tensor] = []
 
         with torch.no_grad():
-            for i in tqdm(
-                range(0, len(processed), batch_size), disable=not show_progress_bar
-            ):
+            for i in range(0, len(processed), batch_size):
                 batch = processed[i : i + batch_size]
                 audio_list = [w.numpy() for w in batch]
 
diff --git a/mteb/models/seamlessm4t_models.py b/mteb/models/seamlessm4t_models.py
index 1fb6b3d0dd..ce2cca3305 100644
--- a/mteb/models/seamlessm4t_models.py
+++ b/mteb/models/seamlessm4t_models.py
@@ -101,17 +101,13 @@ def get_audio_embeddings(
         task_name: str | None = None,
         prompt_type: PromptType | None = None,
         batch_size: int = 4,
-        show_progress_bar: bool = True,
         **kwargs: Any,
     ) -> torch.Tensor:
         processed_audio = self._process_audio(audio)
         all_embeddings = []
 
         with torch.no_grad():
-            for i in tqdm(
-                range(0, len(processed_audio), batch_size),
-                disable=not show_progress_bar,
-            ):
+            for i in tqdm(range(0, len(processed_audio), batch_size)):
                 batch = processed_audio[i : i + batch_size]
 
                 batch_tensor = self._pad_audio_batch(batch)
@@ -121,8 +117,6 @@ def get_audio_embeddings(
                     sampling_rate=self.sampling_rate,
                     return_tensors="pt",
                     padding=True,
-                    truncation=True,
-                    max_length=30 * self.sampling_rate,  # 30 seconds max
                 ).to(self.device)
 
                 # Get encodings through the encoder
diff --git a/mteb/models/speecht5_models.py b/mteb/models/speecht5_models.py
index c0d438744f..0693377f23 100644
--- a/mteb/models/speecht5_models.py
+++ b/mteb/models/speecht5_models.py
@@ -119,17 +119,13 @@ def get_audio_embeddings(
         prompt_type: PromptType | None = None,
         batch_size: int = 4,
         hidden_layer: float = 1.0,
-        show_progress_bar: bool = True,
         **kwargs: Any,
     ) -> torch.Tensor:
         processed_audio = self._process_audio(audio)
         all_embeddings = []
 
         with torch.no_grad():
-            for i in tqdm(
-                range(0, len(processed_audio), batch_size),
-                disable=not show_progress_bar,
-            ):
+            for i in tqdm(range(0, len(processed_audio), batch_size)):
                 batch = processed_audio[i : i + batch_size]
 
                 batch_tensor = self._pad_audio_batch(batch)
@@ -144,8 +140,6 @@ def get_audio_embeddings(
                     sampling_rate=self.sampling_rate,
                     return_tensors="pt",
                     padding="longest",
-                    truncation=True,
-                    max_length=30 * self.sampling_rate,  # 30 seconds max
                     return_attention_mask=True,
                 ).to(self.device)
 
diff --git a/mteb/models/unispeech_models.py b/mteb/models/unispeech_models.py
index 955997b17b..7bcde20f44 100644
--- a/mteb/models/unispeech_models.py
+++ b/mteb/models/unispeech_models.py
@@ -103,17 +103,13 @@ def get_audio_embeddings(
         task_name: str | None = None,
         prompt_type: PromptType | None = None,
         batch_size: int = 4,
-        show_progress_bar: bool = True,
         **kwargs: Any,
     ) -> torch.Tensor:
         processed_audio = self._process_audio(audio)
         all_embeddings = []
 
         with torch.no_grad():
-            for i in tqdm(
-                range(0, len(processed_audio), batch_size),
-                disable=not show_progress_bar,
-            ):
+            for i in tqdm(range(0, len(processed_audio), batch_size)):
                 batch = processed_audio[i : i + batch_size]
 
                 # Pre-process like Wav2Vec2
@@ -129,8 +125,6 @@ def get_audio_embeddings(
                     sampling_rate=self.sampling_rate,
                     return_tensors="pt",
                     padding="longest",
-                    truncation=True,
-                    max_length=30 * self.sampling_rate,  # 30 seconds max
                     return_attention_mask=True,
                 ).to(self.device)
 
diff --git a/mteb/models/vggish_models.py b/mteb/models/vggish_models.py
index e132a0c691..528a42b04d 100644
--- a/mteb/models/vggish_models.py
+++ b/mteb/models/vggish_models.py
@@ -60,11 +60,6 @@ def _normalize_audio(self, audio):
         if audio.ndim > 1:
             audio = audio.mean(dim=0)
 
-        # Apply audio truncation (30 seconds max)
-        max_length = 30 * self.sampling_rate  # 30 seconds
-        if audio.shape[-1] > max_length:
-            audio = audio[..., :max_length]
-
         # Normalize to [-1.0, 1.0]
         if audio.abs().max() > 1.0:
             audio = audio / audio.abs().max()
@@ -161,14 +156,7 @@ def _prepare_input_tensor(self, audio_data):
         return input_tensor
 
     def get_audio_embeddings(
-        self,
-        audio,
-        *,
-        task_name=None,
-        prompt_type=None,
-        batch_size=4,
-        show_progress_bar=True,
-        **kwargs,
+        self, audio, *, task_name=None, prompt_type=None, batch_size=4, **kwargs
     ):
         """Generate embeddings for audio inputs."""
         processed_audio = self._process_audio(audio)
@@ -176,9 +164,7 @@
 
         with torch.no_grad():
             for i in tqdm(
-                range(0, len(processed_audio), batch_size),
-                desc="Processing audio",
-                disable=not show_progress_bar,
+                range(0, len(processed_audio), batch_size), desc="Processing audio"
             ):
                 batch = processed_audio[i : i + batch_size]
                 batch_embeddings = []
diff --git a/mteb/models/wav2clip_model.py b/mteb/models/wav2clip_model.py
index b02885b475..0555cebb1d 100644
--- a/mteb/models/wav2clip_model.py
+++ b/mteb/models/wav2clip_model.py
@@ -63,12 +63,6 @@ def _handle_batch(
                     item["sampling_rate"], self.sampling_rate
                 )
                 tensor = resampler(tensor)
-
-                # Apply audio truncation (30 seconds max)
-                max_length = 30 * self.sampling_rate  # 30 seconds
-                if tensor.shape[-1] > max_length:
-                    tensor = tensor[..., :max_length]
-
                 waveforms.append(tensor)
 
             # dict with path
@@ -105,7 +99,6 @@ def get_audio_embeddings(
         self,
         audio: AudioBatch,
         *,
-        show_progress_bar: bool = True,
         task_name: str | None = None,
         prompt_type: PromptType | None = None,
         batch_size: int = 4,
@@ -115,9 +108,7 @@
 
         if isinstance(audio, DataLoader):
             # Process each DataLoader batch separately
-            for batch in tqdm(
-                audio, desc="Processing audio batches", disable=not show_progress_bar
-            ):
+            for batch in tqdm(audio, desc="Processing audio batches"):
                 wavs = self._handle_batch(batch)
                 batch_embeddings = self._process_audio_batch(wavs, batch_size)
                 all_embeddings.extend(batch_embeddings)
diff --git a/mteb/models/wav2vec2_models.py b/mteb/models/wav2vec2_models.py
index 229e315065..ea7c17d45c 100644
--- a/mteb/models/wav2vec2_models.py
+++ b/mteb/models/wav2vec2_models.py
@@ -159,17 +159,13 @@ def get_audio_embeddings(
         task_name: str | None = None,
         prompt_type: PromptType | None = None,
         batch_size: int = 4,
-        show_progress_bar: bool = True,
         **kwargs: Any,
     ) -> torch.Tensor:
         processed_audio = self._process_audio(audio)
         all_embeddings = []
 
         with torch.no_grad():
-            for i in tqdm(
-                range(0, len(processed_audio), batch_size),
-                disable=not show_progress_bar,
-            ):
+            for i in tqdm(range(0, len(processed_audio), batch_size)):
                 batch = processed_audio[i : i + batch_size]
 
                 inputs = self.feature_extractor(
@@ -177,8 +173,6 @@ def get_audio_embeddings(
                     sampling_rate=self.sampling_rate,
                     return_tensors="pt",
                     padding=True,
-                    truncation=True,
-                    max_length=30 * self.sampling_rate,  # 30 seconds max
                     return_attention_mask=True,
                 ).to(self.device)
 
diff --git a/mteb/models/wavlm_models.py b/mteb/models/wavlm_models.py
index 11fc5dbdaa..2c2b117a6e 100644
--- a/mteb/models/wavlm_models.py
+++ b/mteb/models/wavlm_models.py
@@ -8,7 +8,6 @@
 import torch
 import torchaudio
 from torch.utils.data import DataLoader
-from tqdm import tqdm
 from transformers import Wav2Vec2FeatureExtractor, WavLMModel
 
 from mteb.encoder_interface import AudioBatch, AudioData, PromptType
@@ -109,17 +108,13 @@ def get_audio_embeddings(
         task_name: str | None = None,
         prompt_type: PromptType | None = None,
         batch_size: int = 4,
-        show_progress_bar: bool = True,
         **kwargs: Any,
     ) -> torch.Tensor:
         processed_audio = self._process_audio(audio)
         all_embeddings = []
 
         with torch.no_grad():
-            for i in tqdm(
-                range(0, len(processed_audio), batch_size),
-                disable=not show_progress_bar,
-            ):
+            for i in range(0, len(processed_audio), batch_size):
                 batch = processed_audio[i : i + batch_size]
 
                 batch_tensor = self._pad_audio_batch(batch)
diff --git a/mteb/models/whisper_models.py b/mteb/models/whisper_models.py
index b8e0d7079b..64a740b817 100644
--- a/mteb/models/whisper_models.py
+++ b/mteb/models/whisper_models.py
@@ -106,17 +106,13 @@ def get_audio_embeddings(
         prompt_type: PromptType | None = None,
         batch_size: int = 4,
         hidden_layer: float = 1.0,
-        show_progress_bar: bool = True,
         **kwargs: Any,
     ) -> torch.Tensor:
         processed_audio = self._process_audio(audio)
         all_embeddings = []
 
         with torch.no_grad():
-            for i in tqdm(
-                range(0, len(processed_audio), batch_size),
-                disable=not show_progress_bar,
-            ):
+            for i in tqdm(range(0, len(processed_audio), batch_size)):
                 batch = processed_audio[i : i + batch_size]
                 batch_arrays = [tensor.numpy() for tensor in batch]
 
@@ -125,8 +121,7 @@ def get_audio_embeddings(
                     sampling_rate=self.sampling_rate,
                     return_tensors="pt",
                     padding="max_length",
-                    truncation=True,
-                    max_length=30 * self.sampling_rate,  # 30 seconds max
+                    max_length=None,
                     return_attention_mask=True,
                 ).to(self.device)
 
diff --git a/mteb/models/yamnet_models.py b/mteb/models/yamnet_models.py
index 36d7617682..7efd8aae18 100644
--- a/mteb/models/yamnet_models.py
+++ b/mteb/models/yamnet_models.py
@@ -60,11 +60,6 @@ def _normalize_audio(self, audio):
         if audio.ndim > 1:
             audio = audio.mean(dim=0)
 
-        # Apply audio truncation (30 seconds max)
-        max_length = 30 * self.sampling_rate  # 30 seconds
-        if audio.shape[-1] > max_length:
-            audio = audio[..., :max_length]
-
         # Normalize to [-1.0, 1.0]
         if audio.abs().max() > 1.0:
             audio = audio / audio.abs().max()
@@ -161,14 +156,7 @@ def _prepare_input_tensor(self, audio_data):
         return input_tensor
 
     def get_audio_embeddings(
-        self,
-        audio,
-        *,
-        task_name=None,
-        prompt_type=None,
-        batch_size=4,
-        show_progress_bar=True,
-        **kwargs,
+        self, audio, *, task_name=None, prompt_type=None, batch_size=4, **kwargs
     ):
         """Generate embeddings for audio inputs."""
         processed_audio = self._process_audio(audio)
@@ -176,9 +164,7 @@
 
         with torch.no_grad():
             for i in tqdm(
-                range(0, len(processed_audio), batch_size),
-                desc="Processing audio",
-                disable=not show_progress_bar,
+                range(0, len(processed_audio), batch_size), desc="Processing audio"
             ):
                 batch = processed_audio[i : i + batch_size]
                 batch_embeddings = []