Skip to content
Closed
2 changes: 2 additions & 0 deletions mteb/models/clap_models.py
Original file line number Diff line number Diff line change
@@ -62,6 +62,8 @@ def _handle_batch(
if "array" in item:
audio = item["array"]
# Convert to torch tensor and ensure float32
if isinstance(audio, list):
audio = np.array(audio)
audio = (
torch.from_numpy(audio).float()
if isinstance(audio, np.ndarray)
Expand Down
2 changes: 2 additions & 0 deletions mteb/models/wav2vec2_models.py
Original file line number Diff line number Diff line change
@@ -112,6 +112,8 @@ def _handle_batch(
if isinstance(item, dict):
if "array" in item:
audio = item["array"]
if isinstance(audio, list):
audio = np.array(audio)
audio = (
torch.from_numpy(audio).float()
if isinstance(audio, np.ndarray)
Expand Down
1 change: 1 addition & 0 deletions mteb/tasks/Audio/AudioClassification/__init__.py
Original file line number Diff line number Diff line change
@@ -5,4 +5,5 @@
from .eng.GTZANGenre import *
from .eng.GunshotTriangulation import *
from .eng.NSynth import *
from .eng.VaaniGenderClassification import *
from .eng.VoxLingua107Top10 import *
Loading