Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions mteb/abstasks/TaskMetadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

TASK_SUBTYPE = Literal[
"Article retrieval",
"Accent identification",
"Conversational retrieval",
"Dialect pairing",
"Dialog Systems",
Expand Down Expand Up @@ -61,20 +62,23 @@
"Stroke Classification of Musical Instrument",
"Tonic Classification of Musical Instrument",
"Speaker Count Identification",
"Species Classification",
"Spoken Digit Classification",
"Gender Clustering",
"Music Clustering",
"Rendered semantic textual similarity",
"Sentiment Analysis",
"Intent Classification",
"Vehicle Clustering",
"Environment Sound Clustering",
"Rendered semantic textual similarity",
"Gender Classification",
"Age Classification",
]

TASK_DOMAIN = Literal[
"Academic",
"Bioacoustics",
"Blog",
"Constructed",
"Encyclopaedic",
Expand Down
2 changes: 2 additions & 0 deletions mteb/tasks/Audio/AudioClassification/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
from .eng.FSDD import *
from .eng.GTZANGenre import *
from .eng.GunshotTriangulation import *
from .eng.IEMOCAPEmotion import *
from .eng.IEMOCAPGender import *
from .eng.LibriCount import *
from .eng.MridinghamStroke import *
from .eng.MridinghamTonic import *
Expand Down
92 changes: 92 additions & 0 deletions mteb/tasks/Audio/AudioClassification/eng/IEMOCAPEmotion.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
from __future__ import annotations

from mteb.abstasks.Audio.AbsTaskAudioClassification import (
AbsTaskAudioClassification,
)
from mteb.abstasks.TaskMetadata import TaskMetadata


class IEMOCAPEmotionClassification(AbsTaskAudioClassification):
    """Emotion classification over the IEMOCAP dyadic-conversation corpus.

    The raw dataset carries a string ``major_emotion`` column; this task
    converts it into an integer class id and exposes it under
    ``label_column_name`` so the generic audio-classification evaluation
    loop can consume it.
    """

    metadata = TaskMetadata(
        name="IEMOCAPEmotion",
        description="Classification of speech samples into emotions (angry, happy, sad, neutral, frustrated, excited, fearful, surprised, disgusted) from interactive emotional dyadic conversations.",
        reference="https://doi.org/10.1007/s10579-008-9076-6",
        dataset={
            "path": "AbstractTTS/IEMOCAP",
            "revision": "9f1696a135a65ce997d898d4121c952269a822ca",
        },
        type="AudioClassification",
        category="a2t",
        eval_splits=["train"],
        eval_langs=["eng-Latn"],
        main_score="accuracy",
        date=("2008-01-01", "2008-12-31"),
        domains=["Spoken", "Speech"],
        task_subtypes=["Emotion classification"],
        license="cc-by-nc-sa-3.0",
        annotations_creators="expert-annotated",
        dialect=[],
        modalities=["audio"],
        sample_creation="created",
        bibtex_citation=r"""
@article{busso2008iemocap,
  author = {Busso, Carlos and Bulut, Murtaza and Lee, Chi-Chun and Kazemzadeh, Abe and Mower, Emily and Kim, Samuel and Chang, Jeannette N and Lee, Sungbok and Narayanan, Shrikanth S},
  journal = {Language resources and evaluation},
  number = {4},
  pages = {335--359},
  publisher = {Springer},
  title = {IEMOCAP: Interactive emotional dyadic motion capture database},
  volume = {42},
  year = {2008},
}
""",
        descriptive_stats={
            "n_samples": {"train": 10039},  # Approximate after subsampling
        },
    )

    audio_column_name: str = "audio"
    label_column_name: str = "emotion"
    samples_per_label: int = 10
    is_cross_validation: bool = True

    # Canonical label order — a label's index in this tuple is its class id.
    _EMOTION_LABELS = (
        "angry",  # 0
        "sad",  # 1
        "happy",  # 2
        "neutral",  # 3
        "frustrated",  # 4
        "excited",  # 5
        "fear",  # 6
        "surprise",  # 7
        "disgust",  # 8
        "other",  # 9
    )

    def dataset_transform(self):
        """Replace the string ``major_emotion`` column with integer labels.

        Per split: drop rows with a missing/blank annotation, map the
        lower-cased emotion name to its id (unknown names become -1 and are
        filtered out), then rename the id column to ``label_column_name``.
        """
        emotion_to_id = {name: i for i, name in enumerate(self._EMOTION_LABELS)}

        def _has_annotation(example):
            # Reject rows where the annotation is absent or an empty string.
            value = example["major_emotion"]
            return value is not None and value != ""

        def _attach_id(example):
            # Case-insensitive lookup; names outside the canonical set map
            # to -1 so the next filter pass can drop them.
            return {
                "emotion_id": emotion_to_id.get(
                    example["major_emotion"].lower(), -1
                )
            }

        for split, subset in self.dataset.items():
            subset = subset.filter(_has_annotation)
            subset = subset.map(_attach_id)
            subset = subset.filter(lambda example: example["emotion_id"] != -1)
            # Expose the numeric id under the configured label column name.
            self.dataset[split] = subset.rename_column(
                "emotion_id", self.label_column_name
            )
65 changes: 65 additions & 0 deletions mteb/tasks/Audio/AudioClassification/eng/IEMOCAPGender.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
from __future__ import annotations

from mteb.abstasks.Audio.AbsTaskAudioClassification import (
AbsTaskAudioClassification,
)
from mteb.abstasks.TaskMetadata import TaskMetadata


class IEMOCAPGenderClassification(AbsTaskAudioClassification):
    """Speaker-gender classification (male/female) over the IEMOCAP corpus.

    Derives an integer ``gender_id`` column from the dataset's string
    ``gender`` column and exposes it via ``label_column_name`` for the
    generic audio-classification evaluation loop.
    """

    metadata = TaskMetadata(
        name="IEMOCAPGender",
        description="Classification of speech samples by speaker gender (male/female) from the IEMOCAP database of interactive emotional dyadic conversations.",
        reference="https://doi.org/10.1007/s10579-008-9076-6",
        dataset={
            "path": "AbstractTTS/IEMOCAP",
            "revision": "9f1696a135a65ce997d898d4121c952269a822ca",
        },
        type="AudioClassification",
        category="a2t",
        eval_splits=["train"],
        eval_langs=["eng-Latn"],
        main_score="accuracy",
        date=("2008-01-01", "2008-12-31"),
        domains=["Spoken", "Speech"],
        task_subtypes=["Gender Classification"],
        license="cc-by-nc-sa-3.0",
        annotations_creators="expert-annotated",
        dialect=[],
        modalities=["audio"],
        sample_creation="created",
        bibtex_citation=r"""
@article{busso2008iemocap,
  author = {Busso, Carlos and Bulut, Murtaza and Lee, Chi-Chun and Kazemzadeh, Abe and Mower, Emily and Kim, Samuel and Chang, Jeannette N and Lee, Sungbok and Narayanan, Shrikanth S},
  journal = {Language resources and evaluation},
  number = {4},
  pages = {335--359},
  publisher = {Springer},
  title = {IEMOCAP: Interactive emotional dyadic motion capture database},
  volume = {42},
  year = {2008},
}
""",
        descriptive_stats={
            "n_samples": {"train": 10039},
        },
    )

    audio_column_name: str = "audio"
    label_column_name: str = "gender_id"
    samples_per_label: int = 100
    is_cross_validation: bool = True

    def dataset_transform(self):
        """Add an integer ``gender_id`` column derived from ``gender``.

        Fixes relative to the initial version:
        - removed a stray debug ``print`` from the transform loop;
        - rows whose ``gender`` value is not exactly "Female"/"Male"
          (missing, blank, or otherwise unexpected) are filtered out
          instead of raising ``KeyError`` mid-``map``, mirroring the
          unknown-label filtering done by the IEMOCAP emotion task.

        NOTE(review): assumes the source column uses the capitalised
        values "Female"/"Male" — confirm against the dataset schema.
        """
        label2id = {"Female": 0, "Male": 1}

        for split in self.dataset:
            # Keep only rows with a recognised gender annotation so the
            # subsequent mapping cannot fail on unexpected values.
            self.dataset[split] = self.dataset[split].filter(
                lambda example: example["gender"] in label2id
            )
            # Attach the numeric label column expected by the evaluator.
            self.dataset[split] = self.dataset[split].map(
                lambda example: {"gender_id": label2id[example["gender"]]}
            )