From 854bb470fd2505e36089e0f1590bf57f5091a9b5 Mon Sep 17 00:00:00 2001 From: eppen Date: Sun, 16 Mar 2025 13:27:04 +0900 Subject: [PATCH 1/2] add IEMOCAP --- .../Audio/AudioClassification/__init__.py | 1 + .../Audio/AudioClassification/eng/IEMOCAP.py | 84 +++++++++++++++++++ 2 files changed, 85 insertions(+) create mode 100644 mteb/tasks/Audio/AudioClassification/eng/IEMOCAP.py diff --git a/mteb/tasks/Audio/AudioClassification/__init__.py b/mteb/tasks/Audio/AudioClassification/__init__.py index e0b131e9c6..de92ce07da 100644 --- a/mteb/tasks/Audio/AudioClassification/__init__.py +++ b/mteb/tasks/Audio/AudioClassification/__init__.py @@ -7,6 +7,7 @@ from .eng.GTZANGenre import * from .eng.GunshotTriangulation import * from .eng.LibriCount import * +from .eng.IEMOCAP import * from .eng.MridinghamStroke import * from .eng.MridinghamTonic import * from .eng.NSynth import * diff --git a/mteb/tasks/Audio/AudioClassification/eng/IEMOCAP.py b/mteb/tasks/Audio/AudioClassification/eng/IEMOCAP.py new file mode 100644 index 0000000000..cb22e7f4d1 --- /dev/null +++ b/mteb/tasks/Audio/AudioClassification/eng/IEMOCAP.py @@ -0,0 +1,84 @@ +from __future__ import annotations + +from mteb.abstasks.Audio.AbsTaskAudioClassification import ( + AbsTaskAudioClassification, +) +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class IEMOCAP(AbsTaskAudioClassification): + metadata = TaskMetadata( + name="IEMOCAP", + description="""IEMOCAP was recorded from ten actors in dyadic sessions with markers on the face, head, + and hands, which provide detailed information about their facial expression and + hand movements during scripted and spontaneous spoken communication scenarios. + actors performed selected emotional scripts and also improvised hypothetical + scenarios designed to elicit specific types of emotions (happiness, anger, sadness, frustration and neutral state). 
+ After automated annotations, the final emotional categories selected for annotation were: