Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 84 additions & 0 deletions mteb/tasks/Audio/Any2AnyRetrieval/CMU_Arctic.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
from __future__ import annotations

from mteb.abstasks.Image.AbsTaskAny2AnyRetrieval import AbsTaskAny2AnyRetrieval
from mteb.abstasks.TaskMetadata import TaskMetadata


class CMUArcticA2TRetrieval(AbsTaskAny2AnyRetrieval):
    """Audio-to-text retrieval on CMU Arctic speech segment -> transcription pairs."""

    metadata = TaskMetadata(
        name="CMUArcticA2TRetrieval",
        description=(
            "Retrieve the correct transcription for an English speech segment. "
            "The dataset is derived from the phonetically balanced CMU Arctic single-speaker TTS corpora. "
            "The corpora contains 1150 samples based on read-aloud segments from books, which are out of copyright "
            "and derived from the Gutenberg project."
        ),
        reference="http://festvox.org/cmu_arctic/",
        # Pinned dataset revision so evaluation results stay reproducible.
        dataset={
            "path": "mteb/CMU_Arctic_a2t",
            "revision": "68e5228b82d03c20c22322ad22008464a32f960b",
        },
        type="Any2AnyRetrieval",
        # "a2t": audio queries are matched against text transcriptions.
        category="a2t",
        modalities=["text", "audio"],
        eval_splits=["test"],
        eval_langs=["eng-Latn"],
        main_score="cv_recall_at_5",
        date=("2000-01-01", "2002-12-31"),
        domains=["Spoken"],
        task_subtypes=["Speech Transcription Retrieval"],
        license="cc0-1.0",
        annotations_creators="derived",
        dialect=[],
        sample_creation="found",
        # Technical report CMU-LTI-03-177 is authored by Kominek and Black;
        # the entry below cites it with its actual authors and title.
        bibtex_citation=r"""
@techreport{cmu-lti-03-177,
  author = {Kominek, John and Black, Alan W.},
  institution = {Carnegie Mellon University, Language Technologies Institute},
  number = {CMU-LTI-03-177},
  title = {CMU ARCTIC databases for speech synthesis},
  year = {2003},
}
""",
    )


class CMUArcticT2ARetrieval(AbsTaskAny2AnyRetrieval):
    """Text-to-audio retrieval on CMU Arctic transcription -> speech segment pairs."""

    metadata = TaskMetadata(
        name="CMUArcticT2ARetrieval",
        description=(
            "Retrieve the correct audio segment for an English transcription. "
            "The dataset is derived from the phonetically balanced CMU Arctic single-speaker TTS corpora. "
            "The corpora contains 1150 audio-text pairs based on read-aloud segments from public domain books "
            "originally sourced from the Gutenberg project."
        ),
        reference="http://festvox.org/cmu_arctic/",
        # Pinned dataset revision so evaluation results stay reproducible.
        dataset={
            "path": "mteb/CMU_Arctic_t2a",
            "revision": "7c845fdfe355c226096203ffd4cdead3229950dc",
        },
        type="Any2AnyRetrieval",
        # "t2a": text queries are matched against audio segments.
        category="t2a",
        modalities=["text", "audio"],
        eval_splits=["test"],
        eval_langs=["eng-Latn"],
        main_score="cv_recall_at_5",
        date=("2000-01-01", "2002-12-31"),
        domains=["Spoken"],
        task_subtypes=["Speech Transcription Retrieval"],
        license="cc0-1.0",
        annotations_creators="derived",
        dialect=[],
        sample_creation="found",
        # Technical report CMU-LTI-03-177 is authored by Kominek and Black;
        # the entry below cites it with its actual authors and title.
        bibtex_citation=r"""
@techreport{cmu-lti-03-177,
  author = {Kominek, John and Black, Alan W.},
  institution = {Carnegie Mellon University, Language Technologies Institute},
  number = {CMU-LTI-03-177},
  title = {CMU ARCTIC databases for speech synthesis},
  year = {2003},
}
""",
    )
1 change: 1 addition & 0 deletions mteb/tasks/Audio/Any2AnyRetrieval/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from .AudioCaps import *
from .Clotho import *
from .CMU_Arctic import *
from .EmoVDB import *
from .HiFiTTS import *
from .MACS import *
Expand Down