diff --git a/mteb/tasks/Retrieval/__init__.py b/mteb/tasks/Retrieval/__init__.py
index a13fa94bfc..b2abdd19e5 100644
--- a/mteb/tasks/Retrieval/__init__.py
+++ b/mteb/tasks/Retrieval/__init__.py
@@ -188,6 +188,8 @@
 from .swe.SwednRetrieval import *
 from .swe.SweFaqRetrieval import *
 from .tur.TurHistQuad import *
+from .vie.GreenNodeTableMarkdownRetrieval import *
 from .vie.VieQuADRetrieval import *
+from .vie.ZacLegalTextRetrieval import *
 from .zho.CMTEBRetrieval import *
 from .zho.LeCaRDv2Retrieval import *
diff --git a/mteb/tasks/Retrieval/vie/GreenNodeTableMarkdownRetrieval.py b/mteb/tasks/Retrieval/vie/GreenNodeTableMarkdownRetrieval.py
new file mode 100644
index 0000000000..0fb95b0771
--- /dev/null
+++ b/mteb/tasks/Retrieval/vie/GreenNodeTableMarkdownRetrieval.py
@@ -0,0 +1,43 @@
+from __future__ import annotations
+
+from mteb.abstasks.TaskMetadata import TaskMetadata
+
+from ....abstasks.AbsTaskRetrieval import AbsTaskRetrieval
+
+# NOTE(review): not referenced in this module. It is re-exported through the
+# package's star import, so it is kept rather than removing a public name.
+TEST_SAMPLES = 2048
+
+
+class GreenNodeTableMarkdownRetrieval(AbsTaskRetrieval):
+    """Vietnamese retrieval task over GreenNode's Table-Markdown dataset.
+
+    Only metadata is declared here; data loading and scoring are presumably
+    inherited from ``AbsTaskRetrieval`` (not visible in this module).
+    """
+
+    metadata = TaskMetadata(
+        name="GreenNodeTableMarkdownRetrieval",
+        description="GreenNodeTable documents",
+        reference="https://huggingface.co/GreenNode",
+        dataset={
+            "path": "GreenNode/GreenNode-Table-Markdown-Retrieval-VN",
+            "revision": "d86a4dad9fd7c70359f617d86984395ea89be1c5",
+        },
+        type="Retrieval",
+        category="s2p",
+        modalities=["text"],
+        eval_splits=["test"],
+        eval_langs=["vie-Latn"],
+        main_score="ndcg_at_10",
+        date=("2025-03-16", "2025-03-16"),
+        domains=["Financial", "Encyclopaedic", "Non-fiction"],
+        task_subtypes=["Article retrieval"],
+        license="mit",
+        annotations_creators="human-annotated",
+        dialect=[],
+        sample_creation="found",
+        # TODO: Add bibtex citation when the paper is published. Kept empty so
+        # no placeholder prose is stored in a field meant for a BibTeX entry.
+        bibtex_citation="",
+    )
diff --git a/mteb/tasks/Retrieval/vie/ZacLegalTextRetrieval.py b/mteb/tasks/Retrieval/vie/ZacLegalTextRetrieval.py
new file mode 100644
index 0000000000..33cf333f9a
--- /dev/null
+++ b/mteb/tasks/Retrieval/vie/ZacLegalTextRetrieval.py
@@ -0,0 +1,39 @@
+from __future__ import annotations
+
+from mteb.abstasks.TaskMetadata import TaskMetadata
+
+from ....abstasks.AbsTaskRetrieval import AbsTaskRetrieval
+
+
+class ZacLegalTextRetrieval(AbsTaskRetrieval):
+    """Vietnamese legal-text retrieval task (Zalo legal text dataset).
+
+    Only metadata is declared here; data loading and scoring are presumably
+    inherited from ``AbsTaskRetrieval`` (not visible in this module).
+    """
+
+    metadata = TaskMetadata(
+        name="ZacLegalTextRetrieval",
+        description="Zalo Legal Text documents",
+        reference="https://challenge.zalo.ai",
+        dataset={
+            "path": "GreenNode/zalo-ai-legal-text-retrieval-vn",
+            "revision": "910766554633e8da014e88f54988705dde7ecaac",
+        },
+        type="Retrieval",
+        category="s2p",
+        modalities=["text"],
+        eval_splits=["test"],
+        eval_langs=["vie-Latn"],
+        main_score="ndcg_at_10",
+        date=("2025-03-16", "2025-03-16"),
+        domains=["Legal"],
+        task_subtypes=["Article retrieval"],
+        license="mit",
+        annotations_creators="human-annotated",
+        dialect=[],
+        sample_creation="found",
+        # TODO: Add bibtex citation when the paper is published. Kept empty so
+        # no placeholder prose is stored in a field meant for a BibTeX entry.
+        bibtex_citation="",
+    )