embeddings-benchmark · KennethEnevoldsen · Aug 9, 2025 · Aug 1, 2025 · Aug 2, 2025 · Aug 2, 2025
diff --git a/mteb/abstasks/TaskMetadata.py b/mteb/abstasks/TaskMetadata.py
@@ -92,6 +92,7 @@
     "machine-translated and verified",
     "machine-translated and localized",
     "LM-generated and verified",
+    "machine-translated and LM verified",
     "rendered",
     "multiple",
 ]

diff --git a/mteb/tasks/Classification/__init__.py b/mteb/tasks/Classification/__init__.py
@@ -163,6 +163,18 @@
 from .tur.TurkishProductSentimentClassification import *
 from .ukr.UkrFormalityClassification import *
 from .urd.UrduRomanSentimentClassification import *
+from .vie.AmazonCounterfactualVNClassification import *
+from .vie.AmazonPolarityVNClassification import *
+from .vie.AmazonReviewsVNClassification import *
+from .vie.Banking77VNClassification import *
+from .vie.EmotionVNClassification import *
+from .vie.ImdbVNClassification import *
+from .vie.MassiveIntentVNClassification import *
+from .vie.MassiveScenarioVNClassification import *
+from .vie.MTOPDomainVNClassification import *
+from .vie.MTOPIntentVNClassification import *
+from .vie.ToxicConversationsVNClassification import *
+from .vie.TweetSentimentExtractionVNClassification import *
 from .vie.VieStudentFeedbackClassification import *
 from .zho.CMTEBClassification import *
 from .zho.YueOpenriceReviewClassification import (

diff --git a/mteb/tasks/Classification/vie/AmazonCounterfactualVNClassification.py b/mteb/tasks/Classification/vie/AmazonCounterfactualVNClassification.py
@@ -0,0 +1,47 @@
+from __future__ import annotations
+
+from mteb.abstasks.AbsTaskClassification import AbsTaskClassification
+from mteb.abstasks.TaskMetadata import TaskMetadata
+
+
+class AmazonCounterfactualVNClassification(AbsTaskClassification):
+    num_samples = 32
+
+    metadata = TaskMetadata(
+        name="AmazonCounterfactualVNClassification",
+        dataset={
+            "path": "GreenNode/amazon-counterfactual-vn",
+            "revision": "b48bc27d383cfca5b6a47135a52390fa5f66b253",
+        },
+        description="""A collection of translated Amazon customer reviews annotated for counterfactual detection pair classification.
+        The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
+        - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
+        - Applies advanced embedding models to filter the translations.
+        - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.
+        """,
+        reference="https://arxiv.org/abs/2104.06893",
+        category="s2s",
+        type="Classification",
+        eval_splits=["test"],
+        eval_langs=["vie-Latn"],
+        main_score="accuracy",
+        date=("2025-07-29", "2025-07-30"),
+        license="cc-by-sa-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="machine-translated and LM verified",
+        domains=["Reviews", "Written"],
+        task_subtypes=["Counterfactual Detection"],
+        bibtex_citation=r"""
+@misc{pham2025vnmtebvietnamesemassivetext,
+  archiveprefix = {arXiv},
+  author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang},
+  eprint = {2507.21500},
+  primaryclass = {cs.CL},
+  title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark},
+  url = {https://arxiv.org/abs/2507.21500},
+  year = {2025},
+}
+""",
+        adapted_from=["AmazonCounterfactualClassification"],
+    )
diff --git a/mteb/tasks/Classification/vie/AmazonPolarityVNClassification.py b/mteb/tasks/Classification/vie/AmazonPolarityVNClassification.py
@@ -0,0 +1,45 @@
+from __future__ import annotations
+
+from mteb.abstasks.AbsTaskClassification import AbsTaskClassification
+from mteb.abstasks.TaskMetadata import TaskMetadata
+
+
+class AmazonPolarityVNClassification(AbsTaskClassification):
+    metadata = TaskMetadata(
+        name="AmazonPolarityVNClassification",
+        description="""A collection of translated Amazon customer reviews annotated for polarity classification.
+            The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
+            - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
+            - Applies advanced embedding models to filter the translations.
+            - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.
+            """,
+        reference="https://huggingface.co/datasets/amazon_polarity",
+        dataset={
+            "path": "GreenNode/amazon-polarity-vn",
+            "revision": "4e9a0d6e6bd97ab32f23c50c043d751eed2a5f8a",
+        },
+        type="Classification",
+        category="s2s",
+        eval_splits=["test"],
+        eval_langs=["vie-Latn"],
+        main_score="accuracy",
+        date=("2025-07-29", "2025-07-30"),
+        license="cc-by-sa-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="machine-translated and LM verified",
+        domains=["Reviews", "Written"],
+        task_subtypes=["Sentiment/Hate speech"],
+        bibtex_citation=r"""
+@misc{pham2025vnmtebvietnamesemassivetext,
+  archiveprefix = {arXiv},
+  author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang},
+  eprint = {2507.21500},
+  primaryclass = {cs.CL},
+  title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark},
+  url = {https://arxiv.org/abs/2507.21500},
+  year = {2025},
+}
+""",
+        adapted_from=["AmazonPolarityClassification"],
+    )
diff --git a/mteb/tasks/Classification/vie/AmazonReviewsVNClassification.py b/mteb/tasks/Classification/vie/AmazonReviewsVNClassification.py
@@ -0,0 +1,44 @@
+from __future__ import annotations
+
+from mteb.abstasks.AbsTaskClassification import AbsTaskClassification
+from mteb.abstasks.TaskMetadata import TaskMetadata
+
+
+class AmazonReviewsVNClassification(AbsTaskClassification):
+    metadata = TaskMetadata(
+        name="AmazonReviewsVNClassification",
+        dataset={
+            "path": "GreenNode/amazon-reviews-multi-vn",
+            "revision": "27da94deb6d4f44af789a3d70750fa506b79f189",
+        },
+        description="""A collection of translated Amazon reviews specifically designed to aid research in multilingual text classification.
+            The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
+            - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
+            - Applies advanced embedding models to filter the translations.
+            - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
+        reference="https://arxiv.org/abs/2010.02573",
+        category="s2s",
+        type="Classification",
+        eval_splits=["test"],
+        eval_langs=["vie-Latn"],
+        main_score="accuracy",
+        date=("2025-07-29", "2025-07-30"),
+        license="cc-by-sa-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="machine-translated and LM verified",
+        domains=["Reviews", "Written"],
+        task_subtypes=["Emotion classification"],
+        bibtex_citation=r"""
+@misc{pham2025vnmtebvietnamesemassivetext,
+  archiveprefix = {arXiv},
+  author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang},
+  eprint = {2507.21500},
+  primaryclass = {cs.CL},
+  title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark},
+  url = {https://arxiv.org/abs/2507.21500},
+  year = {2025},
+}
+""",
+        adapted_from=["AmazonReviewsClassification"],
+    )
diff --git a/mteb/tasks/Classification/vie/Banking77VNClassification.py b/mteb/tasks/Classification/vie/Banking77VNClassification.py
@@ -0,0 +1,44 @@
+from __future__ import annotations
+
+from mteb.abstasks.AbsTaskClassification import AbsTaskClassification
+from mteb.abstasks.TaskMetadata import TaskMetadata
+
+
+class Banking77VNClassification(AbsTaskClassification):
+    metadata = TaskMetadata(
+        name="Banking77VNClassification",
+        description="""A translated dataset composed of online banking queries annotated with their corresponding intents.
+            The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
+            - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
+            - Applies advanced embedding models to filter the translations.
+            - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
+        reference="https://arxiv.org/abs/2003.04807",
+        dataset={
+            "path": "GreenNode/banking77-vn",
+            "revision": "42541b07c25a49604be129fba6d70b752be229c1",
+        },
+        type="Classification",
+        category="s2s",
+        eval_splits=["test"],
+        eval_langs=["vie-Latn"],
+        main_score="accuracy",
+        date=("2025-07-29", "2025-07-30"),
+        license="cc-by-sa-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="machine-translated and LM verified",
+        domains=["Written"],
+        task_subtypes=[],
+        bibtex_citation=r"""
+@misc{pham2025vnmtebvietnamesemassivetext,
+  archiveprefix = {arXiv},
+  author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang},
+  eprint = {2507.21500},
+  primaryclass = {cs.CL},
+  title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark},
+  url = {https://arxiv.org/abs/2507.21500},
+  year = {2025},
+}
+""",
+        adapted_from=["Banking77Classification"],
+    )
diff --git a/mteb/tasks/Classification/vie/EmotionVNClassification.py b/mteb/tasks/Classification/vie/EmotionVNClassification.py
@@ -0,0 +1,46 @@
+from __future__ import annotations
+
+from mteb.abstasks.AbsTaskClassification import AbsTaskClassification
+from mteb.abstasks.TaskMetadata import TaskMetadata
+
+
+class EmotionVNClassification(AbsTaskClassification):
+    num_samples = 16
+
+    metadata = TaskMetadata(
+        name="EmotionVNClassification",
+        description="""Emotion is a translated dataset of Vietnamese from English Twitter messages with six basic emotions: anger, fear, joy, love, sadness, and surprise.
+            The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
+            - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
+            - Applies advanced embedding models to filter the translations.
+            - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
+        reference="https://www.aclweb.org/anthology/D18-1404",
+        dataset={
+            "path": "GreenNode/emotion-vn",
+            "revision": "797a93c0dd755ebf5818fbf54d0e0a024df9216d",
+        },
+        type="Classification",
+        category="s2s",
+        eval_splits=["validation", "test"],
+        eval_langs=["vie-Latn"],
+        main_score="accuracy",
+        date=("2025-07-29", "2025-07-30"),
+        license="cc-by-sa-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="machine-translated and LM verified",
+        domains=["Social", "Written"],
+        task_subtypes=["Sentiment/Hate speech"],
+        bibtex_citation=r"""
+@misc{pham2025vnmtebvietnamesemassivetext,
+  archiveprefix = {arXiv},
+  author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang},
+  eprint = {2507.21500},
+  primaryclass = {cs.CL},
+  title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark},
+  url = {https://arxiv.org/abs/2507.21500},
+  year = {2025},
+}
+""",
+        adapted_from=["EmotionClassification"],
+    )
diff --git a/mteb/tasks/Classification/vie/ImdbVNClassification.py b/mteb/tasks/Classification/vie/ImdbVNClassification.py
@@ -0,0 +1,44 @@
+from __future__ import annotations
+
+from mteb.abstasks.AbsTaskClassification import AbsTaskClassification
+from mteb.abstasks.TaskMetadata import TaskMetadata
+
+
+class ImdbVNClassification(AbsTaskClassification):
+    metadata = TaskMetadata(
+        name="ImdbVNClassification",
+        description="""A translated dataset of large movie reviews annotated for sentiment classification.
+            The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
+            - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
+            - Applies advanced embedding models to filter the translations.
+            - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
+        dataset={
+            "path": "GreenNode/imdb-vn",
+            "revision": "0dccb383ee26c90c99d03c8674cf40de642f099a",
+        },
+        reference="http://www.aclweb.org/anthology/P11-1015",
+        type="Classification",
+        category="p2p",
+        eval_splits=["test"],
+        eval_langs=["vie-Latn"],
+        main_score="accuracy",
+        date=("2025-07-29", "2025-07-30"),
+        license="cc-by-sa-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="machine-translated and LM verified",
+        domains=["Reviews", "Written"],
+        task_subtypes=["Sentiment/Hate speech"],
+        bibtex_citation=r"""
+@misc{pham2025vnmtebvietnamesemassivetext,
+  archiveprefix = {arXiv},
+  author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang},
+  eprint = {2507.21500},
+  primaryclass = {cs.CL},
+  title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark},
+  url = {https://arxiv.org/abs/2507.21500},
+  year = {2025},
+}
+""",
+        adapted_from=["ImdbClassification"],
+    )
diff --git a/mteb/tasks/Classification/vie/MTOPDomainVNClassification.py b/mteb/tasks/Classification/vie/MTOPDomainVNClassification.py
@@ -0,0 +1,44 @@
+from __future__ import annotations
+
+from mteb.abstasks.AbsTaskClassification import AbsTaskClassification
+from mteb.abstasks.TaskMetadata import TaskMetadata
+
+
+class MTOPDomainVNClassification(AbsTaskClassification):
+    metadata = TaskMetadata(
+        name="MTOPDomainVNClassification",
+        dataset={
+            "path": "GreenNode/mtop-domain-vn",
+            "revision": "6e1ec8c54c018151c77472d94b1c0765230cf6ca",
+        },
+        description="""A translated dataset from MTOP: Multilingual Task-Oriented Semantic Parsing
+            The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
+            - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
+            - Applies advanced embedding models to filter the translations.
+            - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
+        reference="https://arxiv.org/pdf/2008.09335.pdf",
+        category="s2s",
+        type="Classification",
+        eval_splits=["test"],
+        eval_langs=["vie-Latn"],
+        main_score="accuracy",
+        date=("2025-07-29", "2025-07-30"),
+        license="cc-by-sa-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="machine-translated and LM verified",
+        domains=["Spoken", "Spoken"],
+        task_subtypes=[],
+        bibtex_citation=r"""
+@misc{pham2025vnmtebvietnamesemassivetext,
+  archiveprefix = {arXiv},
+  author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang},
+  eprint = {2507.21500},
+  primaryclass = {cs.CL},
+  title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark},
+  url = {https://arxiv.org/abs/2507.21500},
+  year = {2025},
+}
+""",
+        adapted_from=["MTOPDomainClassification"],
+    )
diff --git a/mteb/tasks/Classification/vie/MTOPIntentVNClassification.py b/mteb/tasks/Classification/vie/MTOPIntentVNClassification.py
@@ -0,0 +1,44 @@
+from __future__ import annotations
+
+from mteb.abstasks.AbsTaskClassification import AbsTaskClassification
+from mteb.abstasks.TaskMetadata import TaskMetadata
+
+
+class MTOPIntentVNClassification(AbsTaskClassification):
+    metadata = TaskMetadata(
+        name="MTOPIntentVNClassification",
+        dataset={
+            "path": "GreenNode/mtop-intent-vn",
+            "revision": "c4e81a5c9a813a0142d905e261e5a446cc6fbc4a",
+        },
+        description="""A translated dataset from MTOP: Multilingual Task-Oriented Semantic Parsing
+            The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
+            - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
+            - Applies advanced embedding models to filter the translations.
+            - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
+        reference="https://arxiv.org/pdf/2008.09335.pdf",
+        category="s2s",
+        type="Classification",
+        eval_splits=["test"],
+        eval_langs=["vie-Latn"],
+        main_score="accuracy",
+        date=("2025-07-29", "2025-07-30"),
+        license="cc-by-sa-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="machine-translated and LM verified",
+        domains=["Spoken", "Spoken"],
+        task_subtypes=[],
+        bibtex_citation=r"""
+@misc{pham2025vnmtebvietnamesemassivetext,
+  archiveprefix = {arXiv},
+  author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang},
+  eprint = {2507.21500},
+  primaryclass = {cs.CL},
+  title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark},
+  url = {https://arxiv.org/abs/2507.21500},
+  year = {2025},
+}
+""",
+        adapted_from=["MTOPIntentClassification"],
+    )