diff --git a/mteb/abstasks/TaskMetadata.py b/mteb/abstasks/TaskMetadata.py
index f2c631b3b2..1eb6f6bb63 100644
--- a/mteb/abstasks/TaskMetadata.py
+++ b/mteb/abstasks/TaskMetadata.py
@@ -92,6 +92,7 @@
     "machine-translated and verified",
     "machine-translated and localized",
     "LM-generated and verified",
+    "machine-translated and LM verified",
     "rendered",
     "multiple",
 ]
diff --git a/mteb/tasks/Classification/__init__.py b/mteb/tasks/Classification/__init__.py
index 4392b280d8..d70c94944d 100644
--- a/mteb/tasks/Classification/__init__.py
+++ b/mteb/tasks/Classification/__init__.py
@@ -163,6 +163,18 @@
 from .tur.TurkishProductSentimentClassification import *
 from .ukr.UkrFormalityClassification import *
 from .urd.UrduRomanSentimentClassification import *
+from .vie.AmazonCounterfactualVNClassification import *
+from .vie.AmazonPolarityVNClassification import *
+from .vie.AmazonReviewsVNClassification import *
+from .vie.Banking77VNClassification import *
+from .vie.EmotionVNClassification import *
+from .vie.ImdbVNClassification import *
+from .vie.MassiveIntentVNClassification import *
+from .vie.MassiveScenarioVNClassification import *
+from .vie.MTOPDomainVNClassification import *
+from .vie.MTOPIntentVNClassification import *
+from .vie.ToxicConversationsVNClassification import *
+from .vie.TweetSentimentExtractionVNClassification import *
 from .vie.VieStudentFeedbackClassification import *
 from .zho.CMTEBClassification import *
 from .zho.YueOpenriceReviewClassification import (
diff --git a/mteb/tasks/Classification/vie/AmazonCounterfactualVNClassification.py b/mteb/tasks/Classification/vie/AmazonCounterfactualVNClassification.py
new file mode 100644
index 0000000000..540e5f2eda
--- /dev/null
+++ b/mteb/tasks/Classification/vie/AmazonCounterfactualVNClassification.py
@@ -0,0 +1,47 @@
+from __future__ import annotations
+
+from mteb.abstasks.AbsTaskClassification import AbsTaskClassification
+from mteb.abstasks.TaskMetadata import TaskMetadata
+
+
+class AmazonCounterfactualVNClassification(AbsTaskClassification):
+    num_samples = 32
+
+    metadata = TaskMetadata(
+        name="AmazonCounterfactualVNClassification",
+        dataset={
+            "path": "GreenNode/amazon-counterfactual-vn",
+            "revision": "b48bc27d383cfca5b6a47135a52390fa5f66b253",
+        },
+        description="""A collection of translated Amazon customer reviews annotated for counterfactual detection pair classification.
+        The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
+        - The system uses large language models (LLMs), specifically Cohere's Aya model, for translation.
+        - Applies advanced embedding models to filter the translations.
+        - Uses LLM-as-a-judge to score the quality of the samples based on multiple criteria.
+ """, + reference="https://arxiv.org/abs/2104.06893", + category="s2s", + type="Classification", + eval_splits=["test"], + eval_langs=["vie-Latn"], + main_score="accuracy", + date=("2025-07-29", "2025-07-30"), + license="cc-by-sa-4.0", + annotations_creators="derived", + dialect=[], + sample_creation="machine-translated and LM verified", + domains=["Reviews", "Written"], + task_subtypes=["Counterfactual Detection"], + bibtex_citation=r""" +@misc{pham2025vnmtebvietnamesemassivetext, + archiveprefix = {arXiv}, + author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang}, + eprint = {2507.21500}, + primaryclass = {cs.CL}, + title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark}, + url = {https://arxiv.org/abs/2507.21500}, + year = {2025}, +} +""", + adapted_from=["AmazonCounterfactualClassification"], + ) diff --git a/mteb/tasks/Classification/vie/AmazonPolarityVNClassification.py b/mteb/tasks/Classification/vie/AmazonPolarityVNClassification.py new file mode 100644 index 0000000000..e3beceda5c --- /dev/null +++ b/mteb/tasks/Classification/vie/AmazonPolarityVNClassification.py @@ -0,0 +1,45 @@ +from __future__ import annotations + +from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class AmazonPolarityVNClassification(AbsTaskClassification): + metadata = TaskMetadata( + name="AmazonPolarityVNClassification", + description="""A collection of translated Amazon customer reviews annotated for polarity classification. + The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: + - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. + - Applies advanced embedding models to filter the translations. + - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria. 
+ """, + reference="https://huggingface.co/datasets/amazon_polarity", + dataset={ + "path": "GreenNode/amazon-polarity-vn", + "revision": "4e9a0d6e6bd97ab32f23c50c043d751eed2a5f8a", + }, + type="Classification", + category="s2s", + eval_splits=["test"], + eval_langs=["vie-Latn"], + main_score="accuracy", + date=("2025-07-29", "2025-07-30"), + license="cc-by-sa-4.0", + annotations_creators="derived", + dialect=[], + sample_creation="machine-translated and LM verified", + domains=["Reviews", "Written"], + task_subtypes=["Sentiment/Hate speech"], + bibtex_citation=r""" +@misc{pham2025vnmtebvietnamesemassivetext, + archiveprefix = {arXiv}, + author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang}, + eprint = {2507.21500}, + primaryclass = {cs.CL}, + title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark}, + url = {https://arxiv.org/abs/2507.21500}, + year = {2025}, +} +""", + adapted_from=["AmazonPolarityClassification"], + ) diff --git a/mteb/tasks/Classification/vie/AmazonReviewsVNClassification.py b/mteb/tasks/Classification/vie/AmazonReviewsVNClassification.py new file mode 100644 index 0000000000..25ad64744e --- /dev/null +++ b/mteb/tasks/Classification/vie/AmazonReviewsVNClassification.py @@ -0,0 +1,44 @@ +from __future__ import annotations + +from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class AmazonReviewsVNClassification(AbsTaskClassification): + metadata = TaskMetadata( + name="AmazonReviewsVNClassification", + dataset={ + "path": "GreenNode/amazon-reviews-multi-vn", + "revision": "27da94deb6d4f44af789a3d70750fa506b79f189", + }, + description="""A collection of translated Amazon reviews specifically designed to aid research in multilingual text classification. + The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: + - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. + - Applies advanced embedding models to filter the translations. 
+ - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""", + reference="https://arxiv.org/abs/2010.02573", + category="s2s", + type="Classification", + eval_splits=["test"], + eval_langs=["vie-Latn"], + main_score="accuracy", + date=("2025-07-29", "2025-07-30"), + license="cc-by-sa-4.0", + annotations_creators="derived", + dialect=[], + sample_creation="machine-translated and LM verified", + domains=["Reviews", "Written"], + task_subtypes=["Emotion classification"], + bibtex_citation=r""" +@misc{pham2025vnmtebvietnamesemassivetext, + archiveprefix = {arXiv}, + author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang}, + eprint = {2507.21500}, + primaryclass = {cs.CL}, + title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark}, + url = {https://arxiv.org/abs/2507.21500}, + year = {2025}, +} +""", + adapted_from=["AmazonReviewsClassification"], + ) diff --git a/mteb/tasks/Classification/vie/Banking77VNClassification.py b/mteb/tasks/Classification/vie/Banking77VNClassification.py new file mode 100644 index 0000000000..a051965bca --- /dev/null +++ b/mteb/tasks/Classification/vie/Banking77VNClassification.py @@ -0,0 +1,44 @@ +from __future__ import annotations + +from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class Banking77VNClassification(AbsTaskClassification): + metadata = TaskMetadata( + name="Banking77VNClassification", + description="""A translated dataset composed of online banking queries annotated with their corresponding intents. + The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: + - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. + - Applies advanced embedding models to filter the translations. 
+ - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""", + reference="https://arxiv.org/abs/2003.04807", + dataset={ + "path": "GreenNode/banking77-vn", + "revision": "42541b07c25a49604be129fba6d70b752be229c1", + }, + type="Classification", + category="s2s", + eval_splits=["test"], + eval_langs=["vie-Latn"], + main_score="accuracy", + date=("2025-07-29", "2025-07-30"), + license="cc-by-sa-4.0", + annotations_creators="derived", + dialect=[], + sample_creation="machine-translated and LM verified", + domains=["Written"], + task_subtypes=[], + bibtex_citation=r""" +@misc{pham2025vnmtebvietnamesemassivetext, + archiveprefix = {arXiv}, + author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang}, + eprint = {2507.21500}, + primaryclass = {cs.CL}, + title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark}, + url = {https://arxiv.org/abs/2507.21500}, + year = {2025}, +} +""", + adapted_from=["Banking77Classification"], + ) diff --git a/mteb/tasks/Classification/vie/EmotionVNClassification.py b/mteb/tasks/Classification/vie/EmotionVNClassification.py new file mode 100644 index 0000000000..d2a7b44e7a --- /dev/null +++ b/mteb/tasks/Classification/vie/EmotionVNClassification.py @@ -0,0 +1,46 @@ +from __future__ import annotations + +from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class EmotionVNClassification(AbsTaskClassification): + num_samples = 16 + + metadata = TaskMetadata( + name="EmotionVNClassification", + description="""Emotion is a translated dataset of Vietnamese from English Twitter messages with six basic emotions: anger, fear, joy, love, sadness, and surprise. + The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: + - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. + - Applies advanced embedding models to filter the translations. 
+ - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""", + reference="https://www.aclweb.org/anthology/D18-1404", + dataset={ + "path": "GreenNode/emotion-vn", + "revision": "797a93c0dd755ebf5818fbf54d0e0a024df9216d", + }, + type="Classification", + category="s2s", + eval_splits=["validation", "test"], + eval_langs=["vie-Latn"], + main_score="accuracy", + date=("2025-07-29", "2025-07-30"), + license="cc-by-sa-4.0", + annotations_creators="derived", + dialect=[], + sample_creation="machine-translated and LM verified", + domains=["Social", "Written"], + task_subtypes=["Sentiment/Hate speech"], + bibtex_citation=r""" +@misc{pham2025vnmtebvietnamesemassivetext, + archiveprefix = {arXiv}, + author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang}, + eprint = {2507.21500}, + primaryclass = {cs.CL}, + title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark}, + url = {https://arxiv.org/abs/2507.21500}, + year = {2025}, +} +""", + adapted_from=["EmotionClassification"], + ) diff --git a/mteb/tasks/Classification/vie/ImdbVNClassification.py b/mteb/tasks/Classification/vie/ImdbVNClassification.py new file mode 100644 index 0000000000..9d87ca3c98 --- /dev/null +++ b/mteb/tasks/Classification/vie/ImdbVNClassification.py @@ -0,0 +1,44 @@ +from __future__ import annotations + +from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class ImdbVNClassification(AbsTaskClassification): + metadata = TaskMetadata( + name="ImdbVNClassification", + description="""A translated dataset of large movie reviews annotated for sentiment classification. + The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: + - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. + - Applies advanced embedding models to filter the translations. 
+ - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""", + dataset={ + "path": "GreenNode/imdb-vn", + "revision": "0dccb383ee26c90c99d03c8674cf40de642f099a", + }, + reference="http://www.aclweb.org/anthology/P11-1015", + type="Classification", + category="p2p", + eval_splits=["test"], + eval_langs=["vie-Latn"], + main_score="accuracy", + date=("2025-07-29", "2025-07-30"), + license="cc-by-sa-4.0", + annotations_creators="derived", + dialect=[], + sample_creation="machine-translated and LM verified", + domains=["Reviews", "Written"], + task_subtypes=["Sentiment/Hate speech"], + bibtex_citation=r""" +@misc{pham2025vnmtebvietnamesemassivetext, + archiveprefix = {arXiv}, + author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang}, + eprint = {2507.21500}, + primaryclass = {cs.CL}, + title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark}, + url = {https://arxiv.org/abs/2507.21500}, + year = {2025}, +} +""", + adapted_from=["ImdbClassification"], + ) diff --git a/mteb/tasks/Classification/vie/MTOPDomainVNClassification.py b/mteb/tasks/Classification/vie/MTOPDomainVNClassification.py new file mode 100644 index 0000000000..9050165762 --- /dev/null +++ b/mteb/tasks/Classification/vie/MTOPDomainVNClassification.py @@ -0,0 +1,44 @@ +from __future__ import annotations + +from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class MTOPDomainVNClassification(AbsTaskClassification): + metadata = TaskMetadata( + name="MTOPDomainVNClassification", + dataset={ + "path": "GreenNode/mtop-domain-vn", + "revision": "6e1ec8c54c018151c77472d94b1c0765230cf6ca", + }, + description="""A translated dataset from MTOP: Multilingual Task-Oriented Semantic Parsing + The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: + - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. + - Applies advanced embedding models to filter the translations. 
+        - Uses LLM-as-a-judge to score the quality of the samples based on multiple criteria.""",
+        reference="https://arxiv.org/pdf/2008.09335.pdf",
+        category="s2s",
+        type="Classification",
+        eval_splits=["test"],
+        eval_langs=["vie-Latn"],
+        main_score="accuracy",
+        date=("2025-07-29", "2025-07-30"),
+        license="cc-by-sa-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="machine-translated and LM verified",
+        domains=["Spoken"],
+        task_subtypes=[],
+        bibtex_citation=r"""
+@misc{pham2025vnmtebvietnamesemassivetext,
+  archiveprefix = {arXiv},
+  author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang},
+  eprint = {2507.21500},
+  primaryclass = {cs.CL},
+  title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark},
+  url = {https://arxiv.org/abs/2507.21500},
+  year = {2025},
+}
+""",
+        adapted_from=["MTOPDomainClassification"],
+    )
diff --git a/mteb/tasks/Classification/vie/MTOPIntentVNClassification.py b/mteb/tasks/Classification/vie/MTOPIntentVNClassification.py
new file mode 100644
index 0000000000..d50b263722
--- /dev/null
+++ b/mteb/tasks/Classification/vie/MTOPIntentVNClassification.py
@@ -0,0 +1,44 @@
+from __future__ import annotations
+
+from mteb.abstasks.AbsTaskClassification import AbsTaskClassification
+from mteb.abstasks.TaskMetadata import TaskMetadata
+
+
+class MTOPIntentVNClassification(AbsTaskClassification):
+    metadata = TaskMetadata(
+        name="MTOPIntentVNClassification",
+        dataset={
+            "path": "GreenNode/mtop-intent-vn",
+            "revision": "c4e81a5c9a813a0142d905e261e5a446cc6fbc4a",
+        },
+        description="""A translated dataset from MTOP: Multilingual Task-Oriented Semantic Parsing.
+        The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
+        - The system uses large language models (LLMs), specifically Cohere's Aya model, for translation.
+        - Applies advanced embedding models to filter the translations.
+        - Uses LLM-as-a-judge to score the quality of the samples based on multiple criteria.""",
+        reference="https://arxiv.org/pdf/2008.09335.pdf",
+        category="s2s",
+        type="Classification",
+        eval_splits=["test"],
+        eval_langs=["vie-Latn"],
+        main_score="accuracy",
+        date=("2025-07-29", "2025-07-30"),
+        license="cc-by-sa-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="machine-translated and LM verified",
+        domains=["Spoken"],
+        task_subtypes=[],
+        bibtex_citation=r"""
+@misc{pham2025vnmtebvietnamesemassivetext,
+  archiveprefix = {arXiv},
+  author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang},
+  eprint = {2507.21500},
+  primaryclass = {cs.CL},
+  title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark},
+  url = {https://arxiv.org/abs/2507.21500},
+  year = {2025},
+}
+""",
+        adapted_from=["MTOPIntentClassification"],
+    )
diff --git a/mteb/tasks/Classification/vie/MassiveIntentVNClassification.py b/mteb/tasks/Classification/vie/MassiveIntentVNClassification.py
new file mode 100644
index 0000000000..49e7fe0219
--- /dev/null
+++ b/mteb/tasks/Classification/vie/MassiveIntentVNClassification.py
@@ -0,0 +1,44 @@
+from __future__ import annotations
+
+from mteb.abstasks.AbsTaskClassification import AbsTaskClassification
+from mteb.abstasks.TaskMetadata import TaskMetadata
+
+
+class MassiveIntentVNClassification(AbsTaskClassification):
+    metadata = TaskMetadata(
+        name="MassiveIntentVNClassification",
+        dataset={
+            "path": "GreenNode/amazon-massive-intent-vn",
+            "revision": "35c7ced69f958dbbaa24f792db4a9250e461866d",
+        },
+        description="""A translated dataset from MASSIVE: A 1M-Example Multilingual Natural Language Understanding Dataset with 51 Typologically-Diverse Languages.
+        The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
+        - The system uses large language models (LLMs), specifically Cohere's Aya model, for translation.
+        - Applies advanced embedding models to filter the translations.
+ - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""", + reference="https://arxiv.org/abs/2204.08582#:~:text=MASSIVE%20contains%201M%20realistic%2C%20parallel,diverse%20languages%20from%2029%20genera.", + category="s2s", + type="Classification", + eval_splits=["test"], + eval_langs=["vie-Latn"], + main_score="accuracy", + date=("2025-07-29", "2025-07-30"), + license="cc-by-sa-4.0", + annotations_creators="derived", + dialect=[], + sample_creation="machine-translated and LM verified", + domains=["Spoken"], + task_subtypes=[], + bibtex_citation=r""" +@misc{pham2025vnmtebvietnamesemassivetext, + archiveprefix = {arXiv}, + author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang}, + eprint = {2507.21500}, + primaryclass = {cs.CL}, + title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark}, + url = {https://arxiv.org/abs/2507.21500}, + year = {2025}, +} +""", + adapted_from=["MassiveIntentClassification"], + ) diff --git a/mteb/tasks/Classification/vie/MassiveScenarioVNClassification.py b/mteb/tasks/Classification/vie/MassiveScenarioVNClassification.py new file mode 100644 index 0000000000..5d214e9fa2 --- /dev/null +++ b/mteb/tasks/Classification/vie/MassiveScenarioVNClassification.py @@ -0,0 +1,44 @@ +from __future__ import annotations + +from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class MassiveScenarioVNClassification(AbsTaskClassification): + metadata = TaskMetadata( + name="MassiveScenarioVNClassification", + dataset={ + "path": "GreenNode/amazon-massive-scenario-vn", + "revision": "a82e282d9f5aec1a8cf7d868ce40f70669c16b89", + }, + description="""A translated dataset from MASSIVE: A 1M-Example Multilingual Natural Language Understanding Dataset with 51 Typologically-Diverse Languages + The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: + - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. + - Applies advanced embedding models to filter the translations. 
+ - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""", + reference="https://arxiv.org/abs/2204.08582#:~:text=MASSIVE%20contains%201M%20realistic%2C%20parallel,diverse%20languages%20from%2029%20genera.", + category="s2s", + type="Classification", + eval_splits=["test"], + eval_langs=["vie-Latn"], + main_score="accuracy", + date=("2025-07-29", "2025-07-30"), + license="cc-by-sa-4.0", + annotations_creators="derived", + dialect=[], + sample_creation="machine-translated and LM verified", + domains=["Spoken"], + task_subtypes=[], + bibtex_citation=r""" +@misc{pham2025vnmtebvietnamesemassivetext, + archiveprefix = {arXiv}, + author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang}, + eprint = {2507.21500}, + primaryclass = {cs.CL}, + title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark}, + url = {https://arxiv.org/abs/2507.21500}, + year = {2025}, +} +""", + adapted_from=["MassiveScenarioClassification"], + ) diff --git a/mteb/tasks/Classification/vie/ToxicConversationsVNClassification.py b/mteb/tasks/Classification/vie/ToxicConversationsVNClassification.py new file mode 100644 index 0000000000..a3fb2d6d13 --- /dev/null +++ b/mteb/tasks/Classification/vie/ToxicConversationsVNClassification.py @@ -0,0 +1,51 @@ +from __future__ import annotations + +from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class ToxicConversationsVNClassification(AbsTaskClassification): + num_samples = 16 + + metadata = TaskMetadata( + name="ToxicConversationsVNClassification", + description="""A translated dataset from Collection of comments from the Civil Comments platform together with annotations if the comment is toxic or not. + The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: + - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. + - Applies advanced embedding models to filter the translations. 
+ - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""", + reference="https://www.kaggle.com/competitions/jigsaw-unintended-bias-in-toxicity-classification/overview", + dataset={ + "path": "GreenNode/toxic-conversations-50k-vn", + "revision": "2cc697991407cbbe34e7ef7bc9564449a4a99132", + }, + type="Classification", + category="s2s", + eval_splits=["test"], + eval_langs=["vie-Latn"], + main_score="accuracy", + date=("2025-07-29", "2025-07-30"), + license="cc-by-sa-4.0", + annotations_creators="derived", + dialect=[], + sample_creation="machine-translated and LM verified", + domains=["Social", "Written"], + task_subtypes=["Sentiment/Hate speech"], + bibtex_citation=r""" +@misc{pham2025vnmtebvietnamesemassivetext, + archiveprefix = {arXiv}, + author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang}, + eprint = {2507.21500}, + primaryclass = {cs.CL}, + title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark}, + url = {https://arxiv.org/abs/2507.21500}, + year = {2025}, +} +""", + adapted_from=["ToxicConversationsClassification"], + ) + + def dataset_transform(self): + self.dataset = self.stratified_subsampling( + self.dataset, seed=self.seed, splits=["test"] + ) diff --git a/mteb/tasks/Classification/vie/TweetSentimentExtractionVNClassification.py b/mteb/tasks/Classification/vie/TweetSentimentExtractionVNClassification.py new file mode 100644 index 0000000000..1645c182bd --- /dev/null +++ b/mteb/tasks/Classification/vie/TweetSentimentExtractionVNClassification.py @@ -0,0 +1,46 @@ +from __future__ import annotations + +from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class TweetSentimentExtractionVNClassification(AbsTaskClassification): + num_samples = 32 + + metadata = TaskMetadata( + name="TweetSentimentExtractionVNClassification", + description="""A collection of translated tweets annotated for sentiment extraction. + The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: + - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. + - Applies advanced embedding models to filter the translations. 
+ - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""", + reference="https://www.kaggle.com/competitions/tweet-sentiment-extraction/overview", + dataset={ + "path": "GreenNode/tweet-sentiment-extraction-vn", + "revision": "f453803eff1e91579eb235dc1d7c38d39b3f1340", + }, + type="Classification", + category="s2s", + eval_splits=["test"], + eval_langs=["vie-Latn"], + main_score="accuracy", + date=("2025-07-29", "2025-07-30"), + license="cc-by-sa-4.0", + annotations_creators="derived", + dialect=[], + sample_creation="machine-translated and LM verified", + domains=["Social", "Written"], + task_subtypes=["Sentiment/Hate speech"], + bibtex_citation=r""" +@misc{pham2025vnmtebvietnamesemassivetext, + archiveprefix = {arXiv}, + author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang}, + eprint = {2507.21500}, + primaryclass = {cs.CL}, + title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark}, + url = {https://arxiv.org/abs/2507.21500}, + year = {2025}, +} +""", + adapted_from=["TweetSentimentExtractionClassification"], + ) diff --git a/mteb/tasks/Clustering/__init__.py b/mteb/tasks/Clustering/__init__.py index 0c86095000..3476ea491a 100644 --- a/mteb/tasks/Clustering/__init__.py +++ b/mteb/tasks/Clustering/__init__.py @@ -50,4 +50,9 @@ from .spa.SpanishNewsClusteringP2P import * from .swe.swedn_clustering import * from .swe.SwednClustering import * +from .vie.RedditClusteringP2PVN import * +from .vie.RedditClusteringVN import * +from .vie.StackExchangeClusteringP2PVN import * +from .vie.StackExchangeClusteringVN import * +from .vie.TwentyNewsgroupsClusteringVN import * from .zho.CMTEBClustering import * diff --git a/mteb/tasks/Clustering/vie/RedditClusteringP2PVN.py b/mteb/tasks/Clustering/vie/RedditClusteringP2PVN.py new file mode 100644 index 0000000000..6f200ca758 --- /dev/null +++ b/mteb/tasks/Clustering/vie/RedditClusteringP2PVN.py @@ -0,0 +1,44 @@ +from __future__ import annotations + +from mteb.abstasks.AbsTaskClustering import AbsTaskClustering +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class RedditClusteringP2PVN(AbsTaskClustering): + metadata = TaskMetadata( + name="RedditClusteringP2P-VN", + description="""A translated dataset from Clustering of title+posts from reddit. Clustering of 10 sets of 50k paragraphs and 40 sets of 10k paragraphs. + The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: + - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. + - Applies advanced embedding models to filter the translations. 
+ - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""", + reference="https://arxiv.org/abs/2104.07081", + dataset={ + "path": "GreenNode/reddit-clustering-p2p-vn", + "revision": "841856dcb82496f1f2f59356e4798ce15baeb200", + }, + type="Clustering", + category="p2p", + eval_splits=["test"], + eval_langs=["vie-Latn"], + main_score="v_measure", + date=("2025-07-29", "2025-07-30"), + license="cc-by-sa-4.0", + annotations_creators="derived", + dialect=[], + sample_creation="machine-translated and LM verified", + domains=["Web", "Social", "Written"], + task_subtypes=["Thematic clustering"], + bibtex_citation=r""" +@misc{pham2025vnmtebvietnamesemassivetext, + archiveprefix = {arXiv}, + author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang}, + eprint = {2507.21500}, + primaryclass = {cs.CL}, + title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark}, + url = {https://arxiv.org/abs/2507.21500}, + year = {2025}, +} +""", + adapted_from=["RedditClusteringP2P"], + ) diff --git a/mteb/tasks/Clustering/vie/RedditClusteringVN.py b/mteb/tasks/Clustering/vie/RedditClusteringVN.py new file mode 100644 index 0000000000..0bb1cf95ec --- /dev/null +++ b/mteb/tasks/Clustering/vie/RedditClusteringVN.py @@ -0,0 +1,44 @@ +from __future__ import annotations + +from mteb.abstasks.AbsTaskClustering import AbsTaskClustering +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class RedditClusteringVN(AbsTaskClustering): + metadata = TaskMetadata( + name="RedditClustering-VN", + description="""A translated dataset from Clustering of titles from 199 subreddits. Clustering of 25 sets, each with 10-50 classes, and each class with 100 - 1000 sentences. + The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: + - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. + - Applies advanced embedding models to filter the translations. 
+ - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""", + reference="https://arxiv.org/abs/2104.07081", + dataset={ + "path": "GreenNode/reddit-clustering-vn", + "revision": "7f7d4097979633181b2df3f73905218f74c4665d", + }, + type="Clustering", + category="s2s", + eval_splits=["test"], + eval_langs=["vie-Latn"], + main_score="v_measure", + date=("2025-07-29", "2025-07-30"), + license="cc-by-sa-4.0", + annotations_creators="derived", + dialect=[], + sample_creation="machine-translated and LM verified", + domains=["Web", "Social", "Written"], + task_subtypes=["Thematic clustering"], + bibtex_citation=r""" +@misc{pham2025vnmtebvietnamesemassivetext, + archiveprefix = {arXiv}, + author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang}, + eprint = {2507.21500}, + primaryclass = {cs.CL}, + title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark}, + url = {https://arxiv.org/abs/2507.21500}, + year = {2025}, +} +""", + adapted_from=["RedditClustering"], + ) diff --git a/mteb/tasks/Clustering/vie/StackExchangeClusteringP2PVN.py b/mteb/tasks/Clustering/vie/StackExchangeClusteringP2PVN.py new file mode 100644 index 0000000000..24e578deb9 --- /dev/null +++ b/mteb/tasks/Clustering/vie/StackExchangeClusteringP2PVN.py @@ -0,0 +1,44 @@ +from __future__ import annotations + +from mteb.abstasks.AbsTaskClustering import AbsTaskClustering +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class StackExchangeClusteringP2PVN(AbsTaskClustering): + metadata = TaskMetadata( + name="StackExchangeClusteringP2P-VN", + description="""A translated Clustering of title+body from stackexchange. Clustering of 5 sets of 10k paragraphs and 5 sets of 5k paragraphs. + The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: + - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. + - Applies advanced embedding models to filter the translations. 
+ - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""", + reference="https://arxiv.org/abs/2104.07081", + dataset={ + "path": "GreenNode/stackexchange-clustering-p2p-vn", + "revision": "8f154ee524a466850028531d21e1a62d958b8156", + }, + type="Clustering", + category="p2p", + eval_splits=["test"], + eval_langs=["vie-Latn"], + main_score="v_measure", + date=("2025-07-29", "2025-07-30"), + license="cc-by-sa-4.0", + annotations_creators="derived", + dialect=[], + sample_creation="machine-translated and LM verified", + domains=["Web", "Written"], + task_subtypes=["Thematic clustering"], + bibtex_citation=r""" +@misc{pham2025vnmtebvietnamesemassivetext, + archiveprefix = {arXiv}, + author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang}, + eprint = {2507.21500}, + primaryclass = {cs.CL}, + title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark}, + url = {https://arxiv.org/abs/2507.21500}, + year = {2025}, +} +""", + adapted_from=["StackExchangeClusteringP2P"], + ) diff --git a/mteb/tasks/Clustering/vie/StackExchangeClusteringVN.py b/mteb/tasks/Clustering/vie/StackExchangeClusteringVN.py new file mode 100644 index 0000000000..d476c41de1 --- /dev/null +++ b/mteb/tasks/Clustering/vie/StackExchangeClusteringVN.py @@ -0,0 +1,44 @@ +from __future__ import annotations + +from mteb.abstasks.AbsTaskClustering import AbsTaskClustering +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class StackExchangeClusteringVN(AbsTaskClustering): + metadata = TaskMetadata( + name="StackExchangeClustering-VN", + description="""A translated dataset from Clustering of titles from 121 stackexchanges. Clustering of 25 sets, each with 10-50 classes, and each class with 100 - 1000 sentences. + The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: + - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. + - Applies advanced embedding models to filter the translations. 
+ - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""", + reference="https://arxiv.org/abs/2104.07081", + dataset={ + "path": "GreenNode/stackexchange-clustering-vn", + "revision": "cf01db048f2bf705741675b51613dc48e0bb122b", + }, + type="Clustering", + category="s2s", + eval_splits=["test"], + eval_langs=["vie-Latn"], + main_score="v_measure", + date=("2025-07-29", "2025-07-30"), + license="cc-by-sa-4.0", + annotations_creators="derived", + dialect=[], + sample_creation="machine-translated and LM verified", + domains=["Web", "Written"], + task_subtypes=["Thematic clustering"], + bibtex_citation=r""" +@misc{pham2025vnmtebvietnamesemassivetext, + archiveprefix = {arXiv}, + author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang}, + eprint = {2507.21500}, + primaryclass = {cs.CL}, + title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark}, + url = {https://arxiv.org/abs/2507.21500}, + year = {2025}, +} +""", + adapted_from=["StackExchangeClustering"], + ) diff --git a/mteb/tasks/Clustering/vie/TwentyNewsgroupsClusteringVN.py b/mteb/tasks/Clustering/vie/TwentyNewsgroupsClusteringVN.py new file mode 100644 index 0000000000..d45be8112d --- /dev/null +++ b/mteb/tasks/Clustering/vie/TwentyNewsgroupsClusteringVN.py @@ -0,0 +1,44 @@ +from __future__ import annotations + +from mteb.abstasks.AbsTaskClustering import AbsTaskClustering +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class TwentyNewsgroupsClusteringVN(AbsTaskClustering): + metadata = TaskMetadata( + name="TwentyNewsgroupsClustering-VN", + description="""A translated dataset from Clustering of the 20 Newsgroups dataset (subject only). + The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: + - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. + - Applies advanced embedding models to filter the translations. 
+ - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""", + reference="https://scikit-learn.org/0.19/datasets/twenty_newsgroups.html", + dataset={ + "path": "GreenNode/twentynewsgroups-clustering-vn", + "revision": "770e1b9029cd85c79410bc6df1528a43fc2b9ad1", + }, + type="Clustering", + category="s2s", + eval_splits=["test"], + eval_langs=["vie-Latn"], + main_score="v_measure", + date=("2025-07-29", "2025-07-30"), + license="cc-by-sa-4.0", + annotations_creators="derived", + dialect=[], + sample_creation="machine-translated and LM verified", + domains=["News", "Written"], + task_subtypes=["Thematic clustering"], + bibtex_citation=r""" +@misc{pham2025vnmtebvietnamesemassivetext, + archiveprefix = {arXiv}, + author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang}, + eprint = {2507.21500}, + primaryclass = {cs.CL}, + title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark}, + url = {https://arxiv.org/abs/2507.21500}, + year = {2025}, +} +""", + adapted_from=["TwentyNewsgroupsClustering"], + ) diff --git a/mteb/tasks/Clustering/vie/__init__.py b/mteb/tasks/Clustering/vie/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/mteb/tasks/PairClassification/__init__.py b/mteb/tasks/PairClassification/__init__.py index d3ecd19272..62e9d8daa2 100644 --- a/mteb/tasks/PairClassification/__init__.py +++ b/mteb/tasks/PairClassification/__init__.py @@ -28,4 +28,7 @@ from .por.Assin2RTE import * from .por.SickBrPC import * from .rus.TERRa import * +from .vie.SprintDuplicateQuestionsPCVN import * +from .vie.TwitterSemEval2015PCVN import * +from .vie.TwitterURLCorpusPCVN import * from .zho.CMTEBPairClassification import * diff --git a/mteb/tasks/PairClassification/vie/SprintDuplicateQuestionsPCVN.py b/mteb/tasks/PairClassification/vie/SprintDuplicateQuestionsPCVN.py new file mode 100644 index 0000000000..13b5bd6d52 --- /dev/null +++ b/mteb/tasks/PairClassification/vie/SprintDuplicateQuestionsPCVN.py @@ -0,0 +1,44 @@ +from __future__ import annotations + +from mteb.abstasks.AbsTaskPairClassification import AbsTaskPairClassification +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class SprintDuplicateQuestionsPCVN(AbsTaskPairClassification): + metadata = TaskMetadata( + name="SprintDuplicateQuestions-VN", + description="""A translated dataset from Duplicate questions from the Sprint community. + The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: + - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. + - Applies advanced embedding models to filter the translations. 
+ - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""", + reference="https://www.aclweb.org/anthology/D18-1131/", + dataset={ + "path": "GreenNode/sprintduplicatequestions-pairclassification-vn", + "revision": "2552beae0e4fe7fe05d088814f78a4c309ad2219", + }, + type="PairClassification", + category="s2s", + eval_splits=["validation", "test"], + eval_langs=["vie-Latn"], + main_score="ap", + date=("2025-07-29", "2025-07-30"), + license="cc-by-sa-4.0", + annotations_creators="derived", + dialect=[], + sample_creation="machine-translated and LM verified", + domains=["Programming", "Written"], + task_subtypes=["Duplicate Detection"], + bibtex_citation=r""" +@misc{pham2025vnmtebvietnamesemassivetext, + archiveprefix = {arXiv}, + author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang}, + eprint = {2507.21500}, + primaryclass = {cs.CL}, + title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark}, + url = {https://arxiv.org/abs/2507.21500}, + year = {2025}, +} +""", + adapted_from=["SprintDuplicateQuestions"], + ) diff --git a/mteb/tasks/PairClassification/vie/TwitterSemEval2015PCVN.py b/mteb/tasks/PairClassification/vie/TwitterSemEval2015PCVN.py new file mode 100644 index 0000000000..8587c22d78 --- /dev/null +++ b/mteb/tasks/PairClassification/vie/TwitterSemEval2015PCVN.py @@ -0,0 +1,44 @@ +from __future__ import annotations + +from mteb.abstasks.AbsTaskPairClassification import AbsTaskPairClassification +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class TwitterSemEval2015PCVN(AbsTaskPairClassification): + metadata = TaskMetadata( + name="TwitterSemEval2015-VN", + dataset={ + "path": "GreenNode/twittersemeval2015-pairclassification-vn", + "revision": "9215a3c954078fd15c2bbecca914477d53944de1", + }, + description="""A translated dataset from Paraphrase-Pairs of Tweets from the SemEval 2015 workshop. + The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: + - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. + - Applies advanced embedding models to filter the translations. 
+        - Uses LLM-as-a-judge to score the quality of the samples based on multiple criteria.""",
+        reference="https://alt.qcri.org/semeval2015/task1/",
+        category="s2s",
+        type="PairClassification",
+        eval_splits=["test"],
+        eval_langs=["vie-Latn"],
+        main_score="ap",
+        date=("2025-07-29", "2025-07-30"),
+        license="cc-by-sa-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="machine-translated and LM verified",
+        domains=["Social", "Written"],
+        task_subtypes=[],
+        bibtex_citation=r"""
+@misc{pham2025vnmtebvietnamesemassivetext,
+  archiveprefix = {arXiv},
+  author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang},
+  eprint = {2507.21500},
+  primaryclass = {cs.CL},
+  title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark},
+  url = {https://arxiv.org/abs/2507.21500},
+  year = {2025},
+}
+""",
+        adapted_from=["TwitterSemEval2015"],
+    )
diff --git a/mteb/tasks/PairClassification/vie/TwitterURLCorpusPCVN.py b/mteb/tasks/PairClassification/vie/TwitterURLCorpusPCVN.py
new file mode 100644
index 0000000000..39d91783ab
--- /dev/null
+++ b/mteb/tasks/PairClassification/vie/TwitterURLCorpusPCVN.py
@@ -0,0 +1,44 @@
+from __future__ import annotations
+
+from mteb.abstasks.AbsTaskPairClassification import AbsTaskPairClassification
+from mteb.abstasks.TaskMetadata import TaskMetadata
+
+
+class TwitterURLCorpusPCVN(AbsTaskPairClassification):
+    metadata = TaskMetadata(
+        name="TwitterURLCorpus-VN",
+        dataset={
+            "path": "GreenNode/twitterurlcorpus-pairclassification-vn",
+            "revision": "6e6a40aaade2129f70432f2156a6d24b63d72be3",
+        },
+        description="""A translated dataset from Paraphrase-Pairs of Tweets.
+        The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
+        - The system uses large language models (LLMs), specifically Cohere's Aya model, for translation.
+        - Applies advanced embedding models to filter the translations.
+ - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""", + reference="https://languagenet.github.io/", + category="s2s", + type="PairClassification", + eval_splits=["test"], + eval_langs=["vie-Latn"], + main_score="ap", + date=("2025-07-29", "2025-07-30"), + license="cc-by-sa-4.0", + annotations_creators="derived", + dialect=[], + sample_creation="machine-translated and LM verified", + domains=["Social", "Written"], + task_subtypes=[], + bibtex_citation=r""" +@misc{pham2025vnmtebvietnamesemassivetext, + archiveprefix = {arXiv}, + author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang}, + eprint = {2507.21500}, + primaryclass = {cs.CL}, + title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark}, + url = {https://arxiv.org/abs/2507.21500}, + year = {2025}, +} +""", + adapted_from=["TwitterURLCorpus"], + ) diff --git a/mteb/tasks/PairClassification/vie/__init__.py b/mteb/tasks/PairClassification/vie/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/mteb/tasks/Reranking/__init__.py b/mteb/tasks/Reranking/__init__.py index ef4f8531c6..06ffc29141 100644 --- a/mteb/tasks/Reranking/__init__.py +++ b/mteb/tasks/Reranking/__init__.py @@ -17,4 +17,7 @@ from .multilingual.WikipediaRerankingMultilingual import * from .multilingual.XGlueWPRReranking import * from .rus.RuBQReranking import * +from .vie.AskUbuntuDupQuestionsVN import * +from .vie.SciDocsRerankingVN import * +from .vie.StackOverflowDupQuestionsVN import * from .zho.CMTEBReranking import * diff --git a/mteb/tasks/Reranking/vie/AskUbuntuDupQuestionsVN.py b/mteb/tasks/Reranking/vie/AskUbuntuDupQuestionsVN.py new file mode 100644 index 0000000000..a1eaa9485b --- /dev/null +++ b/mteb/tasks/Reranking/vie/AskUbuntuDupQuestionsVN.py @@ -0,0 +1,44 @@ +from __future__ import annotations + +from mteb.abstasks.AbsTaskReranking import AbsTaskReranking +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class AskUbuntuDupQuestionsVN(AbsTaskReranking): + metadata = TaskMetadata( + name="AskUbuntuDupQuestions-VN", + description="""A translated dataset from AskUbuntu Question Dataset - Questions from AskUbuntu with manual annotations marking pairs of questions as similar or non-similar + The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: + - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. + - Applies advanced embedding models to filter the translations. 
+ - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""", + reference="https://github.com/taolei87/askubuntu", + dataset={ + "path": "GreenNode/askubuntudupquestions-reranking-vn", + "revision": "5cfaa5c07252d30c37302bfc056f0d85884971a1", + }, + type="Reranking", + category="s2s", + eval_splits=["test"], + eval_langs=["vie-Latn"], + main_score="map", + date=("2025-07-29", "2025-07-30"), + license="cc-by-sa-4.0", + annotations_creators="derived", + dialect=[], + sample_creation="machine-translated and LM verified", + domains=["Programming", "Web"], + task_subtypes=["Scientific Reranking"], + bibtex_citation=r""" +@misc{pham2025vnmtebvietnamesemassivetext, + archiveprefix = {arXiv}, + author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang}, + eprint = {2507.21500}, + primaryclass = {cs.CL}, + title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark}, + url = {https://arxiv.org/abs/2507.21500}, + year = {2025}, +} +""", + adapted_from=["AskUbuntuDupQuestions"], + ) diff --git a/mteb/tasks/Reranking/vie/SciDocsRerankingVN.py b/mteb/tasks/Reranking/vie/SciDocsRerankingVN.py new file mode 100644 index 0000000000..68b527dc07 --- /dev/null +++ b/mteb/tasks/Reranking/vie/SciDocsRerankingVN.py @@ -0,0 +1,44 @@ +from __future__ import annotations + +from mteb.abstasks.AbsTaskReranking import AbsTaskReranking +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class SciDocsRerankingVN(AbsTaskReranking): + metadata = TaskMetadata( + name="SciDocsRR-VN", + description="""A translated dataset from Ranking of related scientific papers based on their title. + The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: + - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. + - Applies advanced embedding models to filter the translations. 
+ - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""", + reference="https://allenai.org/data/scidocs", + dataset={ + "path": "GreenNode/scidocs-reranking-vn", + "revision": "c9ab36ae6c75f754df6f1e043c09b5e0b5547cac", + }, + type="Reranking", + category="s2s", + eval_splits=["test"], + eval_langs=["vie-Latn"], + main_score="map", + date=("2025-07-29", "2025-07-30"), + license="cc-by-sa-4.0", + annotations_creators="derived", + dialect=[], + sample_creation="machine-translated and LM verified", + domains=["Academic", "Non-fiction", "Written"], + task_subtypes=["Scientific Reranking"], + bibtex_citation=r""" +@misc{pham2025vnmtebvietnamesemassivetext, + archiveprefix = {arXiv}, + author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang}, + eprint = {2507.21500}, + primaryclass = {cs.CL}, + title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark}, + url = {https://arxiv.org/abs/2507.21500}, + year = {2025}, +} +""", + adapted_from=["SciDocsRR"], + ) diff --git a/mteb/tasks/Reranking/vie/StackOverflowDupQuestionsVN.py b/mteb/tasks/Reranking/vie/StackOverflowDupQuestionsVN.py new file mode 100644 index 0000000000..0fe2e3b7f9 --- /dev/null +++ b/mteb/tasks/Reranking/vie/StackOverflowDupQuestionsVN.py @@ -0,0 +1,44 @@ +from __future__ import annotations + +from mteb.abstasks.AbsTaskReranking import AbsTaskReranking +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class StackOverflowDupQuestionsVN(AbsTaskReranking): + metadata = TaskMetadata( + name="StackOverflowDupQuestions-VN", + description="""A translated dataset from Stack Overflow Duplicate Questions Task for questions with the tags Java, JavaScript and Python + The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: + - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. + - Applies advanced embedding models to filter the translations. 
+ - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""", + reference="https://www.microsoft.com/en-us/research/uploads/prod/2019/03/nl4se18LinkSO.pdf", + dataset={ + "path": "GreenNode/stackoverflowdupquestions-reranking-vn", + "revision": "3ceb17db245f52beaf27a3720aa71e1cc5f06faf", + }, + type="Reranking", + category="s2s", + eval_splits=["test"], + eval_langs=["vie-Latn"], + main_score="map", + date=("2025-07-29", "2025-07-30"), + license="cc-by-sa-4.0", + annotations_creators="derived", + dialect=[], + sample_creation="machine-translated and LM verified", + domains=["Academic", "Non-fiction", "Written"], + task_subtypes=["Scientific Reranking"], + bibtex_citation=r""" +@misc{pham2025vnmtebvietnamesemassivetext, + archiveprefix = {arXiv}, + author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang}, + eprint = {2507.21500}, + primaryclass = {cs.CL}, + title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark}, + url = {https://arxiv.org/abs/2507.21500}, + year = {2025}, +} +""", + adapted_from=["StackOverflowDupQuestions"], + ) diff --git a/mteb/tasks/Reranking/vie/__init__.py b/mteb/tasks/Reranking/vie/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/mteb/tasks/Retrieval/__init__.py b/mteb/tasks/Retrieval/__init__.py index 91a63662ae..6b19080b71 100644 --- a/mteb/tasks/Retrieval/__init__.py +++ b/mteb/tasks/Retrieval/__init__.py @@ -195,7 +195,31 @@ from .swe.SwednRetrieval import * from .swe.SweFaqRetrieval import * from .tur.TurHistQuad import * +from .vie.ArguAnaVNRetrieval import * +from .vie.ClimateFEVERVNRetrieval import * +from .vie.CQADupstackAndroidVNRetrieval import * +from .vie.CQADupstackGisVNRetrieval import * +from .vie.CQADupstackMathematicaVNRetrieval import * +from .vie.CQADupstackPhysicsVNRetrieval import * +from .vie.CQADupstackProgrammersVNRetrieval import * +from .vie.CQADupstackStatsVNRetrieval import * +from .vie.CQADupstackTexVNRetrieval import * +from .vie.CQADupstackUnixVNRetrieval import * +from .vie.CQADupstackWebmastersVNRetrieval import * +from .vie.CQADupstackWordpressVNRetrieval import * +from .vie.DBPediaVNRetrieval import * +from .vie.FEVERVNRetrieval import * +from .vie.FiQA2018VNRetrieval import * from .vie.GreenNodeTableMarkdownRetrieval import * +from .vie.HotpotQAVNRetrieval import * +from .vie.MSMARCOVNRetrieval import * +from .vie.NFCorpusVNRetrieval import * +from .vie.NQVNRetrieval import * +from .vie.QuoraVNRetrieval import * +from .vie.SCIDOCSVNRetrieval import * +from .vie.SciFactVNRetrieval import * +from .vie.Touche2020VNRetrieval import * +from .vie.TRECCOVIDVNRetrieval import * from .vie.VieQuADRetrieval import * from .vie.ZacLegalTextRetrieval import * from .zho.CMTEBRetrieval import * diff --git a/mteb/tasks/Retrieval/vie/ArguAnaVNRetrieval.py b/mteb/tasks/Retrieval/vie/ArguAnaVNRetrieval.py new file mode 100644 index 0000000000..4492d9b493 --- /dev/null +++ b/mteb/tasks/Retrieval/vie/ArguAnaVNRetrieval.py @@ -0,0 +1,44 @@ +from __future__ import annotations + +from mteb.abstasks.AbsTaskRetrieval import AbsTaskRetrieval +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class ArguAnaVN(AbsTaskRetrieval): + metadata = TaskMetadata( + name="ArguAna-VN", + description="""A translated dataset from NFCorpus: A Full-Text Learning to Rank Dataset for Medical Information Retrieval + The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: + - The system uses large language models 
(LLMs), specifically Coherence's Aya model, for translation. + - Applies advanced embedding models to filter the translations. + - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""", + reference="http://argumentation.bplaced.net/arguana/data", + dataset={ + "path": "GreenNode/arguana-vn", + "revision": "2a5133a05d7430e6f353497b1624a6e73148105b", + }, + type="Retrieval", + category="s2p", + eval_splits=["test"], + eval_langs=["vie-Latn"], + main_score="ndcg_at_10", + date=("2025-07-29", "2025-07-30"), + license="cc-by-sa-4.0", + annotations_creators="derived", + dialect=[], + sample_creation="machine-translated and LM verified", + domains=["Medical", "Written"], + task_subtypes=["Article retrieval"], + bibtex_citation=r""" +@misc{pham2025vnmtebvietnamesemassivetext, + archiveprefix = {arXiv}, + author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang}, + eprint = {2507.21500}, + primaryclass = {cs.CL}, + title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark}, + url = {https://arxiv.org/abs/2507.21500}, + year = {2025}, +} +""", + adapted_from=["ArguAna"], + ) diff --git a/mteb/tasks/Retrieval/vie/CQADupstackAndroidVNRetrieval.py b/mteb/tasks/Retrieval/vie/CQADupstackAndroidVNRetrieval.py new file mode 100644 index 0000000000..cb177bdf05 --- /dev/null +++ b/mteb/tasks/Retrieval/vie/CQADupstackAndroidVNRetrieval.py @@ -0,0 +1,44 @@ +from __future__ import annotations + +from mteb.abstasks.AbsTaskRetrieval import AbsTaskRetrieval +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class CQADupstackAndroidVN(AbsTaskRetrieval): + metadata = TaskMetadata( + name="CQADupstackAndroid-VN", + description="""A translated dataset from CQADupStack: A Benchmark Data Set for Community Question-Answering Research + The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: + - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. + - Applies advanced embedding models to filter the translations. 
+ - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""", + reference="http://nlp.cis.unimelb.edu.au/resources/cqadupstack/", + dataset={ + "path": "GreenNode/cqadupstack-android-vn", + "revision": "4a022e7213ccc05ee970a176abd0293b3a0a2da0", + }, + type="Retrieval", + category="s2p", + eval_splits=["test"], + eval_langs=["vie-Latn"], + main_score="ndcg_at_10", + date=("2025-07-29", "2025-07-30"), + license="cc-by-sa-4.0", + annotations_creators="derived", + dialect=[], + sample_creation="machine-translated and LM verified", + domains=["Programming", "Web", "Written", "Non-fiction"], + task_subtypes=["Question answering", "Duplicate Detection"], + bibtex_citation=r""" +@misc{pham2025vnmtebvietnamesemassivetext, + archiveprefix = {arXiv}, + author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang}, + eprint = {2507.21500}, + primaryclass = {cs.CL}, + title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark}, + url = {https://arxiv.org/abs/2507.21500}, + year = {2025}, +} +""", + adapted_from=["CQADupstackAndroid"], + ) diff --git a/mteb/tasks/Retrieval/vie/CQADupstackGisVNRetrieval.py b/mteb/tasks/Retrieval/vie/CQADupstackGisVNRetrieval.py new file mode 100644 index 0000000000..2b531eb7d8 --- /dev/null +++ b/mteb/tasks/Retrieval/vie/CQADupstackGisVNRetrieval.py @@ -0,0 +1,44 @@ +from __future__ import annotations + +from mteb.abstasks.AbsTaskRetrieval import AbsTaskRetrieval +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class CQADupstackGisVN(AbsTaskRetrieval): + metadata = TaskMetadata( + name="CQADupstackGis-VN", + description="""A translated dataset from CQADupStack: A Benchmark Data Set for Community Question-Answering Research + The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: + - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. + - Applies advanced embedding models to filter the translations. 
+ - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""", + reference="http://nlp.cis.unimelb.edu.au/resources/cqadupstack/", + dataset={ + "path": "GreenNode/cqadupstack-gis-vn", + "revision": "755156d548a8288efdb29b80bad302750ab33977", + }, + type="Retrieval", + category="s2p", + eval_splits=["test"], + eval_langs=["vie-Latn"], + main_score="ndcg_at_10", + date=("2025-07-29", "2025-07-30"), + license="cc-by-sa-4.0", + annotations_creators="derived", + dialect=[], + sample_creation="machine-translated and LM verified", + domains=["Written", "Non-fiction"], + task_subtypes=["Question answering", "Duplicate Detection"], + bibtex_citation=r""" +@misc{pham2025vnmtebvietnamesemassivetext, + archiveprefix = {arXiv}, + author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang}, + eprint = {2507.21500}, + primaryclass = {cs.CL}, + title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark}, + url = {https://arxiv.org/abs/2507.21500}, + year = {2025}, +} +""", + adapted_from=["CQADupstackGis"], + ) diff --git a/mteb/tasks/Retrieval/vie/CQADupstackMathematicaVNRetrieval.py b/mteb/tasks/Retrieval/vie/CQADupstackMathematicaVNRetrieval.py new file mode 100644 index 0000000000..a12c8f1c29 --- /dev/null +++ b/mteb/tasks/Retrieval/vie/CQADupstackMathematicaVNRetrieval.py @@ -0,0 +1,44 @@ +from __future__ import annotations + +from mteb.abstasks.AbsTaskRetrieval import AbsTaskRetrieval +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class CQADupstackMathematicaVN(AbsTaskRetrieval): + metadata = TaskMetadata( + name="CQADupstackMathematica-VN", + description="""A translated dataset from CQADupStack: A Benchmark Data Set for Community Question-Answering Research + The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: + - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. + - Applies advanced embedding models to filter the translations. 
+ - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""", + reference="http://nlp.cis.unimelb.edu.au/resources/cqadupstack/", + dataset={ + "path": "GreenNode/cqadupstack-mathematica-vn", + "revision": "d0cc9b60ba66faa3fb21cb9a54ef969af548b312", + }, + type="Retrieval", + category="s2p", + eval_splits=["test"], + eval_langs=["vie-Latn"], + main_score="ndcg_at_10", + date=("2025-07-29", "2025-07-30"), + license="cc-by-sa-4.0", + annotations_creators="derived", + dialect=[], + sample_creation="machine-translated and LM verified", + domains=["Written", "Academic", "Non-fiction"], + task_subtypes=["Question answering", "Duplicate Detection"], + bibtex_citation=r""" +@misc{pham2025vnmtebvietnamesemassivetext, + archiveprefix = {arXiv}, + author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang}, + eprint = {2507.21500}, + primaryclass = {cs.CL}, + title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark}, + url = {https://arxiv.org/abs/2507.21500}, + year = {2025}, +} +""", + adapted_from=["CQADupstackMathematica"], + ) diff --git a/mteb/tasks/Retrieval/vie/CQADupstackPhysicsVNRetrieval.py b/mteb/tasks/Retrieval/vie/CQADupstackPhysicsVNRetrieval.py new file mode 100644 index 0000000000..e10955e22a --- /dev/null +++ b/mteb/tasks/Retrieval/vie/CQADupstackPhysicsVNRetrieval.py @@ -0,0 +1,44 @@ +from __future__ import annotations + +from mteb.abstasks.AbsTaskRetrieval import AbsTaskRetrieval +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class CQADupstackPhysicsVN(AbsTaskRetrieval): + metadata = TaskMetadata( + name="CQADupstackPhysics-VN", + description="""A translated dataset from CQADupStack: A Benchmark Data Set for Community Question-Answering Research + The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: + - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. + - Applies advanced embedding models to filter the translations. 
+ - Uses an LLM-as-a-judge to score the quality of the samples based on multiple criteria.""",
+ reference="http://nlp.cis.unimelb.edu.au/resources/cqadupstack/",
+ dataset={
+ "path": "GreenNode/cqadupstack-physics-vn",
+ "revision": "8b6b68b59933cc72985f674f76c80a678c27d6be",
+ },
+ type="Retrieval",
+ category="s2p",
+ eval_splits=["test"],
+ eval_langs=["vie-Latn"],
+ main_score="ndcg_at_10",
+ date=("2025-07-29", "2025-07-30"),
+ license="cc-by-sa-4.0",
+ annotations_creators="derived",
+ dialect=[],
+ sample_creation="machine-translated and LM verified",
+ domains=["Written", "Academic", "Non-fiction"],
+ task_subtypes=["Question answering", "Duplicate Detection"],
+ bibtex_citation=r"""
+@misc{pham2025vnmtebvietnamesemassivetext,
+ archiveprefix = {arXiv},
+ author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang},
+ eprint = {2507.21500},
+ primaryclass = {cs.CL},
+ title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark},
+ url = {https://arxiv.org/abs/2507.21500},
+ year = {2025},
+}
+""",
+ adapted_from=["CQADupstackPhysics"],
+ )
diff --git a/mteb/tasks/Retrieval/vie/CQADupstackProgrammersVNRetrieval.py b/mteb/tasks/Retrieval/vie/CQADupstackProgrammersVNRetrieval.py
new file mode 100644
index 0000000000..ebe78eeddc
--- /dev/null
+++ b/mteb/tasks/Retrieval/vie/CQADupstackProgrammersVNRetrieval.py
@@ -0,0 +1,44 @@
+from __future__ import annotations
+
+from mteb.abstasks.AbsTaskRetrieval import AbsTaskRetrieval
+from mteb.abstasks.TaskMetadata import TaskMetadata
+
+
+class CQADupstackProgrammersVN(AbsTaskRetrieval):
+ metadata = TaskMetadata(
+ name="CQADupstackProgrammers-VN",
+ description="""A translated dataset from CQADupStack: A Benchmark Data Set for Community Question-Answering Research
+ The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
+ - The system uses large language models (LLMs), specifically Cohere's Aya model, for translation.
+ - Applies advanced embedding models to filter the translations.
+ - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""", + reference="http://nlp.cis.unimelb.edu.au/resources/cqadupstack/", + dataset={ + "path": "GreenNode/cqadupstack-programmers-vn", + "revision": "1a628c4e61f71ffdb7707d6d4024d25cfe68215a", + }, + type="Retrieval", + category="s2p", + eval_splits=["test"], + eval_langs=["vie-Latn"], + main_score="ndcg_at_10", + date=("2025-07-29", "2025-07-30"), + license="cc-by-sa-4.0", + annotations_creators="derived", + dialect=[], + sample_creation="machine-translated and LM verified", + domains=["Programming", "Written", "Non-fiction"], + task_subtypes=[], + bibtex_citation=r""" +@misc{pham2025vnmtebvietnamesemassivetext, + archiveprefix = {arXiv}, + author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang}, + eprint = {2507.21500}, + primaryclass = {cs.CL}, + title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark}, + url = {https://arxiv.org/abs/2507.21500}, + year = {2025}, +} +""", + adapted_from=["CQADupstackProgrammers"], + ) diff --git a/mteb/tasks/Retrieval/vie/CQADupstackStatsVNRetrieval.py b/mteb/tasks/Retrieval/vie/CQADupstackStatsVNRetrieval.py new file mode 100644 index 0000000000..339f9f43d4 --- /dev/null +++ b/mteb/tasks/Retrieval/vie/CQADupstackStatsVNRetrieval.py @@ -0,0 +1,44 @@ +from __future__ import annotations + +from mteb.abstasks.AbsTaskRetrieval import AbsTaskRetrieval +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class CQADupstackStatsVN(AbsTaskRetrieval): + metadata = TaskMetadata( + name="CQADupstackStats-VN", + description="""A translated dataset from CQADupStack: A Benchmark Data Set for Community Question-Answering Research + The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: + - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. + - Applies advanced embedding models to filter the translations. 
+ - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""", + reference="http://nlp.cis.unimelb.edu.au/resources/cqadupstack/", + dataset={ + "path": "GreenNode/cqadupstack-stats-vn", + "revision": "6b8164f3af61f3bb7728724229ba36213fb46c25", + }, + type="Retrieval", + category="s2p", + eval_splits=["test"], + eval_langs=["vie-Latn"], + main_score="ndcg_at_10", + date=("2025-07-29", "2025-07-30"), + license="cc-by-sa-4.0", + annotations_creators="derived", + dialect=[], + sample_creation="machine-translated and LM verified", + domains=["Written", "Academic", "Non-fiction"], + task_subtypes=["Question answering", "Duplicate Detection"], + bibtex_citation=r""" +@misc{pham2025vnmtebvietnamesemassivetext, + archiveprefix = {arXiv}, + author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang}, + eprint = {2507.21500}, + primaryclass = {cs.CL}, + title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark}, + url = {https://arxiv.org/abs/2507.21500}, + year = {2025}, +} +""", + adapted_from=["CQADupstackStats"], + ) diff --git a/mteb/tasks/Retrieval/vie/CQADupstackTexVNRetrieval.py b/mteb/tasks/Retrieval/vie/CQADupstackTexVNRetrieval.py new file mode 100644 index 0000000000..ceb699bb88 --- /dev/null +++ b/mteb/tasks/Retrieval/vie/CQADupstackTexVNRetrieval.py @@ -0,0 +1,44 @@ +from __future__ import annotations + +from mteb.abstasks.AbsTaskRetrieval import AbsTaskRetrieval +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class CQADupstackTexVN(AbsTaskRetrieval): + metadata = TaskMetadata( + name="CQADupstackTex-VN", + description="""A translated dataset from CQADupStack: A Benchmark Data Set for Community Question-Answering Research + The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: + - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. + - Applies advanced embedding models to filter the translations. 
+ - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""", + reference="http://nlp.cis.unimelb.edu.au/resources/cqadupstack/", + dataset={ + "path": "GreenNode/cqadupstack-tex-vn", + "revision": "aec43e5ae40451526528b3fc80dd5983ec388e21", + }, + type="Retrieval", + category="s2p", + eval_splits=["test"], + eval_langs=["vie-Latn"], + main_score="ndcg_at_10", + date=("2025-07-29", "2025-07-30"), + license="cc-by-sa-4.0", + annotations_creators="derived", + dialect=[], + sample_creation="machine-translated and LM verified", + domains=["Written", "Non-fiction"], + task_subtypes=["Question answering", "Duplicate Detection"], + bibtex_citation=r""" +@misc{pham2025vnmtebvietnamesemassivetext, + archiveprefix = {arXiv}, + author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang}, + eprint = {2507.21500}, + primaryclass = {cs.CL}, + title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark}, + url = {https://arxiv.org/abs/2507.21500}, + year = {2025}, +} +""", + adapted_from=["CQADupstackTex"], + ) diff --git a/mteb/tasks/Retrieval/vie/CQADupstackUnixVNRetrieval.py b/mteb/tasks/Retrieval/vie/CQADupstackUnixVNRetrieval.py new file mode 100644 index 0000000000..dc751d04a9 --- /dev/null +++ b/mteb/tasks/Retrieval/vie/CQADupstackUnixVNRetrieval.py @@ -0,0 +1,44 @@ +from __future__ import annotations + +from mteb.abstasks.AbsTaskRetrieval import AbsTaskRetrieval +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class CQADupstackUnixVN(AbsTaskRetrieval): + metadata = TaskMetadata( + name="CQADupstackUnix-VN", + description="""A translated dataset from CQADupStack: A Benchmark Data Set for Community Question-Answering Research + The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: + - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. + - Applies advanced embedding models to filter the translations. 
+ - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""", + reference="http://nlp.cis.unimelb.edu.au/resources/cqadupstack/", + dataset={ + "path": "GreenNode/cqadupstack-unix-vn", + "revision": "f8b884697871cb38901139f2435c273135f83a3f", + }, + type="Retrieval", + category="s2p", + eval_splits=["test"], + eval_langs=["vie-Latn"], + main_score="ndcg_at_10", + date=("2025-07-29", "2025-07-30"), + license="cc-by-sa-4.0", + annotations_creators="derived", + dialect=[], + sample_creation="machine-translated and LM verified", + domains=["Written", "Web", "Programming"], + task_subtypes=["Question answering", "Duplicate Detection"], + bibtex_citation=r""" +@misc{pham2025vnmtebvietnamesemassivetext, + archiveprefix = {arXiv}, + author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang}, + eprint = {2507.21500}, + primaryclass = {cs.CL}, + title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark}, + url = {https://arxiv.org/abs/2507.21500}, + year = {2025}, +} +""", + adapted_from=["CQADupstackUnix"], + ) diff --git a/mteb/tasks/Retrieval/vie/CQADupstackWebmastersVNRetrieval.py b/mteb/tasks/Retrieval/vie/CQADupstackWebmastersVNRetrieval.py new file mode 100644 index 0000000000..03751d00ae --- /dev/null +++ b/mteb/tasks/Retrieval/vie/CQADupstackWebmastersVNRetrieval.py @@ -0,0 +1,44 @@ +from __future__ import annotations + +from mteb.abstasks.AbsTaskRetrieval import AbsTaskRetrieval +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class CQADupstackWebmastersVN(AbsTaskRetrieval): + metadata = TaskMetadata( + name="CQADupstackWebmasters-VN", + description="""A translated dataset from CQADupStack: A Benchmark Data Set for Community Question-Answering Research + The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: + - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. + - Applies advanced embedding models to filter the translations. 
+ - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""", + reference="http://nlp.cis.unimelb.edu.au/resources/cqadupstack/", + dataset={ + "path": "GreenNode/cqadupstack-webmasters-vn", + "revision": "482d6e560d977b137e435d33379c5a8049e70e8d", + }, + type="Retrieval", + category="s2p", + eval_splits=["test"], + eval_langs=["vie-Latn"], + main_score="ndcg_at_10", + date=("2025-07-29", "2025-07-30"), + license="cc-by-sa-4.0", + annotations_creators="derived", + dialect=[], + sample_creation="machine-translated and LM verified", + domains=["Written", "Web"], + task_subtypes=["Question answering"], + bibtex_citation=r""" +@misc{pham2025vnmtebvietnamesemassivetext, + archiveprefix = {arXiv}, + author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang}, + eprint = {2507.21500}, + primaryclass = {cs.CL}, + title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark}, + url = {https://arxiv.org/abs/2507.21500}, + year = {2025}, +} +""", + adapted_from=["CQADupstackWebmasters"], + ) diff --git a/mteb/tasks/Retrieval/vie/CQADupstackWordpressVNRetrieval.py b/mteb/tasks/Retrieval/vie/CQADupstackWordpressVNRetrieval.py new file mode 100644 index 0000000000..a77659887b --- /dev/null +++ b/mteb/tasks/Retrieval/vie/CQADupstackWordpressVNRetrieval.py @@ -0,0 +1,44 @@ +from __future__ import annotations + +from mteb.abstasks.AbsTaskRetrieval import AbsTaskRetrieval +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class CQADupstackWordpressVN(AbsTaskRetrieval): + metadata = TaskMetadata( + name="CQADupstackWordpress-VN", + dataset={ + "path": "GreenNode/cqadupstack-wordpress-vn", + "revision": "2230f80e1baf42aa005731ca86577621c566fcd7", + }, + description="""A translated dataset from CQADupStack: A Benchmark Data Set for Community Question-Answering Research + The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: + - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. + - Applies advanced embedding models to filter the translations. 
+ - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""", + reference="http://nlp.cis.unimelb.edu.au/resources/cqadupstack/", + type="Retrieval", + category="s2p", + eval_splits=["test"], + eval_langs=["vie-Latn"], + main_score="ndcg_at_10", + date=("2025-07-29", "2025-07-30"), + license="cc-by-sa-4.0", + annotations_creators="derived", + dialect=[], + sample_creation="machine-translated and LM verified", + domains=["Written", "Web", "Programming"], + task_subtypes=["Question answering"], + bibtex_citation=r""" +@misc{pham2025vnmtebvietnamesemassivetext, + archiveprefix = {arXiv}, + author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang}, + eprint = {2507.21500}, + primaryclass = {cs.CL}, + title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark}, + url = {https://arxiv.org/abs/2507.21500}, + year = {2025}, +} +""", + adapted_from=["CQADupstackWordpress"], + ) diff --git a/mteb/tasks/Retrieval/vie/ClimateFEVERVNRetrieval.py b/mteb/tasks/Retrieval/vie/ClimateFEVERVNRetrieval.py new file mode 100644 index 0000000000..9edb81001e --- /dev/null +++ b/mteb/tasks/Retrieval/vie/ClimateFEVERVNRetrieval.py @@ -0,0 +1,44 @@ +from __future__ import annotations + +from mteb.abstasks.AbsTaskRetrieval import AbsTaskRetrieval +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class ClimateFEVERVN(AbsTaskRetrieval): + metadata = TaskMetadata( + name="ClimateFEVER-VN", + description="""A translated dataset from CLIMATE-FEVER is a dataset adopting the FEVER methodology that consists of 1,535 real-world claims regarding climate-change. + The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: + - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. + - Applies advanced embedding models to filter the translations. 
+ - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""", + reference="https://www.sustainablefinance.uzh.ch/en/research/climate-fever.html", + dataset={ + "path": "GreenNode/climate-fever-vn", + "revision": "42328bf787e17b1ad1a88be4f5e87ea9fb668511", + }, + type="Retrieval", + category="s2p", + eval_splits=["test"], + eval_langs=["vie-Latn"], + main_score="ndcg_at_10", + date=("2025-07-29", "2025-07-30"), + license="cc-by-sa-4.0", + annotations_creators="derived", + dialect=[], + sample_creation="machine-translated and LM verified", + domains=["Encyclopaedic", "Written"], + task_subtypes=["Claim verification"], + bibtex_citation=r""" +@misc{pham2025vnmtebvietnamesemassivetext, + archiveprefix = {arXiv}, + author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang}, + eprint = {2507.21500}, + primaryclass = {cs.CL}, + title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark}, + url = {https://arxiv.org/abs/2507.21500}, + year = {2025}, +} +""", + adapted_from=["ClimateFEVER"], + ) diff --git a/mteb/tasks/Retrieval/vie/DBPediaVNRetrieval.py b/mteb/tasks/Retrieval/vie/DBPediaVNRetrieval.py new file mode 100644 index 0000000000..ec01dc8abe --- /dev/null +++ b/mteb/tasks/Retrieval/vie/DBPediaVNRetrieval.py @@ -0,0 +1,44 @@ +from __future__ import annotations + +from mteb.abstasks.AbsTaskRetrieval import AbsTaskRetrieval +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class DBPediaVN(AbsTaskRetrieval): + metadata = TaskMetadata( + name="DBPedia-VN", + description="""A translated dataset from DBpedia-Entity is a standard test collection for entity search over the DBpedia knowledge base + The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: + - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. + - Applies advanced embedding models to filter the translations. 
+ - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""", + reference="https://github.com/iai-group/DBpedia-Entity/", + dataset={ + "path": "GreenNode/dbpedia-vn", + "revision": "c3e20179fbcee16217ef9461a14a54b7faca9b63", + }, + type="Retrieval", + category="s2p", + eval_splits=["test"], + eval_langs=["vie-Latn"], + main_score="ndcg_at_10", + date=("2025-07-29", "2025-07-30"), + license="cc-by-sa-4.0", + annotations_creators="derived", + dialect=[], + sample_creation="machine-translated and LM verified", + domains=["Written", "Encyclopaedic"], + task_subtypes=[], + bibtex_citation=r""" +@misc{pham2025vnmtebvietnamesemassivetext, + archiveprefix = {arXiv}, + author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang}, + eprint = {2507.21500}, + primaryclass = {cs.CL}, + title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark}, + url = {https://arxiv.org/abs/2507.21500}, + year = {2025}, +} +""", + adapted_from=["DBPedia"], + ) diff --git a/mteb/tasks/Retrieval/vie/FEVERVNRetrieval.py b/mteb/tasks/Retrieval/vie/FEVERVNRetrieval.py new file mode 100644 index 0000000000..49fdcf2fe6 --- /dev/null +++ b/mteb/tasks/Retrieval/vie/FEVERVNRetrieval.py @@ -0,0 +1,46 @@ +from __future__ import annotations + +from mteb.abstasks.AbsTaskRetrieval import AbsTaskRetrieval +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class FEVERVN(AbsTaskRetrieval): + metadata = TaskMetadata( + name="FEVER-VN", + dataset={ + "path": "GreenNode/fever-vn", + "revision": "a543dd8b98aed3603110c01d26db05ba39b87d49", + }, + description="""A translated dataset from FEVER (Fact Extraction and VERification) consists of 185,445 claims generated by altering sentences + extracted from Wikipedia and subsequently verified without knowledge of the sentence they were + derived from. + The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: + - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. + - Applies advanced embedding models to filter the translations. 
+ - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""", + reference="https://fever.ai/", + type="Retrieval", + category="s2p", + eval_splits=["test"], + eval_langs=["vie-Latn"], + main_score="ndcg_at_10", + date=("2025-07-29", "2025-07-30"), + license="cc-by-sa-4.0", + annotations_creators="derived", + dialect=[], + sample_creation="machine-translated and LM verified", + domains=["Encyclopaedic", "Written"], + task_subtypes=["Claim verification"], + bibtex_citation=r""" +@misc{pham2025vnmtebvietnamesemassivetext, + archiveprefix = {arXiv}, + author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang}, + eprint = {2507.21500}, + primaryclass = {cs.CL}, + title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark}, + url = {https://arxiv.org/abs/2507.21500}, + year = {2025}, +} +""", + adapted_from=["FEVER"], + ) diff --git a/mteb/tasks/Retrieval/vie/FiQA2018VNRetrieval.py b/mteb/tasks/Retrieval/vie/FiQA2018VNRetrieval.py new file mode 100644 index 0000000000..a039aa53c9 --- /dev/null +++ b/mteb/tasks/Retrieval/vie/FiQA2018VNRetrieval.py @@ -0,0 +1,44 @@ +from __future__ import annotations + +from mteb.abstasks.AbsTaskRetrieval import AbsTaskRetrieval +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class FiQA2018VN(AbsTaskRetrieval): + metadata = TaskMetadata( + name="FiQA2018-VN", + description="""A translated dataset from Financial Opinion Mining and Question Answering + The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: + - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. + - Applies advanced embedding models to filter the translations. + - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""", + reference="https://sites.google.com/view/fiqa/", + dataset={ + "path": "GreenNode/fiqa-vn", + "revision": "6c3cdf6f102151dbbbbc1d2cf999b305eba44dae", + }, + type="Retrieval", + category="s2p", + eval_splits=["test"], + eval_langs=["vie-Latn"], + main_score="ndcg_at_10", + date=("2025-07-29", "2025-07-30"), + license="cc-by-sa-4.0", + annotations_creators="derived", + dialect=[], + sample_creation="machine-translated and LM verified", + domains=["Written", "Financial"], + task_subtypes=["Question answering"], + bibtex_citation=r""" +@misc{pham2025vnmtebvietnamesemassivetext, + archiveprefix = {arXiv}, + author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang}, + eprint = {2507.21500}, + primaryclass = {cs.CL}, + title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark}, + url = {https://arxiv.org/abs/2507.21500}, + year = {2025}, +} +""", + adapted_from=["FiQA2018"], + ) diff --git a/mteb/tasks/Retrieval/vie/HotpotQAVNRetrieval.py b/mteb/tasks/Retrieval/vie/HotpotQAVNRetrieval.py new file mode 100644 index 0000000000..13de52d87a --- /dev/null +++ b/mteb/tasks/Retrieval/vie/HotpotQAVNRetrieval.py @@ -0,0 +1,45 @@ +from __future__ import annotations + +from mteb.abstasks.AbsTaskRetrieval import AbsTaskRetrieval +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class HotpotQAVN(AbsTaskRetrieval): + metadata = TaskMetadata( + name="HotpotQA-VN", + dataset={ + "path": "GreenNode/hotpotqa-vn", + "revision": "8a5220c7af5084f0d5d2afeb74f9c2b41b759ff0", + }, + description="""A translated dataset from HotpotQA is a question answering dataset featuring natural, multi-hop questions, with strong + supervision for supporting facts to enable more 
explainable question answering systems. + The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: + - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. + - Applies advanced embedding models to filter the translations. + - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""", + reference="https://hotpotqa.github.io/", + type="Retrieval", + category="s2p", + eval_splits=["test"], + eval_langs=["vie-Latn"], + main_score="ndcg_at_10", + date=("2025-07-29", "2025-07-30"), + license="cc-by-sa-4.0", + annotations_creators="derived", + dialect=[], + sample_creation="machine-translated and LM verified", + domains=["Web", "Written"], + task_subtypes=["Question answering"], + bibtex_citation=r""" +@misc{pham2025vnmtebvietnamesemassivetext, + archiveprefix = {arXiv}, + author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang}, + eprint = {2507.21500}, + primaryclass = {cs.CL}, + title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark}, + url = {https://arxiv.org/abs/2507.21500}, + year = {2025}, +} +""", + adapted_from=["HotpotQA"], + ) diff --git a/mteb/tasks/Retrieval/vie/MSMARCOVNRetrieval.py b/mteb/tasks/Retrieval/vie/MSMARCOVNRetrieval.py new file mode 100644 index 0000000000..2fbb5140f4 --- /dev/null +++ b/mteb/tasks/Retrieval/vie/MSMARCOVNRetrieval.py @@ -0,0 +1,55 @@ +from __future__ import annotations + +from mteb.abstasks.AbsTaskRetrieval import AbsTaskRetrieval +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class MSMARCOVN(AbsTaskRetrieval): + metadata = TaskMetadata( + name="MSMARCO-VN", + dataset={ + "path": "GreenNode/msmarco-vn", + "revision": "85d1ad4cc9070b8d019d65f5af1631a2ab91e294", + }, + description="""A translated dataset from MS MARCO is a collection of datasets focused on deep learning in search + The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: + - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. + - Applies advanced embedding models to filter the translations. 
+ - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""", + reference="https://microsoft.github.io/msmarco/", + type="Retrieval", + category="s2p", + eval_splits=["dev"], + eval_langs=["vie-Latn"], + main_score="ndcg_at_10", + date=("2025-07-29", "2025-07-30"), + license="cc-by-sa-4.0", + annotations_creators="derived", + dialect=[], + sample_creation="machine-translated and LM verified", + domains=[ + "Encyclopaedic", + "Academic", + "Blog", + "News", + "Medical", + "Government", + "Reviews", + "Non-fiction", + "Social", + "Web", + ], + task_subtypes=["Question answering"], + bibtex_citation=r""" +@misc{pham2025vnmtebvietnamesemassivetext, + archiveprefix = {arXiv}, + author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang}, + eprint = {2507.21500}, + primaryclass = {cs.CL}, + title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark}, + url = {https://arxiv.org/abs/2507.21500}, + year = {2025}, +} +""", + adapted_from=["MSMARCO"], + ) diff --git a/mteb/tasks/Retrieval/vie/NFCorpusVNRetrieval.py b/mteb/tasks/Retrieval/vie/NFCorpusVNRetrieval.py new file mode 100644 index 0000000000..7086eb5b7a --- /dev/null +++ b/mteb/tasks/Retrieval/vie/NFCorpusVNRetrieval.py @@ -0,0 +1,44 @@ +from __future__ import annotations + +from mteb.abstasks.AbsTaskRetrieval import AbsTaskRetrieval +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class NFCorpusVN(AbsTaskRetrieval): + metadata = TaskMetadata( + name="NFCorpus-VN", + dataset={ + "path": "GreenNode/nfcorpus-vn", + "revision": "a13d72fbb859be3dc19ab669d1ec9510407d2dcd", + }, + description="""A translated dataset from NFCorpus: A Full-Text Learning to Rank Dataset for Medical Information Retrieval + The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: + - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. + - Applies advanced embedding models to filter the translations. 
+ - Uses an LLM-as-a-judge to score the quality of the samples based on multiple criteria.""",
+ reference="https://www.cl.uni-heidelberg.de/statnlpgroup/nfcorpus/",
+ type="Retrieval",
+ category="s2p",
+ eval_splits=["test"],
+ eval_langs=["vie-Latn"],
+ main_score="ndcg_at_10",
+ date=("2025-07-29", "2025-07-30"),
+ license="cc-by-sa-4.0",
+ annotations_creators="derived",
+ dialect=[],
+ sample_creation="machine-translated and LM verified",
+ domains=["Medical", "Academic", "Written"],
+ task_subtypes=["Article retrieval"],
+ bibtex_citation=r"""
+@misc{pham2025vnmtebvietnamesemassivetext,
+ archiveprefix = {arXiv},
+ author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang},
+ eprint = {2507.21500},
+ primaryclass = {cs.CL},
+ title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark},
+ url = {https://arxiv.org/abs/2507.21500},
+ year = {2025},
+}
+""",
+ adapted_from=["NFCorpus"],
+ )
diff --git a/mteb/tasks/Retrieval/vie/NQVNRetrieval.py b/mteb/tasks/Retrieval/vie/NQVNRetrieval.py
new file mode 100644
index 0000000000..5bb940c167
--- /dev/null
+++ b/mteb/tasks/Retrieval/vie/NQVNRetrieval.py
@@ -0,0 +1,44 @@
+from __future__ import annotations
+
+from mteb.abstasks.AbsTaskRetrieval import AbsTaskRetrieval
+from mteb.abstasks.TaskMetadata import TaskMetadata
+
+
+class NQVN(AbsTaskRetrieval):
+ metadata = TaskMetadata(
+ name="NQ-VN",
+ dataset={
+ "path": "GreenNode/nq-vn",
+ "revision": "40a6d7f343b9c9f4855a426d8c431ad5f8aaf56b",
+ },
+ description="""A translated dataset from NQ (Natural Questions), a question answering benchmark built from real Google search queries with answers drawn from Wikipedia.
+ The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
+ - The system uses large language models (LLMs), specifically Cohere's Aya model, for translation.
+ - Applies advanced embedding models to filter the translations.
+ - Uses an LLM-as-a-judge to score the quality of the samples based on multiple criteria.""",
+ reference="https://ai.google.com/research/NaturalQuestions/",
+ type="Retrieval",
+ category="s2p",
+ eval_splits=["test"],
+ eval_langs=["vie-Latn"],
+ main_score="ndcg_at_10",
+ date=("2025-07-29", "2025-07-30"),
+ license="cc-by-sa-4.0",
+ annotations_creators="derived",
+ dialect=[],
+ sample_creation="machine-translated and LM verified",
+ domains=["Written", "Encyclopaedic"],
+ task_subtypes=["Question answering"],
+ bibtex_citation=r"""
+@misc{pham2025vnmtebvietnamesemassivetext,
+ archiveprefix = {arXiv},
+ author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang},
+ eprint = {2507.21500},
+ primaryclass = {cs.CL},
+ title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark},
+ url = {https://arxiv.org/abs/2507.21500},
+ year = {2025},
+}
+""",
+ adapted_from=["NQ"],
+ )
diff --git a/mteb/tasks/Retrieval/vie/QuoraVNRetrieval.py b/mteb/tasks/Retrieval/vie/QuoraVNRetrieval.py
new file mode 100644
index 0000000000..d3e357f871
--- /dev/null
+++ b/mteb/tasks/Retrieval/vie/QuoraVNRetrieval.py
@@ -0,0 +1,45 @@
+from __future__ import annotations
+
+from mteb.abstasks.AbsTaskRetrieval import AbsTaskRetrieval
+from mteb.abstasks.TaskMetadata import TaskMetadata
+
+
+class QuoraVN(AbsTaskRetrieval):
+ metadata = TaskMetadata(
+ name="Quora-VN",
+ dataset={
+ "path": "GreenNode/quora-vn",
+ "revision": "3363d81e41b67c1032bf3b234882a03d271e2289",
+ },
+ description="""A translated dataset from QuoraRetrieval is based on questions that are marked as duplicates on the Quora platform.
Given a + question, find other (duplicate) questions. + The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: + - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. + - Applies advanced embedding models to filter the translations. + - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""", + reference="https://quoradata.quora.com/First-Quora-Dataset-Release-Question-Pairs", + type="Retrieval", + category="s2s", + eval_splits=["test"], + eval_langs=["vie-Latn"], + main_score="ndcg_at_10", + date=("2025-07-29", "2025-07-30"), + license="cc-by-sa-4.0", + annotations_creators="derived", + dialect=[], + sample_creation="machine-translated and LM verified", + domains=["Written", "Web", "Blog"], + task_subtypes=["Question answering"], + bibtex_citation=r""" +@misc{pham2025vnmtebvietnamesemassivetext, + archiveprefix = {arXiv}, + author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang}, + eprint = {2507.21500}, + primaryclass = {cs.CL}, + title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark}, + url = {https://arxiv.org/abs/2507.21500}, + year = {2025}, +} +""", + adapted_from=["Quora"], + ) diff --git a/mteb/tasks/Retrieval/vie/SCIDOCSVNRetrieval.py b/mteb/tasks/Retrieval/vie/SCIDOCSVNRetrieval.py new file mode 100644 index 0000000000..93088401c9 --- /dev/null +++ b/mteb/tasks/Retrieval/vie/SCIDOCSVNRetrieval.py @@ -0,0 +1,45 @@ +from __future__ import annotations + +from mteb.abstasks.AbsTaskRetrieval import AbsTaskRetrieval +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class SCIDOCSVN(AbsTaskRetrieval): + metadata = TaskMetadata( + name="SCIDOCS-VN", + dataset={ + "path": "GreenNode/scidocs-vn", + "revision": "724cddfa9d328a193f303a0a9b7789468ac79f26", + }, + description="""A translated dataset from SciDocs, a new evaluation benchmark consisting of seven document-level tasks ranging from citation + prediction, to document classification and recommendation. + The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: + - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. + - Applies advanced embedding models to filter the translations. 
+ - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""", + reference="https://allenai.org/data/scidocs", + type="Retrieval", + category="s2p", + eval_splits=["test"], + eval_langs=["vie-Latn"], + main_score="ndcg_at_10", + date=("2025-07-29", "2025-07-30"), + license="cc-by-sa-4.0", + annotations_creators="derived", + dialect=[], + sample_creation="machine-translated and LM verified", + domains=["Academic", "Written", "Non-fiction"], + task_subtypes=[], + bibtex_citation=r""" +@misc{pham2025vnmtebvietnamesemassivetext, + archiveprefix = {arXiv}, + author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang}, + eprint = {2507.21500}, + primaryclass = {cs.CL}, + title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark}, + url = {https://arxiv.org/abs/2507.21500}, + year = {2025}, +} +""", + adapted_from=["SCIDOCS"], + ) diff --git a/mteb/tasks/Retrieval/vie/SciFactVNRetrieval.py b/mteb/tasks/Retrieval/vie/SciFactVNRetrieval.py new file mode 100644 index 0000000000..d2d5034742 --- /dev/null +++ b/mteb/tasks/Retrieval/vie/SciFactVNRetrieval.py @@ -0,0 +1,44 @@ +from __future__ import annotations + +from mteb.abstasks.AbsTaskRetrieval import AbsTaskRetrieval +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class SciFactVN(AbsTaskRetrieval): + metadata = TaskMetadata( + name="SciFact-VN", + dataset={ + "path": "GreenNode/scifact-vn", + "revision": "483a7cf890c523c954e7751d328c5bb65061dcff", + }, + description="""A translated dataset from SciFact verifies scientific claims using evidence from the research literature containing scientific paper abstracts. + The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: + - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. + - Applies advanced embedding models to filter the translations. + - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""", + reference="https://github.com/allenai/scifact", + type="Retrieval", + category="s2p", + eval_splits=["test"], + eval_langs=["vie-Latn"], + main_score="ndcg_at_10", + date=("2025-07-29", "2025-07-30"), + license="cc-by-sa-4.0", + annotations_creators="derived", + dialect=[], + sample_creation="machine-translated and LM verified", + domains=["Academic", "Medical", "Written"], + task_subtypes=[], + bibtex_citation=r""" +@misc{pham2025vnmtebvietnamesemassivetext, + archiveprefix = {arXiv}, + author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang}, + eprint = {2507.21500}, + primaryclass = {cs.CL}, + title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark}, + url = {https://arxiv.org/abs/2507.21500}, + year = {2025}, +} +""", + adapted_from=["SciFact"], + ) diff --git a/mteb/tasks/Retrieval/vie/TRECCOVIDVNRetrieval.py b/mteb/tasks/Retrieval/vie/TRECCOVIDVNRetrieval.py new file mode 100644 index 0000000000..ef9f69224a --- /dev/null +++ b/mteb/tasks/Retrieval/vie/TRECCOVIDVNRetrieval.py @@ -0,0 +1,44 @@ +from __future__ import annotations + +from mteb.abstasks.AbsTaskRetrieval import AbsTaskRetrieval +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class TRECCOVIDVN(AbsTaskRetrieval): + metadata = TaskMetadata( + name="TRECCOVID-VN", + description="""A translated dataset from TRECCOVID is an ad-hoc search challenge based on the COVID-19 dataset containing scientific articles related to the COVID-19 pandemic. 
+ The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: + - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. + - Applies advanced embedding models to filter the translations. + - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""", + reference="https://ir.nist.gov/covidSubmit/index.html", + dataset={ + "path": "GreenNode/trec-covid-vn", + "revision": "54d73a1ea11ea0ae4ec0214ec519c93db79dee88", + }, + type="Retrieval", + category="s2p", + eval_splits=["test"], + eval_langs=["vie-Latn"], + main_score="ndcg_at_10", + date=("2025-07-29", "2025-07-30"), + license="cc-by-sa-4.0", + annotations_creators="derived", + dialect=[], + sample_creation="machine-translated and LM verified", + domains=["Medical", "Academic", "Written"], + task_subtypes=["Article retrieval"], + bibtex_citation=r""" +@misc{pham2025vnmtebvietnamesemassivetext, + archiveprefix = {arXiv}, + author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang}, + eprint = {2507.21500}, + primaryclass = {cs.CL}, + title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark}, + url = {https://arxiv.org/abs/2507.21500}, + year = {2025}, +} +""", + adapted_from=["TRECCOVID"], + ) diff --git a/mteb/tasks/Retrieval/vie/Touche2020VNRetrieval.py b/mteb/tasks/Retrieval/vie/Touche2020VNRetrieval.py new file mode 100644 index 0000000000..8011850d66 --- /dev/null +++ b/mteb/tasks/Retrieval/vie/Touche2020VNRetrieval.py @@ -0,0 +1,44 @@ +from __future__ import annotations + +from mteb.abstasks.AbsTaskRetrieval import AbsTaskRetrieval +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class Touche2020VN(AbsTaskRetrieval): + metadata = TaskMetadata( + name="Touche2020-VN", + description="""A translated dataset from Touché Task 1: Argument Retrieval for Controversial Questions + The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: + - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. + - Applies advanced embedding models to filter the translations. 
+ - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""", + reference="https://webis.de/events/touche-20/shared-task-1.html", + dataset={ + "path": "GreenNode/webis-touche2020-vn", + "revision": "cd4389b182aec622c8121ee8db988359197159c1", + }, + type="Retrieval", + category="s2p", + eval_splits=["test"], + eval_langs=["vie-Latn"], + main_score="ndcg_at_10", + date=("2025-07-29", "2025-07-30"), + license="cc-by-sa-4.0", + annotations_creators="derived", + dialect=[], + sample_creation="machine-translated and LM verified", + domains=["Academic"], + task_subtypes=["Question answering"], + bibtex_citation=r""" +@misc{pham2025vnmtebvietnamesemassivetext, + archiveprefix = {arXiv}, + author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang}, + eprint = {2507.21500}, + primaryclass = {cs.CL}, + title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark}, + url = {https://arxiv.org/abs/2507.21500}, + year = {2025}, +} +""", + adapted_from=["Touche2020"], + ) diff --git a/mteb/tasks/STS/__init__.py b/mteb/tasks/STS/__init__.py index 471789f1c9..00ccf0d7ff 100644 --- a/mteb/tasks/STS/__init__.py +++ b/mteb/tasks/STS/__init__.py @@ -29,4 +29,7 @@ from .rus.RUParaPhraserSTS import * from .rus.RuSTSBenchmarkSTS import * from .spa.STSES import * +from .vie.BiossesSTSVN import * +from .vie.SickrSTSVN import * +from .vie.STSBenchmarkSTSVN import * from .zho.CMTEBSTS import * diff --git a/mteb/tasks/STS/vie/BiossesSTSVN.py b/mteb/tasks/STS/vie/BiossesSTSVN.py new file mode 100644 index 0000000000..dcc1819289 --- /dev/null +++ b/mteb/tasks/STS/vie/BiossesSTSVN.py @@ -0,0 +1,51 @@ +from __future__ import annotations + +from mteb.abstasks.AbsTaskSTS import AbsTaskSTS +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class BiossesSTSVN(AbsTaskSTS): + metadata = TaskMetadata( + name="BIOSSES-VN", + dataset={ + "path": "GreenNode/biosses-sts-vn", + "revision": "1dae4a6df91c0852680cd4ab48c8c1d8a9ed49b2", + }, + description="""A translated dataset from Biomedical Semantic Similarity Estimation. + The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: + - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. + - Applies advanced embedding models to filter the translations. 
+ - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""", + reference="https://tabilab.cmpe.boun.edu.tr/BIOSSES/DataSet.html", + type="STS", + category="s2s", + eval_splits=["test"], + eval_langs=["vie-Latn"], + main_score="cosine_spearman", + date=("2025-07-29", "2025-07-30"), + license="cc-by-sa-4.0", + annotations_creators="derived", + dialect=[], + sample_creation="machine-translated and LM verified", + domains=["Medical"], + task_subtypes=[], + bibtex_citation=r""" +@misc{pham2025vnmtebvietnamesemassivetext, + archiveprefix = {arXiv}, + author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang}, + eprint = {2507.21500}, + primaryclass = {cs.CL}, + title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark}, + url = {https://arxiv.org/abs/2507.21500}, + year = {2025}, +} +""", + adapted_from=["BIOSSES"], + ) + + @property + def metadata_dict(self) -> dict[str, str]: + metadata_dict = super().metadata_dict + metadata_dict["min_score"] = 0 + metadata_dict["max_score"] = 5 + return metadata_dict diff --git a/mteb/tasks/STS/vie/STSBenchmarkSTSVN.py b/mteb/tasks/STS/vie/STSBenchmarkSTSVN.py new file mode 100644 index 0000000000..1c7879a01d --- /dev/null +++ b/mteb/tasks/STS/vie/STSBenchmarkSTSVN.py @@ -0,0 +1,51 @@ +from __future__ import annotations + +from mteb.abstasks.AbsTaskSTS import AbsTaskSTS +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class STSBenchmarkSTSVN(AbsTaskSTS): + metadata = TaskMetadata( + name="STSBenchmark-VN", + dataset={ + "path": "GreenNode/stsbenchmark-sts-vn", + "revision": "f24d66738cda4a02138ada5af7689a92ce1fcad6", + }, + description="""A translated dataset from Semantic Textual Similarity Benchmark (STSbenchmark) dataset. + The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: + - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. + - Applies advanced embedding models to filter the translations. 
+ - Uses an LLM-as-a-judge to score the quality of the samples based on multiple criteria.""",
+ reference="https://github.com/PhilipMay/stsb-multi-mt/",
+ type="STS",
+ category="s2s",
+ eval_splits=["test"],
+ eval_langs=["vie-Latn"],
+ main_score="cosine_spearman",
+ date=("2025-07-29", "2025-07-30"),
+ license="cc-by-sa-4.0",
+ annotations_creators="derived",
+ dialect=[],
+ sample_creation="machine-translated and LM verified",
+ domains=["Blog", "News", "Written"],
+ task_subtypes=[],
+ bibtex_citation=r"""
+@misc{pham2025vnmtebvietnamesemassivetext,
+ archiveprefix = {arXiv},
+ author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang},
+ eprint = {2507.21500},
+ primaryclass = {cs.CL},
+ title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark},
+ url = {https://arxiv.org/abs/2507.21500},
+ year = {2025},
+}
+""",
+ adapted_from=["STSBenchmark"],
+ )
+
+ @property
+ def metadata_dict(self) -> dict[str, str]:
+ metadata_dict = super().metadata_dict
+ metadata_dict["min_score"] = 0
+ metadata_dict["max_score"] = 5
+ return metadata_dict
diff --git a/mteb/tasks/STS/vie/SickrSTSVN.py b/mteb/tasks/STS/vie/SickrSTSVN.py
new file mode 100644
index 0000000000..93c5d585d2
--- /dev/null
+++ b/mteb/tasks/STS/vie/SickrSTSVN.py
@@ -0,0 +1,51 @@
+from __future__ import annotations
+
+from mteb.abstasks.AbsTaskSTS import AbsTaskSTS
+from mteb.abstasks.TaskMetadata import TaskMetadata
+
+
+class SickrSTSVN(AbsTaskSTS):
+ metadata = TaskMetadata(
+ name="SICK-R-VN",
+ dataset={
+ "path": "GreenNode/sickr-sts-vn",
+ "revision": "bc89f0401983c456b609f7fb324278f346b2cccf",
+ },
+ description="""A translated dataset from the Semantic Textual Similarity SICK-R dataset.
+ The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
+ - The system uses large language models (LLMs), specifically Cohere's Aya model, for translation.
+ - Applies advanced embedding models to filter the translations.
+ - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""", + reference="https://aclanthology.org/2020.lrec-1.207", + type="STS", + category="s2s", + eval_splits=["test"], + eval_langs=["vie-Latn"], + main_score="cosine_spearman", + date=("2025-07-29", "2025-07-30"), + license="cc-by-sa-4.0", + annotations_creators="derived", + dialect=[], + sample_creation="machine-translated and LM verified", + domains=["Web", "Written"], + task_subtypes=["Textual Entailment"], + bibtex_citation=r""" +@misc{pham2025vnmtebvietnamesemassivetext, + archiveprefix = {arXiv}, + author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang}, + eprint = {2507.21500}, + primaryclass = {cs.CL}, + title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark}, + url = {https://arxiv.org/abs/2507.21500}, + year = {2025}, +} +""", + adapted_from=["SICK-R"], + ) + + @property + def metadata_dict(self) -> dict[str, str]: + metadata_dict = super().metadata_dict + metadata_dict["min_score"] = 0 + metadata_dict["max_score"] = 5 + return metadata_dict diff --git a/mteb/tasks/STS/vie/__init__.py b/mteb/tasks/STS/vie/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/scripts/data/clean_and_update_tasks.py b/scripts/data/clean_and_update_tasks.py index e74576b14b..caeed2649a 100644 --- a/scripts/data/clean_and_update_tasks.py +++ b/scripts/data/clean_and_update_tasks.py @@ -7,13 +7,13 @@ import warnings from dataclasses import dataclass from pathlib import Path -from typing import Literal, Optional +from typing import Literal import datasets import orjson import pandas as pd import typer -from datasets import Dataset, DatasetDict, load_dataset +from datasets import Dataset, DatasetDict from huggingface_hub import HfApi from tqdm import tqdm @@ -777,7 +777,7 @@ def update_v2_metadata_dataset( lines, ds_deleted = _update_dataset_dict(lines, call_node, new_path, new_revision) lines = _update_eval_splits(lines, call_node, module) - all_deleted_indices = sorted(list(set(desc_deleted + ds_deleted)), reverse=True) + all_deleted_indices = sorted(set(desc_deleted + ds_deleted), reverse=True) for i in all_deleted_indices: del lines[i] @@ -984,7 +984,7 @@ def create_and_prepare( "scripts/data/cleaning_reports", exists=True, dir_okay=True ), username: str = "mteb", - start_lang: Optional[str] = None, + start_lang: str | None = None, verbose: bool = typer.Option(False, "--verbose"), ) -> None: changed_tasks: list[tuple[str, int]] = [] @@ -1020,7 +1020,7 @@ def create_and_prepare( report_folder, folder.name, all_original_records, all_filter_records ) - unique_changed = sorted(list(set(changed_tasks))) + unique_changed = sorted(set(changed_tasks)) tasks_str = " ".join( f"{task_name} {task_name}.v{version}" for task_name, version in unique_changed @@ -1038,7 +1038,7 @@ def compare_results( results_dir: Path = typer.Option( "/home/admin/vatolin/experiments/mteb/results", exists=True, dir_okay=True ), - tasks_file: Optional[Path] = typer.Option( + tasks_file: Path | None = typer.Option( None, "--tasks-file", "-f", diff --git a/tests/test_models/test_model_meta.py b/tests/test_models/test_model_meta.py index f0c54dd99c..3a2b214068 100644 --- a/tests/test_models/test_model_meta.py +++ b/tests/test_models/test_model_meta.py @@ -62,6 +62,7 @@ def test_model_similar_tasks(training_datasets): "Touche2020", "Touche2020-Fa", "Touche2020-NL", + "Touche2020-VN", "Touche2020Retrieval.v3", ] assert sorted(dummy_model_meta.get_training_datasets().keys()) == expected
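For reference, a minimal sketch of how the newly added Vietnamese tasks could be run with mteb once this patch is applied. The task names below are taken from the metadata in the diff; the embedding model is an arbitrary multilingual example and the output folder is hypothetical, not part of the patch.

import mteb
from sentence_transformers import SentenceTransformer

# Arbitrary multilingual example model; any SentenceTransformer-compatible encoder works.
model = SentenceTransformer("sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")

# Task names match the `name` fields defined in the new task files above.
tasks = mteb.get_tasks(tasks=["CQADupstackAndroid-VN", "SICK-R-VN"])
evaluation = mteb.MTEB(tasks=tasks)

# Hypothetical output folder; results are written as JSON per task.
results = evaluation.run(model, output_folder="results/vn-mteb-check")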