33 changes: 33 additions & 0 deletions .github/ISSUE_TEMPLATE/eval_request.yaml
@@ -0,0 +1,33 @@
name: 📊 Evaluation Request
description: Create a request for a model to be evaluated in MTEB
title: "Evaluate model: {model_id}"
labels: ["evaluation request"]
body:
- type: input
attributes:
label: Model link on Hugging Face
description: Please provide a link to the model on Hugging Face. If the model is closed-source, please provide a link to the model provider or documentation.
validations:
required: true
- type: textarea
attributes:
label: What do you want it to be evaluated on?
description: Please specify the tasks or benchmarks you would like this model to be evaluated on.
validations:
required: true
- type: dropdown
id: contribute
attributes:
label: Are you interested in contributing to the evaluation of this model?
description: By default, MTEB maintainers will only handle evaluation on private subsets due to resource constraints. If you are interested in contributing to the evaluation, please let us know.
options:
- "Yes"
- "No"
- type: dropdown
id: exists
attributes:
label: Does this model already exist in MTEB?
description: If you are unsure, please check the mteb model registry (e.g. `mteb.get_model_meta("model_id")`).
options:
- "Yes"
- "No"
26 changes: 13 additions & 13 deletions mteb/benchmarks/benchmarks/rteb_benchmarks.py
@@ -4,10 +4,10 @@
from mteb.benchmarks.benchmark import RtebBenchmark
from mteb.overview import get_tasks

RTEB_CITATION = r"""@article{rteb2024,
author = {RTEB Authors},
title = {RTEB: Retrieval Embedding Benchmark for Multi-Domain Text Retrieval},
year = {2024},
RTEB_CITATION = r"""@article{rteb2025,
author = {Liu, Frank and Enevoldsen, Kenneth and Solomatin, Roman and Chung, Isaac and Aarsen, Tom and Fődi, Zoltán},
title = {Introducing RTEB: A New Standard for Retrieval Evaluation},
year = {2025},
}"""

RTEB_MAIN = RtebBenchmark(
@@ -48,7 +48,7 @@
"JapaneseLegal1Retrieval",
],
),
description="RTEB (Retrieval Embedding Benchmark) is a comprehensive benchmark for evaluating text retrieval models across multiple specialized domains including legal, finance, code, and healthcare. It contains 29 diverse retrieval tasks designed to test models' ability to understand domain-specific terminology and retrieve relevant documents in specialized contexts across mutliple languages.",
description="RTEB (ReTrieval Embedding Benchmark) is a comprehensive benchmark for evaluating text retrieval models across multiple specialized domains including legal, finance, code, and healthcare. It contains diverse retrieval tasks designed to test models' ability to understand domain-specific terminology and retrieve relevant documents in specialized contexts across multiple languages. The dataset includes both open and closed datasets, providing a robust evaluation framework for real-world applications. To submit results on private tasks, please create [open an issue](https://github.com/embeddings-benchmark/mteb/issues).",
citation=RTEB_CITATION,
contacts=["fzowl"],
)
@@ -83,7 +83,7 @@
],
languages=["eng"],
),
description="RTEB English subset containing retrieval tasks in English across legal, finance, code, and healthcare domains. Includes 20 diverse tasks covering specialized domains.",
description="RTEB English is a subset of RTEB containing retrieval tasks in English across legal, finance, code, and healthcare domains. Includes diverse tasks covering specialized domains such as healthcare and finance. The benchmark includes both open and closed datasets, providing a robust evaluation framework for real-world applications. To submit results on private tasks, please create [open an issue](https://github.com/embeddings-benchmark/mteb/issues).",
citation=RTEB_CITATION,
contacts=["fzowl"],
)
@@ -101,7 +101,7 @@
],
languages=["fra"],
),
description="RTEB French subset containing retrieval tasks in French across legal and general knowledge domains. Includes 3 diverse multilingual tasks.",
description="RTEB French is a subset of RTEB containing retrieval tasks in French across legal and general knowledge domains. The benchmark includes both open and closed datasets, providing a robust evaluation framework for real-world applications. To submit results on private tasks, please create [open an issue](https://github.com/embeddings-benchmark/mteb/issues).",
citation=RTEB_CITATION,
contacts=["fzowl"],
)
@@ -119,7 +119,7 @@
"GermanLegal1Retrieval",
],
),
description="RTEB German subset containing retrieval tasks in German across legal, healthcare, and business domains. Includes 4 diverse tasks.",
description="RTEB German is a subset of RTEB containing retrieval tasks in German across legal, healthcare, and business domains. The benchmark includes both open and closed datasets, providing a robust evaluation framework for real-world applications. To submit results on private tasks, please create [open an issue](https://github.com/embeddings-benchmark/mteb/issues).",
citation=RTEB_CITATION,
contacts=["fzowl"],
)
@@ -135,7 +135,7 @@
"JapaneseLegal1Retrieval",
],
),
description="RTEB Japanese subset containing retrieval tasks in Japanese across legal and code domains. Includes 2 diverse multilingual tasks.",
description="RTEB Japanese is a subset of RTEB containing retrieval tasks in Japanese across legal and code domains. The benchmark includes both open and closed datasets, providing a robust evaluation framework for real-world applications. To submit results on private tasks, please create [open an issue](https://github.com/embeddings-benchmark/mteb/issues).",
citation=RTEB_CITATION,
contacts=["fzowl"],
)
@@ -156,7 +156,7 @@
"EnglishFinance4Retrieval",
],
),
description="RTEB Finance subset containing retrieval tasks specifically focused on financial domain including finance benchmarks, Q&A, financial document retrieval, and corporate governance. Includes 7 specialized finance tasks.",
description="RTEB Finance is a subset of RTEB containing retrieval tasks specifically focused on financial domain including finance benchmarks, Q&A, financial document retrieval, and corporate governance. The benchmark includes both open and closed datasets, providing a robust evaluation framework for real-world applications. To submit results on private tasks, please create [open an issue](https://github.com/embeddings-benchmark/mteb/issues).",
citation=RTEB_CITATION,
contacts=["fzowl"],
)
@@ -177,7 +177,7 @@
"JapaneseLegal1Retrieval",
],
),
description="RTEB Legal subset containing retrieval tasks specifically focused on legal domain including case documents, statutes, legal summarization, and multilingual legal Q&A. Includes 7 legal tasks across English, French, German, and Japanese.",
description="RTEB Legal is a subset of RTEB containing retrieval tasks specifically focused on legal domain including case documents, statutes, legal summarization, and multilingual legal Q&A. The benchmark includes both open and closed datasets, providing a robust evaluation framework for real-world applications. To submit results on private tasks, please create [open an issue](https://github.com/embeddings-benchmark/mteb/issues).",
citation=RTEB_CITATION,
contacts=["fzowl"],
)
@@ -199,7 +199,7 @@
"JapaneseCode1Retrieval",
],
),
description="RTEB Code subset containing retrieval tasks specifically focused on programming and code domains including algorithmic problems, data science tasks, code evaluation, SQL retrieval, and multilingual code retrieval. Includes 8 code-related tasks.",
description="RTEB Code is a subset of RTEB containing retrieval tasks specifically focused on programming and code domains including algorithmic problems, data science tasks, code evaluation, SQL retrieval, and multilingual code retrieval. The benchmark includes both open and closed datasets, providing a robust evaluation framework for real-world applications. To submit results on private tasks, please create [open an issue](https://github.com/embeddings-benchmark/mteb/issues).",
citation=RTEB_CITATION,
contacts=["fzowl"],
)
@@ -217,7 +217,7 @@
"GermanHealthcare1Retrieval",
],
),
description="RTEB Healthcare subset containing retrieval tasks specifically focused on healthcare and medical domains including medical Q&A, healthcare information retrieval, cross-lingual medical retrieval, and multilingual medical consultation. Includes 4 healthcare tasks.",
description="RTEB Healthcare is a subset of RTEB containing retrieval tasks specifically focused on healthcare and medical domains including medical Q&A, healthcare information retrieval, cross-lingual medical retrieval, and multilingual medical consultation. The benchmark includes both open and closed datasets, providing a robust evaluation framework for real-world applications. To submit results on private tasks, please create [open an issue](https://github.com/embeddings-benchmark/mteb/issues).",
citation=RTEB_CITATION,
contacts=["fzowl"],
)
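Once merged, these RtebBenchmark objects are used like any other mteb benchmark. A minimal sketch of evaluating a model on one of the subsets, assuming the standard mteb evaluation flow; the benchmark name string and the model id are placeholders (the registered name comes from each benchmark's `name=` field, which this diff does not show):

```python
import mteb

# Assumed benchmark name; check the name= field in rteb_benchmarks.py for the
# exact registered string.
benchmark = mteb.get_benchmark("RTEB(eng, beta)")

# Any registered embedding model works here; this id is just an example.
model = mteb.get_model("intfloat/multilingual-e5-large")

# Benchmark objects can be passed directly as the task selection.
evaluation = mteb.MTEB(tasks=benchmark)
results = evaluation.run(model, output_folder="results")
```

Results on the closed (private) subsets still go through the issue flow referenced in the benchmark descriptions above, since their data is not publicly downloadable.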