embeddings-benchmark · KennethEnevoldsen · Jan 19, 2026 · Jan 14, 2026 · Jan 14, 2026 · Jan 14, 2026
diff --git a/mteb/benchmarks/_create_table.py b/mteb/benchmarks/_create_table.py
@@ -303,6 +303,7 @@ def _create_per_language_table_from_benchmark_results(
 
 def _create_summary_table_mean_public_private(
     benchmark_results: BenchmarkResults,
+    exclude_private_from_borda: bool = False,
 ) -> pd.DataFrame:
     """Create summary table from BenchmarkResults.
 
@@ -311,6 +312,7 @@ def _create_summary_table_mean_public_private(
 
     Args:
         benchmark_results: BenchmarkResults object containing model results
+        exclude_private_from_borda: If True, calculate Borda rank using only public tasks
 
     Returns:
         DataFrame with model summaries, ready for styling in the leaderboard
@@ -356,7 +358,11 @@ def _create_summary_table_mean_public_private(
     joint_table = joint_table.drop(models_to_remove, axis=0)
     joint_table.insert(0, "mean(public)", public_mean)
     joint_table.insert(1, "mean(private)", private_mean)
-    joint_table["borda_rank"] = _get_borda_rank(per_task)
+    if exclude_private_from_borda:
+        borda_per_task = per_task[public_task_name]
+    else:
+        borda_per_task = per_task
+    joint_table["borda_rank"] = _get_borda_rank(borda_per_task)
     joint_table = joint_table.sort_values("borda_rank", ascending=True)
     joint_table = joint_table.reset_index()
 

diff --git a/mteb/benchmarks/benchmark.py b/mteb/benchmarks/benchmark.py
@@ -123,9 +123,19 @@ def _create_summary_table(
             _create_summary_table_mean_public_private,
         )
 
-        joint_table = _create_summary_table_mean_public_private(benchmark_results)
+        joint_table = _create_summary_table_mean_public_private(
+            benchmark_results, exclude_private_from_borda=True
+        )
+        # issue 3902: temporarily remove the private column from the RTEB summary table
+        if "Mean (Private)" in joint_table.columns:
+            joint_table = joint_table.drop(columns=["Mean (Private)"])
         # For RTEB: all tasks are Retrieval type, so Retrieval column = Mean (Task)
+        # However, due to 3902: if the Private column existed, Mean (Task) was the mean of Public and Private, so we instead drop Mean (Task) and rename Mean (Public) to Mean (Task).
         joint_table = joint_table.rename(columns={"Retrieval": "Mean (Task)"})
+        if "Mean (Task)" in joint_table.columns:
+            joint_table = joint_table.drop(columns=["Mean (Task)"])
+        joint_table = joint_table.rename(columns={"Mean (Public)": "Mean (Task)"})
+
         return joint_table
 
 

diff --git a/mteb/benchmarks/benchmarks/rteb_benchmarks.py b/mteb/benchmarks/benchmarks/rteb_benchmarks.py
@@ -10,6 +10,8 @@
   year = {2025},
 }"""
 
+removal_note = "\n\nNote: We have temporarily removed the 'Private' column to read more about this decision out the [announcement](https://github.com/embeddings-benchmark/mteb/issues/3934)."
+
 RTEB_MAIN = RtebBenchmark(
     name="RTEB(beta)",
     display_name="RTEB Multilingual",
@@ -48,7 +50,8 @@
             "JapaneseLegal1Retrieval",
         ],
     ),
-    description="RTEB (ReTrieval Embedding Benchmark) is a comprehensive benchmark for evaluating text retrieval models across multiple specialized domains including legal, finance, code, and healthcare. It contains diverse retrieval tasks designed to test models' ability to understand domain-specific terminology and retrieve relevant documents in specialized contexts across multiple languages. The dataset includes both open and closed datasets, providing a robust evaluation framework for real-world applications. To submit results on private tasks, please create [open an issue](https://github.com/embeddings-benchmark/mteb/issues).",
+    description="RTEB (ReTrieval Embedding Benchmark) is a comprehensive benchmark for evaluating text retrieval models across multiple specialized domains including legal, finance, code, and healthcare. It contains diverse retrieval tasks designed to test models' ability to understand domain-specific terminology and retrieve relevant documents in specialized contexts across multiple languages. The dataset includes both open and closed datasets, providing a robust evaluation framework for real-world applications. To submit results on private tasks, please create [open an issue](https://github.com/embeddings-benchmark/mteb/issues)."
+    + removal_note,
     citation=RTEB_CITATION,
     contacts=["fzowl"],
 )
@@ -83,7 +86,8 @@
         ],
         languages=["eng"],
     ),
-    description="RTEB English is a subset of RTEB containing retrieval tasks in English across legal, finance, code, and healthcare domains. Includes diverse tasks covering specialized domains such as healthcare and finance. The benchmark includes both open and closed datasets, providing a robust evaluation framework for real-world applications. To submit results on private tasks, please create [open an issue](https://github.com/embeddings-benchmark/mteb/issues).",
+    description="RTEB English is a subset of RTEB containing retrieval tasks in English across legal, finance, code, and healthcare domains. Includes diverse tasks covering specialized domains such as healthcare and finance. The benchmark includes both open and closed datasets, providing a robust evaluation framework for real-world applications. To submit results on private tasks, please create [open an issue](https://github.com/embeddings-benchmark/mteb/issues)."
+    + removal_note,
     citation=RTEB_CITATION,
     contacts=["fzowl"],
 )
@@ -101,7 +105,8 @@
         ],
         languages=["fra"],
     ),
-    description="RTEB French is a subset of RTEB containing retrieval tasks in French across legal and general knowledge domains. The benchmark includes both open and closed datasets, providing a robust evaluation framework for real-world applications. To submit results on private tasks, please create [open an issue](https://github.com/embeddings-benchmark/mteb/issues).",
+    description="RTEB French is a subset of RTEB containing retrieval tasks in French across legal and general knowledge domains. The benchmark includes both open and closed datasets, providing a robust evaluation framework for real-world applications. To submit results on private tasks, please create [open an issue](https://github.com/embeddings-benchmark/mteb/issues)."
+    + removal_note,
     citation=RTEB_CITATION,
     contacts=["fzowl"],
 )
@@ -119,7 +124,8 @@
             "GermanLegal1Retrieval",
         ],
     ),
-    description="RTEB German is a subset of RTEB containing retrieval tasks in German across legal, healthcare, and business domains. The benchmark includes both open and closed datasets, providing a robust evaluation framework for real-world applications. To submit results on private tasks, please create [open an issue](https://github.com/embeddings-benchmark/mteb/issues).",
+    description="RTEB German is a subset of RTEB containing retrieval tasks in German across legal, healthcare, and business domains. The benchmark includes both open and closed datasets, providing a robust evaluation framework for real-world applications. To submit results on private tasks, please create [open an issue](https://github.com/embeddings-benchmark/mteb/issues)."
+    + removal_note,
     citation=RTEB_CITATION,
     contacts=["fzowl"],
 )
@@ -135,7 +141,8 @@
             "JapaneseLegal1Retrieval",
         ],
     ),
-    description="RTEB Japanese is a subset of RTEB  containing retrieval tasks in Japanese across legal and code domains. The benchmark includes both open and closed datasets, providing a robust evaluation framework for real-world applications. To submit results on private tasks, please create [open an issue](https://github.com/embeddings-benchmark/mteb/issues).",
+    description="RTEB Japanese is a subset of RTEB  containing retrieval tasks in Japanese across legal and code domains. The benchmark includes both open and closed datasets, providing a robust evaluation framework for real-world applications. To submit results on private tasks, please create [open an issue](https://github.com/embeddings-benchmark/mteb/issues)."
+    + removal_note,
     citation=RTEB_CITATION,
     contacts=["fzowl"],
 )
@@ -156,7 +163,8 @@
             "EnglishFinance4Retrieval",
         ],
     ),
-    description="RTEB Finance is a subset of RTEB  containing retrieval tasks specifically focused on financial domain including finance benchmarks, Q&A, financial document retrieval, and corporate governance. The benchmark includes both open and closed datasets, providing a robust evaluation framework for real-world applications. To submit results on private tasks, please create [open an issue](https://github.com/embeddings-benchmark/mteb/issues).",
+    description="RTEB Finance is a subset of RTEB  containing retrieval tasks specifically focused on financial domain including finance benchmarks, Q&A, financial document retrieval, and corporate governance. The benchmark includes both open and closed datasets, providing a robust evaluation framework for real-world applications. To submit results on private tasks, please create [open an issue](https://github.com/embeddings-benchmark/mteb/issues)."
+    + removal_note,
     citation=RTEB_CITATION,
     contacts=["fzowl"],
 )
@@ -177,7 +185,8 @@
             "JapaneseLegal1Retrieval",
         ],
     ),
-    description="RTEB Legal is a subset of RTEB containing retrieval tasks specifically focused on legal domain including case documents, statutes, legal summarization, and multilingual legal Q&A. The benchmark includes both open and closed datasets, providing a robust evaluation framework for real-world applications. To submit results on private tasks, please create [open an issue](https://github.com/embeddings-benchmark/mteb/issues).",
+    description="RTEB Legal is a subset of RTEB containing retrieval tasks specifically focused on legal domain including case documents, statutes, legal summarization, and multilingual legal Q&A. The benchmark includes both open and closed datasets, providing a robust evaluation framework for real-world applications. To submit results on private tasks, please create [open an issue](https://github.com/embeddings-benchmark/mteb/issues)."
+    + removal_note,
     citation=RTEB_CITATION,
     contacts=["fzowl"],
 )
@@ -199,7 +208,8 @@
             "JapaneseCode1Retrieval",
         ],
     ),
-    description="RTEB Code is a subset of RTEB containing retrieval tasks specifically focused on programming and code domains including algorithmic problems, data science tasks, code evaluation, SQL retrieval, and multilingual code retrieval. The benchmark includes both open and closed datasets, providing a robust evaluation framework for real-world applications. To submit results on private tasks, please create [open an issue](https://github.com/embeddings-benchmark/mteb/issues).",
+    description="RTEB Code is a subset of RTEB containing retrieval tasks specifically focused on programming and code domains including algorithmic problems, data science tasks, code evaluation, SQL retrieval, and multilingual code retrieval. The benchmark includes both open and closed datasets, providing a robust evaluation framework for real-world applications. To submit results on private tasks, please create [open an issue](https://github.com/embeddings-benchmark/mteb/issues)."
+    + removal_note,
     citation=RTEB_CITATION,
     contacts=["fzowl"],
 )
@@ -217,7 +227,8 @@
             "GermanHealthcare1Retrieval",
         ],
     ),
-    description="RTEB Healthcare is a subset of RTEB containing retrieval tasks specifically focused on healthcare and medical domains including medical Q&A, healthcare information retrieval, cross-lingual medical retrieval, and multilingual medical consultation. The benchmark includes both open and closed datasets, providing a robust evaluation framework for real-world applications. To submit results on private tasks, please create [open an issue](https://github.com/embeddings-benchmark/mteb/issues).",
+    description="RTEB Healthcare is a subset of RTEB containing retrieval tasks specifically focused on healthcare and medical domains including medical Q&A, healthcare information retrieval, cross-lingual medical retrieval, and multilingual medical consultation. The benchmark includes both open and closed datasets, providing a robust evaluation framework for real-world applications. To submit results on private tasks, please create [open an issue](https://github.com/embeddings-benchmark/mteb/issues)."
+    + removal_note,
     citation=RTEB_CITATION,
     contacts=["fzowl"],
 )
diff --git a/uv.lock b/uv.lock