From 6c73b58efa304d146841afcc89bb78b645634621 Mon Sep 17 00:00:00 2001
From: ethan <smiletoye@gmail.com>
Date: Wed, 1 Oct 2025 17:37:23 +0800
Subject: [PATCH 1/4] Refactor: Move zero-shot percentage calculation to the
 end of summary table creation, which only applies to the RTEB table.

---
 mteb/benchmarks/_create_table.py | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/mteb/benchmarks/_create_table.py b/mteb/benchmarks/_create_table.py
index a517a36d62..daee842f6d 100644
--- a/mteb/benchmarks/_create_table.py
+++ b/mteb/benchmarks/_create_table.py
@@ -344,13 +344,6 @@ def _create_summary_table_mean_public_private(
         ),
     )
 
-    # Add zero-shot percentage
-    tasks = get_tasks(tasks=list(data["task_name"].unique()))
-    joint_table.insert(
-        1, "Zero-shot", model_metas.map(lambda m: m.zero_shot_percentage(tasks))
-    )
-    joint_table["Zero-shot"] = joint_table["Zero-shot"].fillna(-1)
-
     # Clean up model names (remove HF organization)
     joint_table["model_name"] = joint_table["model_name"].map(
         lambda name: name.split("/")[-1]
@@ -379,6 +372,11 @@ def _create_summary_table_mean_public_private(
     # Move borda rank to front
     joint_table.insert(0, "Rank (Borda)", joint_table.pop("borda_rank"))
 
+    # Add zero-shot percentage at the end
+    tasks = get_tasks(tasks=list(data["task_name"].unique()))
+    joint_table["Zero-shot"] = model_metas.map(lambda m: m.zero_shot_percentage(tasks))
+    joint_table["Zero-shot"] = joint_table["Zero-shot"].fillna(-1)
+
     return joint_table
 
 

From 8fea520cc6fe628de6f4547b1d4880bd4164425c Mon Sep 17 00:00:00 2001
From: ethan <smiletoye@gmail.com>
Date: Wed, 1 Oct 2025 21:26:13 +0800
Subject: [PATCH 2/4] Update RTEB benchmark name from "RTEB(beta)" to "RTEB"
 for consistency in display.

---
 mteb/benchmarks/benchmarks/rteb_benchmarks.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mteb/benchmarks/benchmarks/rteb_benchmarks.py b/mteb/benchmarks/benchmarks/rteb_benchmarks.py
index 7060456ba0..a652513603 100644
--- a/mteb/benchmarks/benchmarks/rteb_benchmarks.py
+++ b/mteb/benchmarks/benchmarks/rteb_benchmarks.py
@@ -11,7 +11,7 @@
 }"""
 
 RTEB_MAIN = RtebBenchmark(
-    name="RTEB(beta)",
+    name="RTEB",
     display_name="RTEB Multilingual",
     icon="https://github.com/DennisSuitters/LibreICONS/raw/2d2172d15e3c6ca03c018629d60050e4b99e5c55/svg-color/libre-gui-search.svg",
     tasks=get_tasks(

From 756d836b123b93454576b59c2a3bdd640b46e646 Mon Sep 17 00:00:00 2001
From: q275343119 <275343119@qq.com>
Date: Sat, 4 Oct 2025 22:14:38 +0800
Subject: [PATCH 3/4] feat - restore benchmark name RTEB(beta)

---
 mteb/benchmarks/benchmarks/rteb_benchmarks.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mteb/benchmarks/benchmarks/rteb_benchmarks.py b/mteb/benchmarks/benchmarks/rteb_benchmarks.py
index a652513603..7060456ba0 100644
--- a/mteb/benchmarks/benchmarks/rteb_benchmarks.py
+++ b/mteb/benchmarks/benchmarks/rteb_benchmarks.py
@@ -11,7 +11,7 @@
 }"""
 
 RTEB_MAIN = RtebBenchmark(
-    name="RTEB",
+    name="RTEB(beta)",
     display_name="RTEB Multilingual",
     icon="https://github.com/DennisSuitters/LibreICONS/raw/2d2172d15e3c6ca03c018629d60050e4b99e5c55/svg-color/libre-gui-search.svg",
     tasks=get_tasks(

From 0e9dcf12d028dfb06dcfa396ae043aa346bf2b45 Mon Sep 17 00:00:00 2001
From: q275343119 <275343119@qq.com>
Date: Sat, 4 Oct 2025 22:47:22 +0800
Subject: [PATCH 4/4] feat - remove Zero-shot column, hide filter for RTEB

---
 mteb/benchmarks/_create_table.py |  5 -----
 mteb/leaderboard/app.py          | 14 ++++++++++++++
 mteb/leaderboard/table.py        |  3 ++-
 3 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/mteb/benchmarks/_create_table.py b/mteb/benchmarks/_create_table.py
index daee842f6d..3e6a503652 100644
--- a/mteb/benchmarks/_create_table.py
+++ b/mteb/benchmarks/_create_table.py
@@ -372,11 +372,6 @@ def _create_summary_table_mean_public_private(
     # Move borda rank to front
     joint_table.insert(0, "Rank (Borda)", joint_table.pop("borda_rank"))
 
-    # Add zero-shot percentage at the end
-    tasks = get_tasks(tasks=list(data["task_name"].unique()))
-    joint_table["Zero-shot"] = model_metas.map(lambda m: m.zero_shot_percentage(tasks))
-    joint_table["Zero-shot"] = joint_table["Zero-shot"].fillna(-1)
-
     return joint_table
 
 
diff --git a/mteb/leaderboard/app.py b/mteb/leaderboard/app.py
index e162b01aeb..98e956c5e7 100644
--- a/mteb/leaderboard/app.py
+++ b/mteb/leaderboard/app.py
@@ -16,6 +16,7 @@
 
 import mteb
 from mteb.abstasks.TaskMetadata import TASK_DOMAIN, TASK_TYPE
+from mteb.benchmarks.benchmark import RtebBenchmark
 from mteb.custom_validators import MODALITIES
 from mteb.leaderboard.benchmark_selector import (
     DEFAULT_BENCHMARK_NAME,
@@ -196,6 +197,14 @@ def filter_models(
     return list(models_to_keep)
 
 
+def should_show_zero_shot_filter(benchmark_name: str) -> bool:
+    """Return False for RTEB benchmarks (zero-shot filter hidden), else True."""
+    selected = mteb.get_benchmark(benchmark_name)
+
+    # The result feeds gr.update(visible=...) in on_benchmark_select.
+    return not isinstance(selected, RtebBenchmark)
+
+
 def get_leaderboard_app() -> gr.Blocks:
     logger.info("Loading all benchmark results")
     all_results = load_results()
@@ -479,6 +488,8 @@ def on_benchmark_select(benchmark_name):
             benchmark_results = all_benchmark_results[benchmark_name]
             scores = benchmark_results.get_scores(format="long")
             logger.debug(f"on_benchmark_select callback: {elapsed}s")
+            show_zero_shot = should_show_zero_shot_filter(benchmark_name)
+
             return (
                 languages,
                 domains,
@@ -486,6 +497,7 @@ def on_benchmark_select(benchmark_name):
                 modalities,
                 sorted([task.metadata.name for task in benchmark.tasks]),
                 scores,
+                gr.update(visible=show_zero_shot),
             )
 
         benchmark_select.change(
@@ -498,6 +510,7 @@ def on_benchmark_select(benchmark_name):
                 modality_select,
                 task_select,
                 scores,
+                zero_shot,
             ],
         )
 
@@ -839,6 +852,7 @@ def update_tables(
             bench_modalities,
             bench_tasks,
             bench_scores,
+            zero_shot,
         ) = on_benchmark_select(benchmark.name)
         filtered_models = update_models(
             bench_scores,
diff --git a/mteb/leaderboard/table.py b/mteb/leaderboard/table.py
index 732e10d803..3d085de7e8 100644
--- a/mteb/leaderboard/table.py
+++ b/mteb/leaderboard/table.py
@@ -138,7 +138,8 @@ def _apply_summary_table_styling(joint_table: pd.DataFrame) -> gr.DataFrame:
     numeric_data = joint_table.copy()
 
     # Format data for display
-    joint_table["Zero-shot"] = joint_table["Zero-shot"].apply(format_zero_shot)
+    if "Zero-shot" in joint_table.columns:
+        joint_table["Zero-shot"] = joint_table["Zero-shot"].apply(format_zero_shot)
     joint_table[score_columns] = joint_table[score_columns].map(format_scores)
 
     joint_table_style = joint_table.style.format(