diff --git a/mteb/benchmarks/_create_table.py b/mteb/benchmarks/_create_table.py
index 23e4296339..5acaf039e7 100644
--- a/mteb/benchmarks/_create_table.py
+++ b/mteb/benchmarks/_create_table.py
@@ -303,6 +303,7 @@ def _create_per_language_table_from_benchmark_results(
 
 def _create_summary_table_mean_public_private(
     benchmark_results: BenchmarkResults,
+    exclude_private_from_borda: bool = False,
 ) -> pd.DataFrame:
     """Create summary table from BenchmarkResults.
 
@@ -311,6 +312,7 @@ def _create_summary_table_mean_public_private(
 
     Args:
         benchmark_results: BenchmarkResults object containing model results
+        exclude_private_from_borda: If True, calculate Borda rank using only public tasks
 
     Returns:
         DataFrame with model summaries, ready for styling in the leaderboard
@@ -356,7 +358,11 @@ def _create_summary_table_mean_public_private(
     joint_table = joint_table.drop(models_to_remove, axis=0)
     joint_table.insert(0, "mean(public)", public_mean)
     joint_table.insert(1, "mean(private)", private_mean)
-    joint_table["borda_rank"] = _get_borda_rank(per_task)
+    if exclude_private_from_borda:
+        borda_per_task = per_task[public_task_name]
+    else:
+        borda_per_task = per_task
+    joint_table["borda_rank"] = _get_borda_rank(borda_per_task)
     joint_table = joint_table.sort_values("borda_rank", ascending=True)
     joint_table = joint_table.reset_index()
 
diff --git a/mteb/benchmarks/benchmark.py b/mteb/benchmarks/benchmark.py
index 3b6dd4aa0e..74c93f74b8 100644
--- a/mteb/benchmarks/benchmark.py
+++ b/mteb/benchmarks/benchmark.py
@@ -123,7 +123,9 @@ def _create_summary_table(
            _create_summary_table_mean_public_private,
        )
 
-        joint_table = _create_summary_table_mean_public_private(benchmark_results)
+        joint_table = _create_summary_table_mean_public_private(
+            benchmark_results, exclude_private_from_borda=True
+        )
        # issue 3902: temporary remove the private column from RTEB summary table
        if "Mean (Private)" in joint_table.columns:
            joint_table = joint_table.drop(columns=["Mean (Private)"])
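
The patch does not show `_get_borda_rank` itself, only that it now receives `per_task[public_task_name]` when `exclude_private_from_borda` is set. Below is a minimal, self-contained sketch of the behavior the flag targets, assuming a rank-sum (Borda count) aggregation over a per-task score DataFrame; `borda_rank_sketch`, the toy scores, and the task names are hypothetical stand-ins, not mteb code.

```python
import pandas as pd


def borda_rank_sketch(per_task: pd.DataFrame) -> pd.Series:
    """Hypothetical stand-in for mteb's _get_borda_rank (rank-sum Borda count).

    Ranks models within each task (higher score = better rank), sums the
    per-task ranks, and orders models by that sum.
    """
    per_task_ranks = per_task.rank(ascending=False, method="average")
    rank_sums = per_task_ranks.sum(axis=1)
    return rank_sums.rank(ascending=True, method="min").astype(int)


# Toy per-task scores: rows = models, columns = tasks. "private_task" is the
# kind of column that exclude_private_from_borda=True drops before ranking.
per_task = pd.DataFrame(
    {
        "public_a": [0.80, 0.70, 0.90],
        "public_b": [0.60, 0.75, 0.65],
        "private_task": [0.10, 0.90, 0.50],
    },
    index=["model_x", "model_y", "model_z"],
)
public_task_name = ["public_a", "public_b"]

print(borda_rank_sketch(per_task[public_task_name]))  # Borda rank from public tasks only
print(borda_rank_sketch(per_task))                    # Borda rank over all tasks
```

Restricting the ranking to `per_task[public_task_name]` only changes which columns feed the Borda aggregation; the `mean(public)` and `mean(private)` columns in the summary table are computed separately and are unaffected.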