Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions mteb/benchmarks/benchmarks/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,16 @@
MTEB_multilingual_v2,
RAR_b,
)
from mteb.benchmarks.benchmarks.rteb_benchmarks import (
RTEB_CODE,
RTEB_ENGLISH,
RTEB_FINANCE,
RTEB_FRENCH,
RTEB_GERMAN,
RTEB_HEALTHCARE,
RTEB_LEGAL,
RTEB_MAIN,
)

__all__ = [
"Benchmark",
Expand Down Expand Up @@ -92,4 +102,12 @@
"R2MED",
"VN_MTEB",
"JINA_VDR",
"RTEB_MAIN",
"RTEB_FINANCE",
"RTEB_LEGAL",
"RTEB_CODE",
"RTEB_HEALTHCARE",
"RTEB_ENGLISH",
"RTEB_FRENCH",
"RTEB_GERMAN",
]
25 changes: 24 additions & 1 deletion mteb/leaderboard/app.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import argparse
import itertools
import json
import logging
Expand All @@ -20,6 +21,7 @@
from mteb.leaderboard.benchmark_selector import (
BENCHMARK_ENTRIES,
DEFAULT_BENCHMARK_NAME,
RTEB_BENCHMARK_ENTRIES,
make_selector,
)
from mteb.leaderboard.figures import performance_size_plot, radar_chart
Expand Down Expand Up @@ -190,7 +192,23 @@ def filter_models(
return list(models_to_keep)


def get_startup_arguments(argv=None) -> argparse.Namespace:
    """Parse the leaderboard's command-line startup flags.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) the arguments are taken from ``sys.argv[1:]``,
            which is argparse's standard behaviour.

    Returns:
        A namespace with a boolean ``show_rteb`` attribute: ``True`` when
        ``--show_rteb`` was passed, ``False`` otherwise.
    """
    parser = argparse.ArgumentParser()

    # Boolean switch: present -> True, absent -> False.
    parser.add_argument(
        "--show_rteb",
        action="store_true",
        help="If set, display RTEB results; otherwise show default results.",
    )

    # This function is called while building the app, so the process may have
    # been started by another launcher with its own CLI arguments. Using
    # parse_known_args() keeps unrecognised arguments from terminating the
    # process with SystemExit, as parse_args() would.
    args, _unrecognised = parser.parse_known_args(argv)
    return args


def get_leaderboard_app() -> gr.Blocks:
args = get_startup_arguments()
show_rteb = args.show_rteb

logger.info("Loading all benchmark results")
all_results = load_results()

Expand Down Expand Up @@ -277,7 +295,12 @@ def get_leaderboard_app() -> gr.Blocks:
visible=True,
width="18%",
):
benchmark_select, column = make_selector(BENCHMARK_ENTRIES)
if show_rteb:
benchmark_select, column = make_selector(
BENCHMARK_ENTRIES + RTEB_BENCHMARK_ENTRIES
)
else:
benchmark_select, column = make_selector(BENCHMARK_ENTRIES)
gr.Markdown(
"""
## Embedding Leaderboard
Expand Down
33 changes: 33 additions & 0 deletions mteb/leaderboard/benchmark_selector.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,16 @@
import mteb
from mteb import Benchmark
from mteb.benchmarks.benchmarks import MTEB_multilingual_v2
from mteb.benchmarks.benchmarks.rteb_benchmarks import (
RTEB_CODE,
RTEB_ENGLISH,
RTEB_FINANCE,
RTEB_FRENCH,
RTEB_GERMAN,
RTEB_HEALTHCARE,
RTEB_LEGAL,
RTEB_MAIN,
)

DEFAULT_BENCHMARK_NAME = MTEB_multilingual_v2.name

Expand Down Expand Up @@ -92,6 +102,29 @@ class MenuEntry:
),
]

RTEB_BENCHMARK_ENTRIES = [
MenuEntry(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would avoid the duplication on this item - just define and reuse across both (for now)

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll modify this part. I will move the benchmarks that are exclusive to RTEB into RTEB_BENCHMARK_ENTRIES.

name="RTEB (Retrieval)",
description=None,
open=False,
benchmarks=[
RTEB_MAIN,
MenuEntry(
"Domain-Specific",
description=None,
open=False,
benchmarks=[RTEB_FINANCE, RTEB_LEGAL, RTEB_CODE, RTEB_HEALTHCARE],
),
MenuEntry(
"Language-specific",
description=None,
open=False,
benchmarks=[RTEB_ENGLISH, RTEB_FRENCH, RTEB_GERMAN],
),
],
)
]


def _create_button(
i: int,
Expand Down
Loading