Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions mteb/benchmarks/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ class Benchmark:

Args:
name: The name of the benchmark
aliases: Alternative names for the benchmark
tasks: The tasks within the benchmark.
description: A description of the benchmark, should include its intended goal and potentially a description of its construction
reference: A link to a source containing additional information, typically a paper, leaderboard, or GitHub repository.
Expand All @@ -38,6 +39,7 @@ class Benchmark:

name: str
tasks: Sequence[AbsTask]
aliases: Sequence[str] = field(default_factory=tuple)
description: str | None = None
reference: StrURL | None = None
citation: str | None = None
Expand Down
23 changes: 22 additions & 1 deletion mteb/benchmarks/benchmarks/benchmarks.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

MTEB_EN = Benchmark(
name="MTEB(eng, v2)",
aliases=["MTEB(eng)"],
display_name="English",
icon="https://github.com/lipis/flag-icons/raw/refs/heads/main/flags/4x3/us.svg",
tasks=MTEBTasks(
Expand Down Expand Up @@ -89,6 +90,7 @@

MTEB_ENG_CLASSIC = Benchmark(
name="MTEB(eng, v1)",
aliases=["MTEB(eng, classic)", "MTEB"],
display_name="English Legacy",
icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/gb.svg",
tasks=MTEBTasks(
Expand Down Expand Up @@ -185,6 +187,7 @@

MTEB_MAIN_RU = Benchmark(
name="MTEB(rus, v1)",
aliases=["MTEB(rus)"],
display_name="Russian legacy",
icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/ru.svg",
tasks=MTEBTasks(
Expand Down Expand Up @@ -344,6 +347,7 @@

MTEB_RETRIEVAL_WITH_INSTRUCTIONS = Benchmark(
name="FollowIR",
aliases=["MTEB(Retrieval w/Instructions)"],
display_name="Instruction Following",
tasks=get_tasks(
tasks=[
Expand Down Expand Up @@ -394,7 +398,9 @@
)

MTEB_RETRIEVAL_LAW = Benchmark(
name="MTEB(Law, v1)", # This benchmark is likely in the need of an update
# This benchmark is likely in need of an update
name="MTEB(Law, v1)",
aliases=["MTEB(law)"],
display_name="Legal",
icon="https://github.com/DennisSuitters/LibreICONS/raw/2d2172d15e3c6ca03c018629d60050e4b99e5c55/svg-color/libre-map-library.svg",
tasks=get_tasks(
Expand All @@ -416,6 +422,7 @@

MTEB_RETRIEVAL_MEDICAL = Benchmark(
name="MTEB(Medical, v1)",
aliases=["MTEB(Medical)"],
display_name="Medical",
icon="https://github.com/DennisSuitters/LibreICONS/raw/2d2172d15e3c6ca03c018629d60050e4b99e5c55/svg-color/libre-map-hospital.svg",
tasks=get_tasks(
Expand Down Expand Up @@ -469,6 +476,7 @@

SEB = Benchmark(
name="MTEB(Scandinavian, v1)",
aliases=["MTEB(Scandinavian)", "SEB"],
display_name="Scandinavian",
icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/dk.svg",
language_view=["dan-Latn", "swe-Latn", "nno-Latn", "nob-Latn"],
Expand Down Expand Up @@ -595,6 +603,7 @@

MTEB_FRA = Benchmark(
name="MTEB(fra, v1)",
aliases=["MTEB(fra)"],
display_name="French",
icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/fr.svg",
tasks=MTEBTasks(
Expand Down Expand Up @@ -653,6 +662,7 @@

MTEB_DEU = Benchmark(
name="MTEB(deu, v1)",
aliases=["MTEB(deu)"],
display_name="German",
icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/de.svg",
tasks=get_tasks(
Expand Down Expand Up @@ -704,6 +714,7 @@

MTEB_KOR = Benchmark(
name="MTEB(kor, v1)",
aliases=["MTEB(kor)"],
display_name="Korean",
icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/kr.svg",
tasks=get_tasks(
Expand All @@ -728,6 +739,7 @@

MTEB_POL = Benchmark(
name="MTEB(pol, v1)",
aliases=["MTEB(pol)"],
display_name="Polish",
icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/pl.svg",
tasks=MTEBTasks(
Expand Down Expand Up @@ -777,6 +789,7 @@

MTEB_code = Benchmark(
name="MTEB(Code, v1)",
aliases=["MTEB(code)"],
display_name="Code",
icon="https://github.com/DennisSuitters/LibreICONS/raw/2d2172d15e3c6ca03c018629d60050e4b99e5c55/svg-color/libre-tech-electronics.svg",
tasks=get_tasks(
Expand Down Expand Up @@ -953,6 +966,7 @@

MTEB_multilingual_v2 = Benchmark(
name="MTEB(Multilingual, v2)",
aliases=["MTEB(Multilingual)", "MMTEB"],
display_name="Multilingual",
language_view=[
"eng-Latn", # English
Expand Down Expand Up @@ -986,6 +1000,7 @@

MTEB_JPN = Benchmark(
name="MTEB(jpn, v1)",
aliases=["MTEB(jpn)"],
display_name="Japanese Legacy",
icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/jp.svg",
tasks=get_tasks(
Expand Down Expand Up @@ -1056,6 +1071,7 @@

MTEB_INDIC = Benchmark(
name="MTEB(Indic, v1)",
aliases=["MTEB(Indic)"],
display_name="Indic",
icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/in.svg",
tasks=MTEBTasks(
Expand Down Expand Up @@ -1146,6 +1162,7 @@

MTEB_EU = Benchmark(
name="MTEB(Europe, v1)",
aliases=["MTEB(Europe)"],
display_name="European",
icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/eu.svg",
tasks=get_tasks(
Expand Down Expand Up @@ -1285,6 +1302,7 @@

BRIGHT_LONG = Benchmark(
name="BRIGHT (long)",
aliases=["BRIGHT(long)"],
tasks=MTEBTasks(
(
get_task(
Expand Down Expand Up @@ -1400,6 +1418,7 @@

C_MTEB = Benchmark(
name="MTEB(cmn, v1)",
aliases=["MTEB(Chinese)", "CMTEB"],
display_name="Chinese",
icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/cn.svg",
tasks=MTEBTasks(
Expand Down Expand Up @@ -1466,6 +1485,7 @@

FA_MTEB = Benchmark(
name="MTEB(fas, v1)",
aliases=["FaMTEB(fas, beta)"],
display_name="Farsi Legacy",
icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/ir.svg",
tasks=get_tasks(
Expand Down Expand Up @@ -2347,6 +2367,7 @@

VISUAL_DOCUMENT_RETRIEVAL = VidoreBenchmark(
name="ViDoRe(v1&v2)",
aliases=["VisualDocumentRetrieval"],
display_name="ViDoRe (V1&V2)",
tasks=get_tasks(
tasks=[
Expand Down
69 changes: 14 additions & 55 deletions mteb/benchmarks/get_benchmark.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import difflib
import logging
import warnings
from functools import lru_cache

from .benchmark import Benchmark
Expand All @@ -20,53 +19,16 @@ def _build_registry() -> dict[str, Benchmark]:
return benchmark_registry


def _get_previous_benchmark_names() -> dict[str, str]:
    """Build the lookup from previous benchmark names to current ones.

    Returns:
        A dict whose keys are the earlier benchmark names and whose values
        are the ``name`` attribute of the matching benchmark object.
    """
    # The benchmark objects are imported at call time rather than at module
    # import (presumably to sidestep a circular import; confirm before moving).
    from .benchmarks import (
        BRIGHT_LONG,
        C_MTEB,
        FA_MTEB,
        MTEB_DEU,
        MTEB_EN,
        MTEB_ENG_CLASSIC,
        MTEB_EU,
        MTEB_FRA,
        MTEB_INDIC,
        MTEB_JPN,
        MTEB_KOR,
        MTEB_MAIN_RU,
        MTEB_POL,
        MTEB_RETRIEVAL_LAW,
        MTEB_RETRIEVAL_MEDICAL,
        MTEB_RETRIEVAL_WITH_INSTRUCTIONS,
        SEB,
        VISUAL_DOCUMENT_RETRIEVAL,
        MTEB_code,
        MTEB_multilingual_v2,
    )

    # (previous name, benchmark object) pairs; order is preserved in the result.
    renamed = (
        ("MTEB(eng)", MTEB_EN),
        ("MTEB(eng, classic)", MTEB_ENG_CLASSIC),
        ("MTEB(rus)", MTEB_MAIN_RU),
        ("MTEB(Retrieval w/Instructions)", MTEB_RETRIEVAL_WITH_INSTRUCTIONS),
        ("MTEB(law)", MTEB_RETRIEVAL_LAW),
        ("MTEB(Medical)", MTEB_RETRIEVAL_MEDICAL),
        ("MTEB(Scandinavian)", SEB),
        ("MTEB(fra)", MTEB_FRA),
        ("MTEB(deu)", MTEB_DEU),
        ("MTEB(kor)", MTEB_KOR),
        ("MTEB(pol)", MTEB_POL),
        ("MTEB(code)", MTEB_code),
        ("MTEB(Multilingual)", MTEB_multilingual_v2),
        ("MTEB(jpn)", MTEB_JPN),
        ("MTEB(Indic)", MTEB_INDIC),
        ("MTEB(Europe)", MTEB_EU),
        ("MTEB(Chinese)", C_MTEB),
        ("FaMTEB(fas, beta)", FA_MTEB),
        ("BRIGHT(long)", BRIGHT_LONG),
        ("VisualDocumentRetrieval", VISUAL_DOCUMENT_RETRIEVAL),
    )
    return {old_name: benchmark.name for old_name, benchmark in renamed}
@lru_cache
def _build_aliases_registry() -> dict[str, Benchmark]:
    """Collect every benchmark alias into a single alias -> benchmark lookup.

    Walks the module-level objects of ``mteb.benchmarks.benchmarks`` and, for
    each ``Benchmark`` instance whose ``aliases`` is not ``None``, maps every
    one of its aliases to that instance. When an alias occurs more than once,
    the instance encountered last in the module namespace wins. The result is
    memoized by ``lru_cache``, so the scan runs once per process.

    Returns:
        A dict keyed by alias string whose values are ``Benchmark`` instances.
    """
    import mteb.benchmarks.benchmarks as benchmark_module

    # Only module attributes that are Benchmark instances carrying aliases.
    aliased_benchmarks = (
        candidate
        for candidate in vars(benchmark_module).values()
        if isinstance(candidate, Benchmark) and candidate.aliases is not None
    )
    return {
        alias: benchmark
        for benchmark in aliased_benchmarks
        for alias in benchmark.aliases
    }


def get_benchmark(
Expand All @@ -80,14 +42,11 @@ def get_benchmark(
Returns:
The Benchmark instance corresponding to the given name.
"""
previous_benchmark_names = _get_previous_benchmark_names()
benchmark_registry = _build_registry()
if benchmark_name in previous_benchmark_names:
warnings.warn(
f"Using the previous benchmark name '{benchmark_name}' is deprecated. Please use '{previous_benchmark_names[benchmark_name]}' instead.",
DeprecationWarning,
)
benchmark_name = previous_benchmark_names[benchmark_name]
aliases_registry = _build_aliases_registry()

if benchmark_name in aliases_registry:
return aliases_registry[benchmark_name]
if benchmark_name not in benchmark_registry:
close_matches = difflib.get_close_matches(
benchmark_name, benchmark_registry.keys()
Expand Down
18 changes: 18 additions & 0 deletions tests/test_benchmarks/test_get_benchmarks.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,21 @@
def test_get_benchmark(name):
benchmark = mteb.get_benchmark(benchmark_name=name)
assert isinstance(benchmark, mteb.Benchmark)


@pytest.mark.parametrize(
    "alias, full_name",
    [
        ("MTEB(eng, classic)", "MTEB(eng, v1)"),
        ("MTEB(rus)", "MTEB(rus, v1)"),
        ("MTEB(Scandinavian)", "MTEB(Scandinavian, v1)"),
    ],
)
def test_benchmark_aliases(alias, full_name):
    """Looking a benchmark up by alias yields the benchmark with the expected name."""
    resolved = mteb.get_benchmark(benchmark_name=alias)
    # The alias must resolve to the benchmark registered under the full name.
    assert resolved.name == full_name
    assert isinstance(resolved, mteb.Benchmark)
    # The resolved benchmark must itself list the alias used for the lookup.
    assert alias in resolved.aliases