Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions mteb/benchmarks/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ class Benchmark:
citation: str | None = None
contacts: list[str] | None = None
display_on_leaderboard: bool = True
icon: str | None = None
display_name: str | None = None

def __iter__(self):
return iter(self.tasks)
Expand Down
47 changes: 46 additions & 1 deletion mteb/benchmarks/benchmarks.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@

MTEB_EN = Benchmark(
name="MTEB(eng, v2)",
display_name="English",
icon="https://github.com/lipis/flag-icons/raw/refs/heads/main/flags/4x3/us.svg",
tasks=MTEBTasks(
get_tasks(
tasks=[
Expand Down Expand Up @@ -97,6 +99,8 @@

MTEB_ENG_CLASSIC = Benchmark(
name="MTEB(eng, v1)",
display_name="English Legacy",
icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/gb.svg",
tasks=MTEBTasks(
get_tasks(
tasks=[
Expand Down Expand Up @@ -189,6 +193,8 @@

MTEB_MAIN_RU = Benchmark(
name="MTEB(rus, v1)",
display_name="Russian",
icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/ru.svg",
tasks=get_tasks(
languages=["rus"],
tasks=[
Expand Down Expand Up @@ -239,6 +245,7 @@

MTEB_RETRIEVAL_WITH_INSTRUCTIONS = Benchmark(
name="FollowIR",
display_name="Instruction Following",
tasks=get_tasks(
tasks=[
"Robust04InstructionRetrieval",
Expand All @@ -260,6 +267,8 @@

MTEB_RETRIEVAL_LAW = Benchmark(
name="MTEB(Law, v1)", # This benchmark is likely in need of an update
display_name="Legal",
icon="https://github.com/DennisSuitters/LibreICONS/raw/2d2172d15e3c6ca03c018629d60050e4b99e5c55/svg-color/libre-map-library.svg",
tasks=get_tasks(
tasks=[
"AILACasedocs",
Expand All @@ -279,6 +288,8 @@

MTEB_RETRIEVAL_MEDICAL = Benchmark(
name="MTEB(Medical, v1)",
display_name="Medical",
icon="https://github.com/DennisSuitters/LibreICONS/raw/2d2172d15e3c6ca03c018629d60050e4b99e5c55/svg-color/libre-map-hospital.svg",
tasks=get_tasks(
tasks=[
"CUREv1",
Expand Down Expand Up @@ -328,6 +339,8 @@

SEB = Benchmark(
name="MTEB(Scandinavian, v1)",
display_name="Scandinavian",
icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/dk.svg",
tasks=get_tasks(
tasks=[
# Bitext
Expand Down Expand Up @@ -379,6 +392,7 @@

CoIR = Benchmark(
name="CoIR",
display_name="Code Information Retrieval",
tasks=get_tasks(
tasks=[
"AppsRetrieval",
Expand Down Expand Up @@ -408,6 +422,7 @@

RAR_b = Benchmark(
name="RAR-b",
display_name="Reasoning retrieval",
tasks=get_tasks(
tasks=[
"ARCChallenge",
Expand Down Expand Up @@ -442,6 +457,8 @@

MTEB_FRA = Benchmark(
name="MTEB(fra, v1)",
display_name="French",
icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/fr.svg",
tasks=MTEBTasks(
get_tasks(
languages=["fra"],
Expand Down Expand Up @@ -496,6 +513,8 @@

MTEB_DEU = Benchmark(
name="MTEB(deu, v1)",
display_name="German",
icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/de.svg",
tasks=get_tasks(
languages=["deu"],
exclusive_language_filter=True,
Expand Down Expand Up @@ -543,6 +562,8 @@

MTEB_KOR = Benchmark(
name="MTEB(kor, v1)",
display_name="Korean",
icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/kr.svg",
tasks=get_tasks(
languages=["kor"],
tasks=[ # @KennethEnevoldsen: We could probably expand this to a more solid benchmark, but for now I have left it as is.
Expand All @@ -565,6 +586,8 @@

MTEB_POL = Benchmark(
name="MTEB(pol, v1)",
display_name="Polish",
icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/pl.svg",
tasks=MTEBTasks(
get_tasks(
languages=["pol"],
Expand Down Expand Up @@ -610,6 +633,8 @@

MTEB_code = Benchmark(
name="MTEB(Code, v1)",
display_name="Code",
icon="https://github.com/DennisSuitters/LibreICONS/raw/2d2172d15e3c6ca03c018629d60050e4b99e5c55/svg-color/libre-tech-electronics.svg",
tasks=get_tasks(
tasks=[
# Retrieval
Expand Down Expand Up @@ -649,6 +674,8 @@

MTEB_multilingual = Benchmark(
name="MTEB(Multilingual, v1)",
display_name="Multilingual",
icon="https://github.com/DennisSuitters/LibreICONS/raw/2d2172d15e3c6ca03c018629d60050e4b99e5c55/svg-color/libre-gui-globe.svg",
tasks=get_tasks(
tasks=[
"BornholmBitextMining",
Expand Down Expand Up @@ -793,6 +820,8 @@

MTEB_JPN = Benchmark(
name="MTEB(jpn, v1)",
display_name="Japanese",
icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/jp.svg",
tasks=get_tasks(
languages=["jpn"],
tasks=[
Expand Down Expand Up @@ -861,6 +890,8 @@

MTEB_INDIC = Benchmark(
name="MTEB(Indic, v1)",
display_name="Indic",
icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/in.svg",
tasks=MTEBTasks(
get_tasks(
tasks=[
Expand Down Expand Up @@ -952,6 +983,8 @@

MTEB_EU = Benchmark(
name="MTEB(Europe, v1)",
display_name="European",
icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/eu.svg",
tasks=get_tasks(
tasks=[
"BornholmBitextMining",
Expand Down Expand Up @@ -1040,6 +1073,7 @@

LONG_EMBED = Benchmark(
name="LongEmbed",
display_name="Long-context Retrieval",
tasks=get_tasks(
tasks=[
"LEMBNarrativeQARetrieval",
Expand Down Expand Up @@ -1130,7 +1164,6 @@
primaryClass={cs.SE},
url={https://arxiv.org/abs/2406.14497},
}""",
display_on_leaderboard=False,
)

BEIR = Benchmark(
Expand Down Expand Up @@ -1191,6 +1224,8 @@

C_MTEB = Benchmark(
name="MTEB(cmn, v1)",
display_name="Chinese",
icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/cn.svg",
tasks=MTEBTasks(
get_tasks(
tasks=[
Expand Down Expand Up @@ -1253,6 +1288,8 @@

FA_MTEB = Benchmark(
name="MTEB(fas, beta)",
display_name="Farsi (BETA)",
icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/ir.svg",
tasks=get_tasks(
languages=["fas"],
tasks=[
Expand Down Expand Up @@ -1333,6 +1370,8 @@

CHEMTEB = Benchmark(
name="ChemTEB",
display_name="Chemical",
icon="https://github.com/DennisSuitters/LibreICONS/raw/2d2172d15e3c6ca03c018629d60050e4b99e5c55/svg-color/libre-gui-purge.svg",
tasks=get_tasks(
tasks=[
"PubChemSMILESBitextMining",
Expand Down Expand Up @@ -1546,6 +1585,8 @@

MIEB_ENG = Benchmark(
name="MIEB(eng)",
display_name="Images, English",
icon="https://github.com/DennisSuitters/LibreICONS/raw/2d2172d15e3c6ca03c018629d60050e4b99e5c55/svg-color/libre-gui-picture.svg",
tasks=get_tasks(
tasks=MIEB_common_tasks
+ [
Expand All @@ -1571,6 +1612,8 @@

MIEB_MULTILINGUAL = Benchmark(
name="MIEB(Multilingual)",
display_name="Images, Multilingual",
icon="https://github.com/DennisSuitters/LibreICONS/raw/2d2172d15e3c6ca03c018629d60050e4b99e5c55/svg-color/libre-gui-pictures.svg",
tasks=get_tasks(
tasks=MIEB_common_tasks
+ [
Expand Down Expand Up @@ -1602,6 +1645,8 @@

MIEB_LITE = Benchmark(
name="MIEB(lite)",
display_name="Images, Lite",
icon="https://github.com/DennisSuitters/LibreICONS/raw/2d2172d15e3c6ca03c018629d60050e4b99e5c55/svg-color/libre-map-landscape.svg",
tasks=get_tasks(
tasks=[
# Image Classification
Expand Down
Loading