From dbae46eaf6b081574073041ffb5740c8cf94bf8f Mon Sep 17 00:00:00 2001 From: Kenneth Enevoldsen Date: Wed, 25 Jun 2025 13:46:23 +0200 Subject: [PATCH 1/4] fix: Update model selection for the leaderboard fixes #2834 This removed the lower bound selection, but generally I don't think people should care about the models being too small. --- mteb/leaderboard/app.py | 46 ++++++++++++++++++++++++----------------- pyproject.toml | 1 - 2 files changed, 27 insertions(+), 20 deletions(-) diff --git a/mteb/leaderboard/app.py b/mteb/leaderboard/app.py index 838a4874d8..e76958f1c7 100644 --- a/mteb/leaderboard/app.py +++ b/mteb/leaderboard/app.py @@ -13,7 +13,6 @@ import cachetools import gradio as gr import pandas as pd -from gradio_rangeslider import RangeSlider import mteb from mteb.abstasks.TaskMetadata import TASK_DOMAIN, TASK_TYPE @@ -158,10 +157,10 @@ def filter_models( availability: bool | None, compatibility: list[str], instructions: bool | None, - model_size: tuple[int | None, int | None], + model_size: int, zero_shot_setting: Literal["only_zero_shot", "allow_all", "remove_unknown"], ): - lower, upper = model_size + lower, upper = 0, model_size # Setting to None, when the user doesn't specify anything if (lower == MIN_MODEL_SIZE) or (lower is None): lower = None @@ -179,6 +178,7 @@ def filter_models( frameworks=compatibility, n_parameters_range=(lower, upper), ) + models_to_keep = set() for model_meta in model_metas: is_model_zero_shot = model_meta.is_zero_shot_on(task_select) @@ -217,7 +217,7 @@ def get_leaderboard_app() -> gr.Blocks: availability=None, compatibility=[], instructions=None, - model_size=(MIN_MODEL_SIZE, MAX_MODEL_SIZE), + model_size=MAX_MODEL_SIZE, zero_shot_setting="allow_all", ) @@ -378,11 +378,19 @@ def get_leaderboard_app() -> gr.Blocks: label="Zero-shot", interactive=True, ) - model_size = RangeSlider( - minimum=MIN_MODEL_SIZE, - maximum=MAX_MODEL_SIZE, - value=(MIN_MODEL_SIZE, MAX_MODEL_SIZE), - label="Model Size (#M Parameters)", + + max_model_size = gr.Radio( + [ + ("<100M", 100), + ("<500M", 500), + ("<1M", 1000), + ("<5B", 5000), + ("<10B", 10000), + (">10B", MAX_MODEL_SIZE), + ], + value=MAX_MODEL_SIZE, + label="Model Parameters", + interactive=True, ) with gr.Tab("Summary"): @@ -599,7 +607,7 @@ def update_models( availability: bool | None, compatibility: list[str], instructions: bool | None, - model_size: tuple[int, int], + model_size: int, zero_shot: Literal["allow_all", "remove_unknown", "only_zero_shot"], ): start_time = time.time() @@ -628,7 +636,7 @@ def update_models( availability, compatibility, instructions, - model_size, + max_model_size, zero_shot, ], outputs=[models], @@ -641,7 +649,7 @@ def update_models( availability, compatibility, instructions, - model_size, + max_model_size, zero_shot, ], outputs=[models], @@ -654,7 +662,7 @@ def update_models( availability, compatibility, instructions, - model_size, + max_model_size, zero_shot, ], outputs=[models], @@ -667,7 +675,7 @@ def update_models( availability, compatibility, instructions, - model_size, + max_model_size, zero_shot, ], outputs=[models], @@ -680,12 +688,12 @@ def update_models( availability, compatibility, instructions, - model_size, + max_model_size, zero_shot, ], outputs=[models], ) - model_size.change( + max_model_size.change( update_models, inputs=[ scores, @@ -693,7 +701,7 @@ def update_models( availability, compatibility, instructions, - model_size, + max_model_size, zero_shot, ], outputs=[models], @@ -706,7 +714,7 @@ def update_models( availability, compatibility, instructions, - model_size, + max_model_size, zero_shot, ], outputs=[models], @@ -784,7 +792,7 @@ def update_tables( availability=None, compatibility=[], instructions=None, - model_size=(MIN_MODEL_SIZE, MAX_MODEL_SIZE), + model_size=MAX_MODEL_SIZE, zero_shot="allow_all", ) # We have to call this both on the filtered and unfiltered task because the callbacks diff --git a/pyproject.toml b/pyproject.toml index 1df97b7153..e3261adbc4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -72,7 +72,6 @@ speedtask = [ peft = ["peft>=0.11.0"] leaderboard = [ "gradio==5.27.1; python_version > '3.9'", # 3.10 is required for gradio - "gradio_rangeslider>=0.0.8", "plotly>=5.24.0,<6.0.0", "cachetools>=5.2.0", "matplotlib>=3.9.4", From 0220400e28ec7940d421fdbc3f7a7e83b1bbbf7e Mon Sep 17 00:00:00 2001 From: Kenneth Enevoldsen Date: Wed, 25 Jun 2025 13:50:27 +0200 Subject: [PATCH 2/4] fix 1M --> 1B --- mteb/leaderboard/app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mteb/leaderboard/app.py b/mteb/leaderboard/app.py index e76958f1c7..857e330429 100644 --- a/mteb/leaderboard/app.py +++ b/mteb/leaderboard/app.py @@ -383,7 +383,7 @@ def get_leaderboard_app() -> gr.Blocks: [ ("<100M", 100), ("<500M", 500), - ("<1M", 1000), + ("<1B", 1000), ("<5B", 5000), ("<10B", 10000), (">10B", MAX_MODEL_SIZE), From 1ce58e6d16dfef56c3590258f46970e32104c68b Mon Sep 17 00:00:00 2001 From: Kenneth Enevoldsen Date: Wed, 25 Jun 2025 13:50:30 +0200 Subject: [PATCH 3/4] format --- mteb/models/overview.py | 2 +- mteb/models/seed_1_6_embedding_models.py | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/mteb/models/overview.py b/mteb/models/overview.py index c187c1f899..cc31a1787c 100644 --- a/mteb/models/overview.py +++ b/mteb/models/overview.py @@ -79,8 +79,8 @@ ru_sentence_models, salesforce_models, searchmap_models, - seed_models, seed_1_6_embedding_models, + seed_models, sentence_transformers_models, shuu_model, siglip_models, diff --git a/mteb/models/seed_1_6_embedding_models.py b/mteb/models/seed_1_6_embedding_models.py index e4bb8fa16d..275553022a 100644 --- a/mteb/models/seed_1_6_embedding_models.py +++ b/mteb/models/seed_1_6_embedding_models.py @@ -1,26 +1,26 @@ from __future__ import annotations -import os +import base64 import logging +import os import time +from concurrent.futures import ThreadPoolExecutor, as_completed from functools import partial +from io import BytesIO from typing import Any -import base64 -from io import BytesIO import numpy as np +import requests import torch -from torch.utils.data import DataLoader -from PIL import Image import tqdm +from PIL import Image +from torch.utils.data import DataLoader from mteb.encoder_interface import PromptType from mteb.model_meta import ModelMeta from mteb.models.bge_models import bge_chinese_training_data from mteb.models.wrapper import Wrapper from mteb.requires_package import requires_package -import requests -from concurrent.futures import ThreadPoolExecutor, as_completed logger = logging.getLogger(__name__) From e6d958027033dff322ad2ab286336930730598f4 Mon Sep 17 00:00:00 2001 From: Kenneth Enevoldsen Date: Wed, 25 Jun 2025 13:59:45 +0200 Subject: [PATCH 4/4] rename model_size -> max_model_size --- mteb/leaderboard/app.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/mteb/leaderboard/app.py b/mteb/leaderboard/app.py index 857e330429..8903301022 100644 --- a/mteb/leaderboard/app.py +++ b/mteb/leaderboard/app.py @@ -157,10 +157,10 @@ def filter_models( availability: bool | None, compatibility: list[str], instructions: bool | None, - model_size: int, + max_model_size: int, zero_shot_setting: Literal["only_zero_shot", "allow_all", "remove_unknown"], ): - lower, upper = 0, model_size + lower, upper = 0, max_model_size # Setting to None, when the user doesn't specify anything if (lower == MIN_MODEL_SIZE) or (lower is None): lower = None @@ -217,7 +217,7 @@ def get_leaderboard_app() -> gr.Blocks: availability=None, compatibility=[], instructions=None, - model_size=MAX_MODEL_SIZE, + max_model_size=MAX_MODEL_SIZE, zero_shot_setting="allow_all", ) @@ -588,7 +588,7 @@ def update_task_list( availability, compatibility, instructions, - model_size, + max_model_size, zero_shot: hash( ( id(scores), @@ -596,7 +596,7 @@ def update_task_list( hash(availability), hash(tuple(compatibility)), hash(instructions), - hash(model_size), + hash(max_model_size), hash(zero_shot), ) ), @@ -607,7 +607,7 @@ def update_models( availability: bool | None, compatibility: list[str], instructions: bool | None, - model_size: int, + max_model_size: int, zero_shot: Literal["allow_all", "remove_unknown", "only_zero_shot"], ): start_time = time.time() @@ -618,7 +618,7 @@ def update_models( availability, compatibility, instructions, - model_size, + max_model_size, zero_shot_setting=zero_shot, ) elapsed = time.time() - start_time @@ -792,7 +792,7 @@ def update_tables( availability=None, compatibility=[], instructions=None, - model_size=MAX_MODEL_SIZE, + max_model_size=MAX_MODEL_SIZE, zero_shot="allow_all", ) # We have to call this both on the filtered and unfiltered task because the callbacks