Merged
20 commits
2f4606c
feat: add leaderboard CLI command with cache-path option
isaac-chung Dec 28, 2025
492c767
test: add comprehensive tests for leaderboard CLI command
isaac-chung Dec 28, 2025
a6dfcea
try to fix install
isaac-chung Dec 28, 2025
1add54e
fix: lazy-load leaderboard to avoid requiring deps for CLI
isaac-chung Dec 28, 2025
bc94b2c
Update mteb/cli/build_cli.py
isaac-chung Dec 28, 2025
10bb4ac
make lint
isaac-chung Dec 28, 2025
dee0d7e
remove AGENTS.md
isaac-chung Dec 29, 2025
8450b96
move import to top of file
isaac-chung Dec 29, 2025
6355415
log the default cache path
isaac-chung Dec 29, 2025
74a2172
Improve leaderboard tests to verify actual cache paths
isaac-chung Dec 29, 2025
37cd876
Combine leaderboard cache tests using pytest parametrize
isaac-chung Dec 29, 2025
d4df501
Update make run-leaderboard to use new CLI and remove app.py main block
isaac-chung Dec 29, 2025
2a24b7a
feat: add theme and head parameters to leaderboard CLI
isaac-chung Dec 29, 2025
4e1b72e
fix: suppress leaderboard warnings on CLI launch
isaac-chung Dec 29, 2025
63804e7
test: update leaderboard tests for theme and head params
isaac-chung Dec 29, 2025
8730ef0
Revert "Update make run-leaderboard to use new CLI and remove app.py …
isaac-chung Dec 29, 2025
72f3c4f
Update mteb/cli/build_cli.py
isaac-chung Dec 30, 2025
d5f1f06
docs: update leaderboard CLI usage
isaac-chung Dec 30, 2025
a1fdff3
update docs to show defaults
isaac-chung Dec 30, 2025
3a478f1
fix: apply ruff formatting
isaac-chung Dec 30, 2025
2 changes: 1 addition & 1 deletion Makefile
@@ -6,7 +6,7 @@ install:
install-for-tests:
@echo "--- 🚀 Installing project dependencies for test ---"
@echo "This ensures that the project is not installed in editable mode"
pip install ".[bm25s,pylate,image,codecarbon,faiss-cpu]" --group dev
pip install ".[bm25s,pylate,image,codecarbon,leaderboard,faiss-cpu]" --group dev

lint:
@echo "--- 🧹 Running linters ---"
30 changes: 29 additions & 1 deletion docs/usage/leaderboard.md
@@ -7,7 +7,35 @@ This section contains information on how to interact with the leaderboard includ
The leaderboard can be deployed entirely locally or self-hosted. This can be relevant, e.g., for companies that want to
build their own benchmarks or integrate custom tasks into existing benchmarks.

Running the leaderboard is quite easy. Simply run:
The leaderboard can be run in two ways:

#### Using the CLI Command

The easiest way to run the leaderboard is using the MTEB CLI:

```bash
mteb leaderboard
```

You can also specify a custom cache path for model results:

```bash
mteb leaderboard --cache-path results
```

Additional options:
- `--host HOST`: Specify the host to run the server on (default: 0.0.0.0)
- `--port PORT`: Specify the port to run the server on (default: 7860)
- `--share`: Create a public URL for the leaderboard

Example with all options:
```bash
mteb leaderboard --cache-path results --port 8080 --share
```

#### Using the Make Command
Contributor comment: I would probably delete this since most users don't git clone the repo


Alternatively, you can use the Makefile:
```bash
make run-leaderboard
```
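
For self-hosted deployments that embed the leaderboard in an existing Python process instead of going through the CLI, the command is essentially a thin wrapper around the leaderboard app. A minimal sketch, assuming the internal `ResultCache`/`get_leaderboard_app` API used by the implementation in `mteb/cli/build_cli.py` below (theme and head customisation omitted):

```python
from mteb.cache import ResultCache
from mteb.leaderboard import get_leaderboard_app

# Point the cache at a local results folder; ResultCache() without an argument
# falls back to the default cache path, mirroring the CLI behaviour.
cache = ResultCache("results")
app = get_leaderboard_app(cache)

# Same defaults as `mteb leaderboard`: host 0.0.0.0, port 7860, no public share link.
app.launch(server_name="0.0.0.0", server_port=7860, share=False)
```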
90 changes: 90 additions & 0 deletions mteb/cli/build_cli.py
@@ -361,6 +361,95 @@ def _add_create_meta_parser(subparsers) -> None:
parser.set_defaults(func=_create_meta)


def _add_leaderboard_parser(subparsers) -> None:
parser = subparsers.add_parser("leaderboard", help="Launch the MTEB leaderboard")

parser.add_argument(
"--cache-path",
type=str,
help="Path to the cache folder containing model results",
required=False,
default=None,
)
parser.add_argument(
"--host",
type=str,
default="0.0.0.0",
help="Host to run the leaderboard server on",
)
parser.add_argument(
"--port",
type=int,
default=7860,
help="Port to run the leaderboard server on",
)
parser.add_argument(
"--share",
action="store_true",
default=False,
help="Create a public URL for the leaderboard",
)

parser.set_defaults(func=_leaderboard)


def _leaderboard(args: argparse.Namespace) -> None:
"""Launch the MTEB leaderboard with specified cache path."""
# Import leaderboard module only when needed to avoid requiring leaderboard dependencies
# for other CLI commands
try:
import gradio as gr

from mteb.leaderboard import get_leaderboard_app
except ImportError as e:
raise ImportError(
"Seems like some dependencies are not installed. "
+ "You can likely install these using: `pip install mteb[leaderboard]`. "
+ f"{e}"
)

cache_path = args.cache_path

if cache_path:
logger.info(f"Using cache path: {cache_path}")
cache = ResultCache(cache_path)
else:
cache = ResultCache()
logger.info(f"Using default cache path: {cache.cache_path}")

app = get_leaderboard_app(cache)

logger.info(f"Starting leaderboard on {args.host}:{args.port}")
if args.share:
logger.info("Creating public URL...")

logging.getLogger("mteb.load_results.task_results").setLevel(
logging.ERROR
) # Warnings related to task split
logging.getLogger("mteb.model_meta").setLevel(
logging.ERROR
) # Warning related to model metadata (fetch_from_hf=False)
logging.getLogger("mteb.load_results.benchmark_results").setLevel(
logging.ERROR
) # Warning related to model metadata (fetch_from_hf=False)
warnings.filterwarnings("ignore", message="Couldn't get scores for .* due to .*")

# Head content for Tailwind CSS
head = """
<link href="https://cdn.jsdelivr.net/npm/tailwindcss@2.2.19/dist/tailwind.min.css" rel="stylesheet">
"""

app.launch(
server_name=args.host,
server_port=args.port,
share=args.share,
theme=gr.themes.Soft(
font=[gr.themes.GoogleFont("Roboto Mono"), "Arial", "sans-serif"],
),
head=head,
)


def build_cli() -> argparse.ArgumentParser:
"""Builds the argument parser for the MTEB CLI.

@@ -380,6 +469,7 @@ def build_cli() -> argparse.ArgumentParser:
_add_available_tasks_parser(subparsers)
_add_available_benchmarks_parser(subparsers)
_add_create_meta_parser(subparsers)
_add_leaderboard_parser(subparsers)

return parser

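Because the handler is attached via `parser.set_defaults(func=_leaderboard)`, the new subcommand can also be exercised directly against the built parser, which is handy for quick local checks. A rough sketch, assuming the usual `args.func(args)` dispatch used by the CLI entry point:

```python
from mteb.cli.build_cli import build_cli

# Parse arguments exactly as `mteb leaderboard --cache-path results --port 8080` would.
parser = build_cli()
args = parser.parse_args(["leaderboard", "--cache-path", "results", "--port", "8080"])

# set_defaults(func=_leaderboard) attached the handler, so dispatch is a single call.
# Note: this launches the Gradio server and blocks until interrupted.
args.func(args)
```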
106 changes: 106 additions & 0 deletions tests/test_cli.py
@@ -4,6 +4,7 @@
import sys
from argparse import Namespace
from pathlib import Path
from unittest.mock import MagicMock, patch

import pytest
import yaml
@@ -12,6 +13,7 @@
_available_benchmarks,
_available_tasks,
_create_meta,
_leaderboard,
run,
)

@@ -197,3 +199,107 @@ def test_create_meta_from_existing(
command = f"{sys.executable} -m mteb create-model-results --model-name {model_name} --results-folder {output_folder.as_posix()} --output-path {output_path.as_posix()} --from-existing {existing_readme.as_posix()} --overwrite"
result = subprocess.run(command, shell=True, capture_output=True, text=True)
assert result.returncode == 0, "Command failed"


def test_leaderboard_help():
"""Test that leaderboard help command works."""
command = [sys.executable, "-m", "mteb", "leaderboard", "--help"]
result = subprocess.run(command, capture_output=True, text=True)

assert result.returncode == 0, "Leaderboard help command failed"
assert "--cache-path" in result.stdout, "--cache-path option not found in help"
assert "--host" in result.stdout, "--host option not found in help"
assert "--port" in result.stdout, "--port option not found in help"
assert "--share" in result.stdout, "--share option not found in help"
assert "Path to the cache folder containing model results" in result.stdout, (
"Cache path description not found"
)


@pytest.mark.parametrize(
"cache_path_input,host,port,share,test_description",
[
("custom", "localhost", 8080, True, "custom cache path"),
(None, "127.0.0.1", 7860, False, "default cache path"),
],
)
def test_leaderboard_cache_paths(
tmp_path: Path, cache_path_input, host, port, share, test_description
):
"""Test leaderboard with different cache path configurations."""

# Set up cache path based on parameter
if cache_path_input == "custom":
custom_cache = tmp_path / "my_results"
custom_cache.mkdir(exist_ok=True)
cache_path = str(custom_cache)
expected_cache_path = custom_cache
else:
cache_path = None
from mteb.cache import ResultCache

expected_cache_path = ResultCache().default_cache_path

# Mock the get_leaderboard_app function and the gradio app
mock_app = MagicMock()
mock_app.launch = MagicMock()

# Create a mock function that captures the cache argument and returns our mock app
def mock_get_app_func(cache):
# Store the cache for verification
mock_get_app_func.called_with_cache = cache
return mock_app

# Mock gradio themes
mock_theme = MagicMock()
mock_font = MagicMock()

# Patch the local import inside _leaderboard function
with patch.dict(
"sys.modules",
{
"mteb.leaderboard": MagicMock(get_leaderboard_app=mock_get_app_func),
"gradio": MagicMock(
themes=MagicMock(
Soft=MagicMock(return_value=mock_theme),
GoogleFont=MagicMock(return_value=mock_font),
)
),
},
):
args = Namespace(
cache_path=cache_path,
host=host,
port=port,
share=share,
)

_leaderboard(args)

# Verify get_leaderboard_app was called with a cache that has the correct path
assert hasattr(mock_get_app_func, "called_with_cache"), (
"get_leaderboard_app was not called"
)
cache_instance = mock_get_app_func.called_with_cache
assert cache_instance.cache_path == expected_cache_path, (
f"Expected cache path {expected_cache_path}, got {cache_instance.cache_path}"
)

# Verify launch parameters
mock_app.launch.assert_called_once_with(
server_name=host,
server_port=port,
share=share,
theme=mock_theme,
head='\n <link href="https://cdn.jsdelivr.net/npm/tailwindcss@2.2.19/dist/tailwind.min.css" rel="stylesheet">\n ',
)


def test_leaderboard_cli_integration():
"""Test the full CLI command integration."""
# Test that the command is recognized by the CLI
command = [sys.executable, "-m", "mteb", "--help"]
result = subprocess.run(command, capture_output=True, text=True)

assert result.returncode == 0
assert "leaderboard" in result.stdout, "Leaderboard command not found in main help"