diff --git a/Makefile b/Makefile
index e40c191a73..fa7a67aca7 100644
--- a/Makefile
+++ b/Makefile
@@ -6,7 +6,7 @@ install:
install-for-tests:
@echo "--- ๐ Installing project dependencies for test ---"
@echo "This ensures that the project is not installed in editable mode"
- pip install ".[bm25s,pylate,image,codecarbon,faiss-cpu]" --group dev
+ pip install ".[bm25s,pylate,image,codecarbon,leaderboard,faiss-cpu]" --group dev
lint:
@echo "--- ๐งน Running linters ---"
diff --git a/docs/usage/leaderboard.md b/docs/usage/leaderboard.md
index 47ee1d8860..f55871cb14 100644
--- a/docs/usage/leaderboard.md
+++ b/docs/usage/leaderboard.md
@@ -7,7 +7,35 @@ This section contains information on how to interact with the leaderboard includ
It is possible to completely deploy the leaderboard locally or self-host it. This can e.g. be relevant for companies that might want to
integrate build their own benchmarks or integrate custom tasks into existing benchmarks.
-Running the leaderboard is quite easy. Simply run:
+The leaderboard can be run in two ways:
+
+#### Using the CLI Command
+
+The easiest way to run the leaderboard is using the MTEB CLI:
+
+```bash
+mteb leaderboard
+```
+
+You can also specify a custom cache path for model results:
+
+```bash
+mteb leaderboard --cache-path results
+```
+
+Additional options:
+- `--host HOST`: Specify the host to run the server on (default: 0.0.0.0)
+- `--port PORT`: Specify the port to run the server on (default: 7860)
+- `--share`: Create a public URL for the leaderboard
+
+Example combining several options:
+```bash
+mteb leaderboard --cache-path results --port 8080 --share
+```
+
+#### Using Make Command
+
+Alternatively, you can use the Makefile:
```bash
make run-leaderboard
```
diff --git a/mteb/cli/build_cli.py b/mteb/cli/build_cli.py
index c307320a4a..4aad361db7 100644
--- a/mteb/cli/build_cli.py
+++ b/mteb/cli/build_cli.py
@@ -361,6 +361,95 @@ def _add_create_meta_parser(subparsers) -> None:
parser.set_defaults(func=_create_meta)
+def _add_leaderboard_parser(subparsers) -> None:
+ parser = subparsers.add_parser("leaderboard", help="Launch the MTEB leaderboard")
+
+ parser.add_argument(
+ "--cache-path",
+ type=str,
+ help="Path to the cache folder containing model results",
+ required=False,
+ default=None,
+ )
+ parser.add_argument(
+ "--host",
+ type=str,
+ default="0.0.0.0",
+ help="Host to run the leaderboard server on",
+ )
+ parser.add_argument(
+ "--port",
+ type=int,
+ default=7860,
+ help="Port to run the leaderboard server on",
+ )
+ parser.add_argument(
+ "--share",
+ action="store_true",
+ default=False,
+ help="Create a public URL for the leaderboard",
+ )
+
+ parser.set_defaults(func=_leaderboard)
+
+
+def _leaderboard(args: argparse.Namespace) -> None:
+ """Launch the MTEB leaderboard with specified cache path."""
+ # Import leaderboard module only when needed to avoid requiring leaderboard dependencies
+ # for other CLI commands
+ try:
+ import gradio as gr
+
+ from mteb.leaderboard import get_leaderboard_app
+ except ImportError as e:
+ raise ImportError(
+ "Seems like some dependencies are not installed. "
+ + "You can likely install these using: `pip install mteb[leaderboard]`. "
+ + f"{e}"
+ )
+
+ cache_path = args.cache_path
+
+ if cache_path:
+ logger.info(f"Using cache path: {cache_path}")
+ cache = ResultCache(cache_path)
+ else:
+ cache = ResultCache()
+ logger.info(f"Using default cache path: {cache.cache_path}")
+
+ app = get_leaderboard_app(cache)
+
+ logger.info(f"Starting leaderboard on {args.host}:{args.port}")
+ if args.share:
+ logger.info("Creating public URL...")
+
+ logging.getLogger("mteb.load_results.task_results").setLevel(
+ logging.ERROR
+ ) # Warnings related to task split
+ logging.getLogger("mteb.model_meta").setLevel(
+ logging.ERROR
+ ) # Warning related to model metadata (fetch_from_hf=False)
+ logging.getLogger("mteb.load_results.benchmark_results").setLevel(
+ logging.ERROR
+    )  # Warnings emitted while loading benchmark results
+ warnings.filterwarnings("ignore", message="Couldn't get scores for .* due to .*")
+
+    # Extra HTML <head> content — NOTE(review): comment mentioned Tailwind CSS but the string below is empty; confirm whether the intended markup was dropped
+ head = """
+
+ """
+
+ app.launch(
+ server_name=args.host,
+ server_port=args.port,
+ share=args.share,
+ theme=gr.themes.Soft(
+ font=[gr.themes.GoogleFont("Roboto Mono"), "Arial", "sans-serif"],
+ ),
+ head=head,
+ )
+
+
def build_cli() -> argparse.ArgumentParser:
"""Builds the argument parser for the MTEB CLI.
@@ -380,6 +469,7 @@ def build_cli() -> argparse.ArgumentParser:
_add_available_tasks_parser(subparsers)
_add_available_benchmarks_parser(subparsers)
_add_create_meta_parser(subparsers)
+ _add_leaderboard_parser(subparsers)
return parser
diff --git a/tests/test_cli.py b/tests/test_cli.py
index 5637af21bb..68d99efc91 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -4,6 +4,7 @@
import sys
from argparse import Namespace
from pathlib import Path
+from unittest.mock import MagicMock, patch
import pytest
import yaml
@@ -12,6 +13,7 @@
_available_benchmarks,
_available_tasks,
_create_meta,
+ _leaderboard,
run,
)
@@ -197,3 +199,107 @@ def test_create_meta_from_existing(
command = f"{sys.executable} -m mteb create-model-results --model-name {model_name} --results-folder {output_folder.as_posix()} --output-path {output_path.as_posix()} --from-existing {existing_readme.as_posix()} --overwrite"
result = subprocess.run(command, shell=True, capture_output=True, text=True)
assert result.returncode == 0, "Command failed"
+
+
+def test_leaderboard_help():
+ """Test that leaderboard help command works."""
+ command = [sys.executable, "-m", "mteb", "leaderboard", "--help"]
+ result = subprocess.run(command, capture_output=True, text=True)
+
+ assert result.returncode == 0, "Leaderboard help command failed"
+ assert "--cache-path" in result.stdout, "--cache-path option not found in help"
+ assert "--host" in result.stdout, "--host option not found in help"
+ assert "--port" in result.stdout, "--port option not found in help"
+ assert "--share" in result.stdout, "--share option not found in help"
+ assert "Path to the cache folder containing model results" in result.stdout, (
+ "Cache path description not found"
+ )
+
+
+@pytest.mark.parametrize(
+ "cache_path_input,host,port,share,test_description",
+ [
+ ("custom", "localhost", 8080, True, "custom cache path"),
+ (None, "127.0.0.1", 7860, False, "default cache path"),
+ ],
+)
+def test_leaderboard_cache_paths(
+ tmp_path: Path, cache_path_input, host, port, share, test_description
+):
+ """Test leaderboard with different cache path configurations."""
+
+ # Set up cache path based on parameter
+ if cache_path_input == "custom":
+ custom_cache = tmp_path / "my_results"
+ custom_cache.mkdir(exist_ok=True)
+ cache_path = str(custom_cache)
+ expected_cache_path = custom_cache
+ else:
+ cache_path = None
+ from mteb.cache import ResultCache
+
+ expected_cache_path = ResultCache().default_cache_path
+
+ # Mock the get_leaderboard_app function and the gradio app
+ mock_app = MagicMock()
+ mock_app.launch = MagicMock()
+
+ # Create a mock function that captures the cache argument and returns our mock app
+ def mock_get_app_func(cache):
+ # Store the cache for verification
+ mock_get_app_func.called_with_cache = cache
+ return mock_app
+
+ # Mock gradio themes
+ mock_theme = MagicMock()
+ mock_font = MagicMock()
+
+ # Patch the local import inside _leaderboard function
+ with patch.dict(
+ "sys.modules",
+ {
+ "mteb.leaderboard": MagicMock(get_leaderboard_app=mock_get_app_func),
+ "gradio": MagicMock(
+ themes=MagicMock(
+ Soft=MagicMock(return_value=mock_theme),
+ GoogleFont=MagicMock(return_value=mock_font),
+ )
+ ),
+ },
+ ):
+ args = Namespace(
+ cache_path=cache_path,
+ host=host,
+ port=port,
+ share=share,
+ )
+
+ _leaderboard(args)
+
+ # Verify get_leaderboard_app was called with a cache that has the correct path
+ assert hasattr(mock_get_app_func, "called_with_cache"), (
+ "get_leaderboard_app was not called"
+ )
+ cache_instance = mock_get_app_func.called_with_cache
+ assert cache_instance.cache_path == expected_cache_path, (
+ f"Expected cache path {expected_cache_path}, got {cache_instance.cache_path}"
+ )
+
+ # Verify launch parameters
+ mock_app.launch.assert_called_once_with(
+ server_name=host,
+ server_port=port,
+ share=share,
+ theme=mock_theme,
+ head='\n \n ',
+ )
+
+
+def test_leaderboard_cli_integration():
+ """Test the full CLI command integration."""
+ # Test that the command is recognized by the CLI
+ command = [sys.executable, "-m", "mteb", "--help"]
+ result = subprocess.run(command, capture_output=True, text=True)
+
+ assert result.returncode == 0
+ assert "leaderboard" in result.stdout, "Leaderboard command not found in main help"