Merged
20 commits
2f4606c
feat: add leaderboard CLI command with cache-path option
isaac-chung Dec 28, 2025
492c767
test: add comprehensive tests for leaderboard CLI command
isaac-chung Dec 28, 2025
a6dfcea
try to fix install
isaac-chung Dec 28, 2025
1add54e
fix: lazy-load leaderboard to avoid requiring deps for CLI
isaac-chung Dec 28, 2025
bc94b2c
Update mteb/cli/build_cli.py
isaac-chung Dec 28, 2025
10bb4ac
make lint
isaac-chung Dec 28, 2025
dee0d7e
remove AGENTS.md
isaac-chung Dec 29, 2025
8450b96
move import to top of file
isaac-chung Dec 29, 2025
6355415
log the default cache path
isaac-chung Dec 29, 2025
74a2172
Improve leaderboard tests to verify actual cache paths
isaac-chung Dec 29, 2025
37cd876
Combine leaderboard cache tests using pytest parametrize
isaac-chung Dec 29, 2025
d4df501
Update make run-leaderboard to use new CLI and remove app.py main block
isaac-chung Dec 29, 2025
2a24b7a
feat: add theme and head parameters to leaderboard CLI
isaac-chung Dec 29, 2025
4e1b72e
fix: suppress leaderboard warnings on CLI launch
isaac-chung Dec 29, 2025
63804e7
test: update leaderboard tests for theme and head params
isaac-chung Dec 29, 2025
8730ef0
Revert "Update make run-leaderboard to use new CLI and remove app.py …
isaac-chung Dec 29, 2025
72f3c4f
Update mteb/cli/build_cli.py
isaac-chung Dec 30, 2025
d5f1f06
docs: update leaderboard CLI usage
isaac-chung Dec 30, 2025
a1fdff3
update docs to show defaults
isaac-chung Dec 30, 2025
3a478f1
fix: apply ruff formatting
isaac-chung Dec 30, 2025
2 changes: 1 addition & 1 deletion Makefile
@@ -6,7 +6,7 @@ install:
install-for-tests:
@echo "--- 🚀 Installing project dependencies for test ---"
@echo "This ensures that the project is not installed in editable mode"
pip install ".[bm25s,pylate,image,codecarbon,faiss-cpu]" --group dev
pip install ".[bm25s,pylate,image,codecarbon,leaderboard,faiss-cpu]" --group dev

lint:
@echo "--- 🧹 Running linters ---"
30 changes: 29 additions & 1 deletion docs/usage/leaderboard.md
@@ -7,7 +7,35 @@ This section contains information on how to interact with the leaderboard includ
The leaderboard can be deployed entirely locally or self-hosted. This can be relevant, e.g., for companies that want to
build their own benchmarks or integrate custom tasks into existing benchmarks.

Running the leaderboard is quite easy. Simply run:
The leaderboard can be run in two ways:

#### Using the CLI Command

The easiest way to run the leaderboard is using the MTEB CLI:

```bash
mteb leaderboard
```

You can also specify a custom cache path for model results:

```bash
mteb leaderboard --cache-path results
```

Additional options:
- `--host HOST`: Specify the host to run the server on (default: 0.0.0.0)
- `--port PORT`: Specify the port to run the server on (default: 7860)
- `--share`: Create a public URL for the leaderboard

Example with all options:
```bash
mteb leaderboard --cache-path results --port 8080 --share
```

#### Using the Make Command
Contributor comment: I would probably delete this since most users don't git clone the repo


Alternatively, you can use the Makefile:
```bash
make run-leaderboard
```
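
For self-hosted deployments that embed the leaderboard in an existing Python process instead of going through the CLI, the command is essentially a thin wrapper around the leaderboard app. A minimal sketch, assuming the internal `ResultCache`/`get_leaderboard_app` API used by the implementation in `mteb/cli/build_cli.py` below (theme and head customisation omitted):

```python
from mteb.cache import ResultCache
from mteb.leaderboard import get_leaderboard_app

# Point the cache at a local results folder; ResultCache() without an argument
# falls back to the default cache path, mirroring the CLI behaviour.
cache = ResultCache("results")
app = get_leaderboard_app(cache)

# Same defaults as `mteb leaderboard`: host 0.0.0.0, port 7860, no public share link.
app.launch(server_name="0.0.0.0", server_port=7860, share=False)
```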
90 changes: 90 additions & 0 deletions mteb/cli/build_cli.py
@@ -361,6 +361,95 @@ def _add_create_meta_parser(subparsers) -> None:
parser.set_defaults(func=_create_meta)


def _add_leaderboard_parser(subparsers) -> None:
parser = subparsers.add_parser("leaderboard", help="Launch the MTEB leaderboard")

parser.add_argument(
"--cache-path",
type=str,
help="Path to the cache folder containing model results",
required=False,
default=None,
)
parser.add_argument(
"--host",
type=str,
default="0.0.0.0",
help="Host to run the leaderboard server on",
)
parser.add_argument(
"--port",
type=int,
default=7860,
help="Port to run the leaderboard server on",
)
parser.add_argument(
"--share",
action="store_true",
default=False,
help="Create a public URL for the leaderboard",
)

parser.set_defaults(func=_leaderboard)


def _leaderboard(args: argparse.Namespace) -> None:
"""Launch the MTEB leaderboard with specified cache path."""
# Import leaderboard module only when needed to avoid requiring leaderboard dependencies
# for other CLI commands
try:
import gradio as gr

from mteb.leaderboard import get_leaderboard_app
except ImportError as e:
raise ImportError(
"Seems like some dependencies are not installed. "
+ "You can likely install these using: `pip install mteb[leaderboard]`. "
+ f"{e}"
)

cache_path = args.cache_path

if cache_path:
logger.info(f"Using cache path: {cache_path}")
cache = ResultCache(cache_path)
else:
cache = ResultCache()
logger.info(f"Using default cache path: {cache.cache_path}")

app = get_leaderboard_app(cache)

logger.info(f"Starting leaderboard on {args.host}:{args.port}")
if args.share:
logger.info("Creating public URL...")

logging.getLogger("mteb.load_results.task_results").setLevel(
logging.ERROR
) # Warnings related to task split
logging.getLogger("mteb.model_meta").setLevel(
logging.ERROR
) # Warning related to model metadata (fetch_from_hf=False)
logging.getLogger("mteb.load_results.benchmark_results").setLevel(
logging.ERROR
) # Warning related to model metadata (fetch_from_hf=False)
warnings.filterwarnings("ignore", message="Couldn't get scores for .* due to .*")

# Head content for Tailwind CSS
head = """
<link href="https://cdn.jsdelivr.net/npm/tailwindcss@2.2.19/dist/tailwind.min.css" rel="stylesheet">
"""

app.launch(
server_name=args.host,
server_port=args.port,
share=args.share,
theme=gr.themes.Soft(
font=[gr.themes.GoogleFont("Roboto Mono"), "Arial", "sans-serif"],
),
head=head,
)


def build_cli() -> argparse.ArgumentParser:
"""Builds the argument parser for the MTEB CLI.

@@ -380,6 +469,7 @@ def build_cli() -> argparse.ArgumentParser:
_add_available_tasks_parser(subparsers)
_add_available_benchmarks_parser(subparsers)
_add_create_meta_parser(subparsers)
_add_leaderboard_parser(subparsers)

return parser

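Because the handler is attached via `parser.set_defaults(func=_leaderboard)`, the new subcommand can also be exercised directly against the built parser, which is handy for quick local checks. A rough sketch, assuming the usual `args.func(args)` dispatch used by the CLI entry point:

```python
from mteb.cli.build_cli import build_cli

# Parse arguments exactly as `mteb leaderboard --cache-path results --port 8080` would.
parser = build_cli()
args = parser.parse_args(["leaderboard", "--cache-path", "results", "--port", "8080"])

# set_defaults(func=_leaderboard) attached the handler, so dispatch is a single call.
# Note: this launches the Gradio server and blocks until interrupted.
args.func(args)
```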
106 changes: 106 additions & 0 deletions tests/test_cli.py
@@ -4,6 +4,7 @@
import sys
from argparse import Namespace
from pathlib import Path
from unittest.mock import MagicMock, patch

import pytest
import yaml
@@ -12,6 +13,7 @@
_available_benchmarks,
_available_tasks,
_create_meta,
_leaderboard,
run,
)

@@ -197,3 +199,107 @@ def test_create_meta_from_existing(
command = f"{sys.executable} -m mteb create-model-results --model-name {model_name} --results-folder {output_folder.as_posix()} --output-path {output_path.as_posix()} --from-existing {existing_readme.as_posix()} --overwrite"
result = subprocess.run(command, shell=True, capture_output=True, text=True)
assert result.returncode == 0, "Command failed"


def test_leaderboard_help():
"""Test that leaderboard help command works."""
command = [sys.executable, "-m", "mteb", "leaderboard", "--help"]
result = subprocess.run(command, capture_output=True, text=True)

assert result.returncode == 0, "Leaderboard help command failed"
assert "--cache-path" in result.stdout, "--cache-path option not found in help"
assert "--host" in result.stdout, "--host option not found in help"
assert "--port" in result.stdout, "--port option not found in help"
assert "--share" in result.stdout, "--share option not found in help"
assert "Path to the cache folder containing model results" in result.stdout, (
"Cache path description not found"
)


@pytest.mark.parametrize(
"cache_path_input,host,port,share,test_description",
[
("custom", "localhost", 8080, True, "custom cache path"),
(None, "127.0.0.1", 7860, False, "default cache path"),
],
)
def test_leaderboard_cache_paths(
tmp_path: Path, cache_path_input, host, port, share, test_description
):
"""Test leaderboard with different cache path configurations."""

# Set up cache path based on parameter
if cache_path_input == "custom":
custom_cache = tmp_path / "my_results"
custom_cache.mkdir(exist_ok=True)
cache_path = str(custom_cache)
expected_cache_path = custom_cache
else:
cache_path = None
from mteb.cache import ResultCache

expected_cache_path = ResultCache().default_cache_path

# Mock the get_leaderboard_app function and the gradio app
mock_app = MagicMock()
mock_app.launch = MagicMock()

# Create a mock function that captures the cache argument and returns our mock app
def mock_get_app_func(cache):
# Store the cache for verification
mock_get_app_func.called_with_cache = cache
return mock_app

# Mock gradio themes
mock_theme = MagicMock()
mock_font = MagicMock()

# Patch the local import inside _leaderboard function
with patch.dict(
"sys.modules",
{
"mteb.leaderboard": MagicMock(get_leaderboard_app=mock_get_app_func),
"gradio": MagicMock(
themes=MagicMock(
Soft=MagicMock(return_value=mock_theme),
GoogleFont=MagicMock(return_value=mock_font),
)
),
},
):
args = Namespace(
cache_path=cache_path,
host=host,
port=port,
share=share,
)

_leaderboard(args)

# Verify get_leaderboard_app was called with a cache that has the correct path
assert hasattr(mock_get_app_func, "called_with_cache"), (
"get_leaderboard_app was not called"
)
cache_instance = mock_get_app_func.called_with_cache
assert cache_instance.cache_path == expected_cache_path, (
f"Expected cache path {expected_cache_path}, got {cache_instance.cache_path}"
)

# Verify launch parameters
mock_app.launch.assert_called_once_with(
server_name=host,
server_port=port,
share=share,
theme=mock_theme,
head='\n <link href="https://cdn.jsdelivr.net/npm/tailwindcss@2.2.19/dist/tailwind.min.css" rel="stylesheet">\n ',
)


def test_leaderboard_cli_integration():
"""Test the full CLI command integration."""
# Test that the command is recognized by the CLI
command = [sys.executable, "-m", "mteb", "--help"]
result = subprocess.run(command, capture_output=True, text=True)

assert result.returncode == 0
assert "leaderboard" in result.stdout, "Leaderboard command not found in main help"