From bbcbf4882b268af221c3b6d10a088797c3b658f5 Mon Sep 17 00:00:00 2001
From: Nathan Habib
Date: Wed, 5 Nov 2025 15:19:59 +0100
Subject: [PATCH 1/3] run all hf-providers

---
 src/lighteval/main_inspect.py | 35 +++++++++++++++++++++++++++++++++--
 1 file changed, 33 insertions(+), 2 deletions(-)

diff --git a/src/lighteval/main_inspect.py b/src/lighteval/main_inspect.py
index be7958873..e13471526 100644
--- a/src/lighteval/main_inspect.py
+++ b/src/lighteval/main_inspect.py
@@ -24,6 +24,7 @@
 from collections import defaultdict
 from typing import Literal
 
+import requests
 from huggingface_hub import HfApi
 from inspect_ai import Epochs, Task, task
 from inspect_ai import eval_set as inspect_ai_eval_set
@@ -182,13 +183,31 @@ def _format_metric_cell(data: dict, col: str, metric: str, stderr_metric: str) -
     return "-"
 
 
+def _get_huggingface_providers(model_id: str) -> list[str]:
+    model_id = model_id.replace("hf-inference-providers/", "").replace(":all", "")
+    url = f"https://huggingface.co/api/models/{model_id}"
+    params = {"expand[]": "inferenceProviderMapping"}
+    response = requests.get(url, params=params, timeout=30)
+    response.raise_for_status()  # raise exception for HTTP errors
+    data = response.json()
+    # Extract provider mapping if available
+    providers = data.get("inferenceProviderMapping", {})
+
+    live_providers = []
+    for provider, info in providers.items():
+        if info.get("status") == "live":
+            live_providers.append(provider)
+
+    return live_providers
+
+
 HELP_PANEL_NAME_1 = "Modeling Parameters"
 HELP_PANEL_NAME_2 = "Task Parameters"
 HELP_PANEL_NAME_3 = "Connection and parallelization parameters"
 HELP_PANEL_NAME_4 = "Logging parameters"
 
 
-def eval(
+def eval(  # noqa: C901
     models: Annotated[list[str], Argument(help="Models to evaluate")],
     tasks: Annotated[str, Argument(help="Tasks to evaluate")],
     # model arguments
@@ -404,13 +423,25 @@ def eval(
     else:
         model_args = {}
 
+    # Expand "hf-inference-providers/<model>:all" into one entry per live provider,
+    # accumulating into a new list so the other models passed on the CLI are kept.
+    expanded_models = []
+    for model in models:
+        if model.split("/")[0] == "hf-inference-providers" and model.split(":")[-1] == "all":
+            providers = _get_huggingface_providers(model)
+            expanded_models.extend(f"{model.replace(':all', '')}:{provider}" for provider in providers)
+        else:
+            expanded_models.append(model)
+    models = expanded_models
+
     success, logs = inspect_ai_eval_set(
         inspect_ai_tasks,
         model=models,
         max_connections=max_connections,
         timeout=timeout,
         retry_on_error=retry_on_error,
-        max_retries=max_retries,
+        max_retries=max_retries,  # not counted
+        fail_on_error=True,
         limit=max_samples,
         max_tasks=max_tasks,
         log_dir=log_dir,

From 8c39e758739d2050a03714ca541aa6ac43a07baf Mon Sep 17 00:00:00 2001
From: Nathan Habib
Date: Wed, 5 Nov 2025 15:21:06 +0100
Subject: [PATCH 2/3] add example

---
 docs/source/inspect-ai.mdx | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/docs/source/inspect-ai.mdx b/docs/source/inspect-ai.mdx
index 9cdeb8802..30c53ce4d 100644
--- a/docs/source/inspect-ai.mdx
+++ b/docs/source/inspect-ai.mdx
@@ -40,6 +40,14 @@ lighteval eval \
 "lighteval|gpqa:diamond|0"
 ```
 
+You can also compare all providers serving a model in a single command:
+
+```bash
+lighteval eval \
+    hf-inference-providers/openai/gpt-oss-20b:all \
+    "lighteval|gpqa:diamond|0"
+```
+
 4. Evaluate a vLLM or SGLang model.
 
 ```bash

From 2486b64a29c8107a7ba5688a0d9634549789c100 Mon Sep 17 00:00:00 2001
From: Nathan Habib
Date: Wed, 5 Nov 2025 15:22:11 +0100
Subject: [PATCH 3/3] remove unneeded params

---
 src/lighteval/main_inspect.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/lighteval/main_inspect.py b/src/lighteval/main_inspect.py
index e13471526..0bc50c34f 100644
--- a/src/lighteval/main_inspect.py
+++ b/src/lighteval/main_inspect.py
@@ -440,8 +440,7 @@ def eval(  # noqa: C901
         max_connections=max_connections,
         timeout=timeout,
         retry_on_error=retry_on_error,
-        max_retries=max_retries,  # not counted
-        fail_on_error=True,
+        max_retries=max_retries,
         limit=max_samples,
         max_tasks=max_tasks,
         log_dir=log_dir,