Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3218,6 +3218,7 @@ jobs:
-e DD_API_KEY=$DD_API_KEY \
-e DD_SITE=$DD_SITE \
-e LITELLM_LICENSE=$LITELLM_LICENSE \
-e LITELLM_USE_CHAT_COMPLETIONS_URL_FOR_ANTHROPIC_MESSAGES=true \
--add-host host.docker.internal:host-gateway \
--name my-app \
-v $(pwd)/litellm/proxy/example_config_yaml/pass_through_config.yaml:/app/config.yaml \
Expand Down
2 changes: 1 addition & 1 deletion CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -156,4 +156,4 @@ LiteLLM is a unified interface for 100+ LLM providers with two main components:
**Fix options:**
1. **Create a Prisma migration** (permanent) — run `prisma migrate dev --name <description>` in the worktree. The generated file will be picked up by `prisma migrate deploy` on next startup.
2. **Apply manually for local dev** — `psql -d litellm -c "ALTER TABLE ... ADD COLUMN IF NOT EXISTS ..."` after each proxy start. Fine for dev, not for production.
3. **Update litellm-proxy-extras** — if the package is installed from PyPI, its migration directory must include the new file. Either update the package or run the migration manually until the next release ships it.
3. **Update litellm-proxy-extras** — if the package is installed from PyPI, its migration directory must include the new file. Either update the package or run the migration manually until the next release ships it.
2 changes: 1 addition & 1 deletion ci_cd/security_scans.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ echo "Starting security scans for LiteLLM..."
install_trivy() {
echo "Installing Trivy and required tools..."
sudo apt-get update
sudo apt-get install -y wget apt-transport-https gnupg lsb-release jq curl
sudo apt-get install -y wget apt-transport-https gnupg lsb-release jq curl bsdmainutils
wget -qO - https://aquasecurity.github.io/trivy-repo/deb/public.key | sudo apt-key add -
echo "deb https://aquasecurity.github.io/trivy-repo/deb $(lsb_release -sc) main" | sudo tee -a /etc/apt/sources.list.d/trivy.list
sudo apt-get update
Expand Down
9 changes: 8 additions & 1 deletion litellm/cost_calculator.py
Original file line number Diff line number Diff line change
Expand Up @@ -660,7 +660,14 @@ def _select_model_name_for_cost_calc(

if custom_pricing is True:
if router_model_id is not None and router_model_id in litellm.model_cost:
return_model = router_model_id
entry = litellm.model_cost[router_model_id]
if (
entry.get("input_cost_per_token") is not None
or entry.get("input_cost_per_second") is not None
):
return_model = router_model_id
else:
return_model = model
else:
return_model = model

Expand Down
1 change: 1 addition & 0 deletions litellm/passthrough/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ def forward_headers_from_request(
# Header We Should NOT forward
request_headers.pop("content-length", None)
request_headers.pop("host", None)
request_headers.pop("x-litellm-api-key", None)

# Combine request headers with custom headers
headers = {**request_headers, **headers}
Expand Down
3 changes: 2 additions & 1 deletion litellm/proxy/example_config_yaml/custom_auth_basic.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from fastapi import Request

from litellm.proxy._types import UserAPIKeyAuth
from litellm.proxy._types import LitellmUserRoles, UserAPIKeyAuth


async def user_api_key_auth(request: Request, api_key: str) -> UserAPIKeyAuth:
Expand All @@ -9,6 +9,7 @@ async def user_api_key_auth(request: Request, api_key: str) -> UserAPIKeyAuth:
api_key="best-api-key-ever",
user_id="best-user-id-ever",
team_id="best-team-id-ever",
user_role=LitellmUserRoles.PROXY_ADMIN,
)
except Exception:
raise Exception
25 changes: 19 additions & 6 deletions litellm/proxy/pass_through_endpoints/llm_passthrough_endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -596,11 +596,24 @@ async def anthropic_proxy_route(
base_url = httpx.URL(base_target_url)
updated_url = base_url.copy_with(path=encoded_endpoint)

# Add or update query parameters
anthropic_api_key = passthrough_endpoint_router.get_credentials(
custom_llm_provider="anthropic",
region_name=None,
)
# Credential priority: client-provided credentials take precedence over
# server credentials. This allows mixed mode where some users bring their
# own key (BYOK) or OAuth token (Claude Code Max) while others use the
# server's API key.
x_api_key_header = request.headers.get("x-api-key", "")
auth_header = request.headers.get("authorization", "")

if x_api_key_header or auth_header:
custom_headers = {}
else:
anthropic_api_key = passthrough_endpoint_router.get_credentials(
custom_llm_provider="anthropic",
region_name=None,
)
if anthropic_api_key:
custom_headers = {"x-api-key": anthropic_api_key}
else:
custom_headers = {}

## check for streaming
is_streaming_request = await is_streaming_request_fn(request)
Expand All @@ -609,7 +622,7 @@ async def anthropic_proxy_route(
endpoint_func = create_pass_through_route(
endpoint=endpoint,
target=str(updated_url),
custom_headers={"x-api-key": "{}".format(anthropic_api_key)},
custom_headers=custom_headers,
_forward_headers=True,
is_streaming_request=is_streaming_request,
) # dynamically construct pass-through endpoint based on incoming path
Expand Down
7 changes: 7 additions & 0 deletions litellm/proxy/proxy_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -340,9 +340,16 @@ def _maybe_setup_prometheus_multiproc_dir(
return

# Check if prometheus is in any callback list
# Each setting can be a list or a single string; normalize to list
callbacks = litellm_settings.get("callbacks") or []
success_callbacks = litellm_settings.get("success_callback") or []
failure_callbacks = litellm_settings.get("failure_callback") or []
if isinstance(callbacks, str):
callbacks = [callbacks]
if isinstance(success_callbacks, str):
success_callbacks = [success_callbacks]
if isinstance(failure_callbacks, str):
failure_callbacks = [failure_callbacks]
all_callbacks = callbacks + success_callbacks + failure_callbacks
if "prometheus" not in all_callbacks:
return
Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "litellm"
version = "1.82.2"
version = "1.82.3"
description = "Library to easily interface with LLM API providers"
authors = ["BerriAI"]
license = "MIT"
Expand Down Expand Up @@ -183,7 +183,7 @@ requires = ["poetry-core", "wheel"]
build-backend = "poetry.core.masonry.api"

[tool.commitizen]
version = "1.82.2"
version = "1.82.3"
version_files = [
"pyproject.toml:^version"
]
Expand Down
3 changes: 2 additions & 1 deletion tests/local_testing/test_router_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,7 @@ def test_router_get_model_info_wildcard_routes():


@pytest.mark.asyncio
@pytest.mark.flaky(retries=3, delay=1)
async def test_router_get_model_group_usage_wildcard_routes():
os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
litellm.model_cost = litellm.get_model_cost_map(url="")
Expand All @@ -219,7 +220,7 @@ async def test_router_get_model_group_usage_wildcard_routes():
)
print(resp)

await asyncio.sleep(1)
await asyncio.sleep(2)

tpm, rpm = await router.get_model_group_usage(model_group="gemini/gemini-1.5-flash")

Expand Down
3 changes: 3 additions & 0 deletions tests/mcp_tests/test_mcp_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -395,6 +395,7 @@ async def test_mcp_http_transport_tool_not_found():
@pytest.mark.asyncio
async def test_streamable_http_mcp_handler_mock():
"""Test the streamable HTTP MCP handler functionality"""
from litellm.proxy._types import UserAPIKeyAuth

# Mock the session manager and its methods
mock_session_manager = AsyncMock()
Expand Down Expand Up @@ -425,6 +426,8 @@ async def test_streamable_http_mcp_handler_mock():
), patch(
"litellm.proxy._experimental.mcp_server.server.extract_mcp_auth_context",
AsyncMock(return_value=mock_auth_context),
), patch(
"litellm.proxy._experimental.mcp_server.server.set_auth_context",
):
from litellm.proxy._experimental.mcp_server.server import (
handle_streamable_http_mcp,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from fastapi import Request, Response
from fastapi.testclient import TestClient

from litellm.passthrough.utils import CommonUtils
from litellm.passthrough.utils import BasePassthroughUtils, CommonUtils

sys.path.insert(
0, os.path.abspath("../../../..")
Expand Down Expand Up @@ -95,4 +95,42 @@ def test_encode_bedrock_runtime_modelid_arn_edge_cases():
endpoint = "model/arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/test-profile.v1/invoke"
expected = "model/arn:aws:bedrock:us-east-1:123456789012:application-inference-profile%2Ftest-profile.v1/invoke"
result = CommonUtils.encode_bedrock_runtime_modelid_arn(endpoint)
assert result == expected
assert result == expected


def test_forward_headers_strips_litellm_api_key():
    """The proxy's own auth header (x-litellm-api-key) must never reach upstream providers."""
    incoming = {
        "x-litellm-api-key": "sk-litellm-secret-key",
        "content-type": "application/json",
        "x-api-key": "sk-ant-api-key",
    }

    forwarded = BasePassthroughUtils.forward_headers_from_request(
        request_headers=dict(incoming),
        headers={},
        forward_headers=True,
    )

    # The litellm virtual key is stripped...
    assert "x-litellm-api-key" not in forwarded
    # ...while legitimate provider-bound headers pass through untouched.
    assert forwarded.get("content-type") == "application/json"
    assert forwarded.get("x-api-key") == "sk-ant-api-key"


def test_forward_headers_strips_host_and_content_length():
    """host and content-length should not be forwarded."""
    incoming = {
        "host": "api.anthropic.com",
        "content-length": "1234",
        "content-type": "application/json",
    }

    forwarded = BasePassthroughUtils.forward_headers_from_request(
        request_headers=dict(incoming),
        headers={},
        forward_headers=True,
    )

    # Hop-by-hop / transport headers must be dropped so the HTTP client
    # can compute its own values for the upstream request.
    for dropped in ("host", "content-length"):
        assert dropped not in forwarded
    assert forwarded.get("content-type") == "application/json"
24 changes: 24 additions & 0 deletions tests/test_litellm/proxy/test_prometheus_cleanup.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,30 @@ def test_respects_existing_env_var(self, tmp_path):
assert os.environ["PROMETHEUS_MULTIPROC_DIR"] == custom_dir
assert os.path.isdir(custom_dir)

@pytest.mark.parametrize(
    "litellm_settings",
    [
        {"callbacks": "prometheus"},
        {"success_callback": "prometheus"},
        {"failure_callback": "prometheus"},
        {"callbacks": "custom_callback"},  # string but not prometheus
    ],
)
def test_handles_string_callbacks(self, litellm_settings):
    """When callbacks are specified as a string instead of a list, should not crash."""
    with patch.dict(os.environ, {}, clear=False):
        os.environ.pop("PROMETHEUS_MULTIPROC_DIR", None)
        os.environ.pop("prometheus_multiproc_dir", None)

        try:
            # Should not raise TypeError
            ProxyInitializationHelpers._maybe_setup_prometheus_multiproc_dir(
                num_workers=4,
                litellm_settings=litellm_settings,
            )
        finally:
            # Run cleanup even if the call above raises, so a failure here
            # doesn't leak PROMETHEUS_MULTIPROC_DIR into subsequent tests.
            os.environ.pop("PROMETHEUS_MULTIPROC_DIR", None)

@pytest.mark.parametrize(
"num_workers, litellm_settings",
[
Expand Down
4 changes: 2 additions & 2 deletions tests/test_litellm/proxy/test_proxy_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -677,7 +677,7 @@ def test_startup_fails_when_db_setup_fails(
mock_atexit_register,
mock_subprocess_run,
):
"""Test that proxy exits with code 1 when PrismaManager.setup_database returns False"""
"""Test that proxy exits with code 1 when PrismaManager.setup_database returns False and --enforce_prisma_migration_check is set"""
from litellm.proxy.proxy_cli import run_server

mock_subprocess_run.return_value = MagicMock(returncode=0)
Expand Down Expand Up @@ -717,7 +717,7 @@ def test_startup_fails_when_db_setup_fails(

with pytest.raises(SystemExit) as exc_info:
run_server.main(
["--local", "--skip_server_startup"], standalone_mode=False
["--local", "--skip_server_startup", "--enforce_prisma_migration_check"], standalone_mode=False
)
assert exc_info.value.code == 1
mock_setup_database.assert_called_once_with(use_migrate=True)
Expand Down
59 changes: 59 additions & 0 deletions tests/test_litellm/test_cost_calculator.py
Original file line number Diff line number Diff line change
Expand Up @@ -388,6 +388,65 @@ def test_custom_pricing_cost_calc_uses_router_model_id_from_litellm_metadata():
assert custom_model_id not in (selected_model_no_custom or "")


def test_per_request_custom_pricing_with_router():
    """When custom pricing is passed as per-request kwargs (not in model_list),
    _select_model_name_for_cost_calc should fall back to the model name
    (where register_model stored the pricing) instead of the router_model_id
    (which has no pricing data).

    Regression test for the bug where response._hidden_params["response_cost"]
    returned 0.0 for per-request custom pricing via Router.
    """
    from litellm import Router
    from litellm.cost_calculator import _select_model_name_for_cost_calc

    router = Router(
        model_list=[
            {
                "model_name": "openai/gpt-3.5-turbo",
                "litellm_params": {
                    "model": "openai/gpt-3.5-turbo",
                    "api_key": "test_api_key",
                },
            },
        ]
    )

    # Get the deployment's model_id (hash) that the router registered
    deployment = router.model_list[0]
    router_model_id = deployment["model_info"]["id"]

    # The router registered this hash in model_cost but without custom pricing
    assert router_model_id in litellm.model_cost
    entry = litellm.model_cost[router_model_id]
    # No custom pricing was set in model_list, so these should be None
    assert entry.get("input_cost_per_token") is None

    # Snapshot the global pricing entry we are about to mutate so the test
    # restores litellm.model_cost and doesn't leak fake pricing into other tests.
    saved_entry = litellm.model_cost.get("openai/gpt-3.5-turbo")
    saved_entry = dict(saved_entry) if saved_entry is not None else None

    try:
        # Now simulate what completion() does: register custom pricing under the model name
        litellm.register_model(
            {
                "openai/gpt-3.5-turbo": {
                    "input_cost_per_token": 2.0,
                    "output_cost_per_token": 2.0,
                    "litellm_provider": "openai",
                }
            }
        )

        # _select_model_name_for_cost_calc should pick the model name (which has pricing),
        # NOT the router_model_id (which has no pricing)
        selected = _select_model_name_for_cost_calc(
            model="openai/gpt-3.5-turbo",
            completion_response=None,
            custom_pricing=True,
            custom_llm_provider="openai",
            router_model_id=router_model_id,
        )
        assert selected is not None
        assert router_model_id not in selected
        assert "gpt-3.5-turbo" in selected
    finally:
        # Undo the register_model() mutation of the global cost map.
        if saved_entry is None:
            litellm.model_cost.pop("openai/gpt-3.5-turbo", None)
        else:
            litellm.model_cost["openai/gpt-3.5-turbo"] = saved_entry


def test_azure_realtime_cost_calculator():
os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
litellm.model_cost = litellm.get_model_cost_map(url="")
Expand Down
Loading