Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3218,6 +3218,7 @@ jobs:
-e DD_API_KEY=$DD_API_KEY \
-e DD_SITE=$DD_SITE \
-e LITELLM_LICENSE=$LITELLM_LICENSE \
-e LITELLM_USE_CHAT_COMPLETIONS_URL_FOR_ANTHROPIC_MESSAGES=true \
--add-host host.docker.internal:host-gateway \
--name my-app \
-v $(pwd)/litellm/proxy/example_config_yaml/pass_through_config.yaml:/app/config.yaml \
Expand Down
2 changes: 1 addition & 1 deletion CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -156,4 +156,4 @@ LiteLLM is a unified interface for 100+ LLM providers with two main components:
**Fix options:**
1. **Create a Prisma migration** (permanent) — run `prisma migrate dev --name <description>` in the worktree. The generated file will be picked up by `prisma migrate deploy` on next startup.
2. **Apply manually for local dev** — `psql -d litellm -c "ALTER TABLE ... ADD COLUMN IF NOT EXISTS ..."` after each proxy start. Fine for dev, not for production.
3. **Update litellm-proxy-extras** — if the package is installed from PyPI, its migration directory must include the new file. Either update the package or run the migration manually until the next release ships it.
3. **Update litellm-proxy-extras** — if the package is installed from PyPI, its migration directory must include the new file. Either update the package or run the migration manually until the next release ships it.
2 changes: 1 addition & 1 deletion ci_cd/security_scans.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ echo "Starting security scans for LiteLLM..."
install_trivy() {
echo "Installing Trivy and required tools..."
sudo apt-get update
sudo apt-get install -y wget apt-transport-https gnupg lsb-release jq curl
sudo apt-get install -y wget apt-transport-https gnupg lsb-release jq curl bsdmainutils
wget -qO - https://aquasecurity.github.io/trivy-repo/deb/public.key | sudo apt-key add -
echo "deb https://aquasecurity.github.io/trivy-repo/deb $(lsb_release -sc) main" | sudo tee -a /etc/apt/sources.list.d/trivy.list
sudo apt-get update
Expand Down
9 changes: 8 additions & 1 deletion litellm/cost_calculator.py
Original file line number Diff line number Diff line change
Expand Up @@ -660,7 +660,14 @@ def _select_model_name_for_cost_calc(

if custom_pricing is True:
if router_model_id is not None and router_model_id in litellm.model_cost:
return_model = router_model_id
entry = litellm.model_cost[router_model_id]
if (
entry.get("input_cost_per_token") is not None
or entry.get("input_cost_per_second") is not None
):
return_model = router_model_id
else:
return_model = model
else:
return_model = model

Expand Down
1 change: 1 addition & 0 deletions litellm/passthrough/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ def forward_headers_from_request(
# Header We Should NOT forward
request_headers.pop("content-length", None)
request_headers.pop("host", None)
request_headers.pop("x-litellm-api-key", None)

# Combine request headers with custom headers
headers = {**request_headers, **headers}
Expand Down
3 changes: 2 additions & 1 deletion litellm/proxy/example_config_yaml/custom_auth_basic.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from fastapi import Request

from litellm.proxy._types import UserAPIKeyAuth
from litellm.proxy._types import LitellmUserRoles, UserAPIKeyAuth


async def user_api_key_auth(request: Request, api_key: str) -> UserAPIKeyAuth:
Expand All @@ -9,6 +9,7 @@ async def user_api_key_auth(request: Request, api_key: str) -> UserAPIKeyAuth:
api_key="best-api-key-ever",
user_id="best-user-id-ever",
team_id="best-team-id-ever",
user_role=LitellmUserRoles.PROXY_ADMIN,
)
except Exception:
raise Exception
25 changes: 19 additions & 6 deletions litellm/proxy/pass_through_endpoints/llm_passthrough_endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -596,11 +596,24 @@ async def anthropic_proxy_route(
base_url = httpx.URL(base_target_url)
updated_url = base_url.copy_with(path=encoded_endpoint)

# Add or update query parameters
anthropic_api_key = passthrough_endpoint_router.get_credentials(
custom_llm_provider="anthropic",
region_name=None,
)
# Credential priority: client-provided credentials take precedence over
# server credentials. This allows mixed mode where some users bring their
# own key (BYOK) or OAuth token (Claude Code Max) while others use the
# server's API key.
x_api_key_header = request.headers.get("x-api-key", "")
auth_header = request.headers.get("authorization", "")

if x_api_key_header or auth_header:
custom_headers = {}
else:
anthropic_api_key = passthrough_endpoint_router.get_credentials(
custom_llm_provider="anthropic",
region_name=None,
)
if anthropic_api_key:
custom_headers = {"x-api-key": anthropic_api_key}
else:
custom_headers = {}

## check for streaming
is_streaming_request = await is_streaming_request_fn(request)
Expand All @@ -609,7 +622,7 @@ async def anthropic_proxy_route(
endpoint_func = create_pass_through_route(
endpoint=endpoint,
target=str(updated_url),
custom_headers={"x-api-key": "{}".format(anthropic_api_key)},
custom_headers=custom_headers,
_forward_headers=True,
is_streaming_request=is_streaming_request,
) # dynamically construct pass-through endpoint based on incoming path
Expand Down
7 changes: 7 additions & 0 deletions litellm/proxy/proxy_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -340,9 +340,16 @@ def _maybe_setup_prometheus_multiproc_dir(
return

# Check if prometheus is in any callback list
# Each setting can be a list or a single string; normalize to list
callbacks = litellm_settings.get("callbacks") or []
success_callbacks = litellm_settings.get("success_callback") or []
failure_callbacks = litellm_settings.get("failure_callback") or []
if isinstance(callbacks, str):
callbacks = [callbacks]
if isinstance(success_callbacks, str):
success_callbacks = [success_callbacks]
if isinstance(failure_callbacks, str):
failure_callbacks = [failure_callbacks]
all_callbacks = callbacks + success_callbacks + failure_callbacks
if "prometheus" not in all_callbacks:
return
Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "litellm"
version = "1.82.2"
version = "1.82.3"
description = "Library to easily interface with LLM API providers"
authors = ["BerriAI"]
license = "MIT"
Expand Down Expand Up @@ -183,7 +183,7 @@ requires = ["poetry-core", "wheel"]
build-backend = "poetry.core.masonry.api"

[tool.commitizen]
version = "1.82.2"
version = "1.82.3"
version_files = [
"pyproject.toml:^version"
]
Expand Down
3 changes: 2 additions & 1 deletion tests/local_testing/test_router_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,7 @@ def test_router_get_model_info_wildcard_routes():


@pytest.mark.asyncio
@pytest.mark.flaky(retries=3, delay=1)
async def test_router_get_model_group_usage_wildcard_routes():
os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
litellm.model_cost = litellm.get_model_cost_map(url="")
Expand All @@ -219,7 +220,7 @@ async def test_router_get_model_group_usage_wildcard_routes():
)
print(resp)

await asyncio.sleep(1)
await asyncio.sleep(2)

tpm, rpm = await router.get_model_group_usage(model_group="gemini/gemini-1.5-flash")

Expand Down
3 changes: 3 additions & 0 deletions tests/mcp_tests/test_mcp_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -395,6 +395,7 @@ async def test_mcp_http_transport_tool_not_found():
@pytest.mark.asyncio
async def test_streamable_http_mcp_handler_mock():
"""Test the streamable HTTP MCP handler functionality"""
from litellm.proxy._types import UserAPIKeyAuth

# Mock the session manager and its methods
mock_session_manager = AsyncMock()
Expand Down Expand Up @@ -425,6 +426,8 @@ async def test_streamable_http_mcp_handler_mock():
), patch(
"litellm.proxy._experimental.mcp_server.server.extract_mcp_auth_context",
AsyncMock(return_value=mock_auth_context),
), patch(
"litellm.proxy._experimental.mcp_server.server.set_auth_context",
):
from litellm.proxy._experimental.mcp_server.server import (
handle_streamable_http_mcp,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from fastapi import Request, Response
from fastapi.testclient import TestClient

from litellm.passthrough.utils import CommonUtils
from litellm.passthrough.utils import BasePassthroughUtils, CommonUtils

sys.path.insert(
0, os.path.abspath("../../../..")
Expand Down Expand Up @@ -95,4 +95,42 @@ def test_encode_bedrock_runtime_modelid_arn_edge_cases():
endpoint = "model/arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/test-profile.v1/invoke"
expected = "model/arn:aws:bedrock:us-east-1:123456789012:application-inference-profile%2Ftest-profile.v1/invoke"
result = CommonUtils.encode_bedrock_runtime_modelid_arn(endpoint)
assert result == expected
assert result == expected


def test_forward_headers_strips_litellm_api_key():
    """The proxy's own auth header (x-litellm-api-key) must never reach upstream providers."""
    incoming = {
        "x-litellm-api-key": "sk-litellm-secret-key",
        "content-type": "application/json",
        "x-api-key": "sk-ant-api-key",
    }

    forwarded = BasePassthroughUtils.forward_headers_from_request(
        request_headers=dict(incoming),
        headers={},
        forward_headers=True,
    )

    # The litellm virtual key is stripped...
    assert "x-litellm-api-key" not in forwarded
    # ...while legitimate provider-bound headers pass through untouched.
    assert forwarded.get("content-type") == "application/json"
    assert forwarded.get("x-api-key") == "sk-ant-api-key"


def test_forward_headers_strips_host_and_content_length():
    """host and content-length should not be forwarded."""
    incoming = {
        "host": "api.anthropic.com",
        "content-length": "1234",
        "content-type": "application/json",
    }

    forwarded = BasePassthroughUtils.forward_headers_from_request(
        request_headers=dict(incoming),
        headers={},
        forward_headers=True,
    )

    # Hop-by-hop / transport headers must be dropped so the HTTP client
    # can compute its own values for the upstream request.
    for dropped in ("host", "content-length"):
        assert dropped not in forwarded
    assert forwarded.get("content-type") == "application/json"
24 changes: 24 additions & 0 deletions tests/test_litellm/proxy/test_prometheus_cleanup.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,30 @@ def test_respects_existing_env_var(self, tmp_path):
assert os.environ["PROMETHEUS_MULTIPROC_DIR"] == custom_dir
assert os.path.isdir(custom_dir)

@pytest.mark.parametrize(
    "litellm_settings",
    [
        {"callbacks": "prometheus"},
        {"success_callback": "prometheus"},
        {"failure_callback": "prometheus"},
        {"callbacks": "custom_callback"},  # string but not prometheus
    ],
)
def test_handles_string_callbacks(self, litellm_settings):
    """When callbacks are specified as a string instead of a list, should not crash."""
    with patch.dict(os.environ, {}, clear=False):
        os.environ.pop("PROMETHEUS_MULTIPROC_DIR", None)
        os.environ.pop("prometheus_multiproc_dir", None)

        try:
            # Should not raise TypeError
            ProxyInitializationHelpers._maybe_setup_prometheus_multiproc_dir(
                num_workers=4,
                litellm_settings=litellm_settings,
            )
        finally:
            # Run cleanup even if the call above raises, so a failure here
            # doesn't leak PROMETHEUS_MULTIPROC_DIR into subsequent tests.
            os.environ.pop("PROMETHEUS_MULTIPROC_DIR", None)

@pytest.mark.parametrize(
"num_workers, litellm_settings",
[
Expand Down
4 changes: 2 additions & 2 deletions tests/test_litellm/proxy/test_proxy_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -677,7 +677,7 @@ def test_startup_fails_when_db_setup_fails(
mock_atexit_register,
mock_subprocess_run,
):
"""Test that proxy exits with code 1 when PrismaManager.setup_database returns False"""
"""Test that proxy exits with code 1 when PrismaManager.setup_database returns False and --enforce_prisma_migration_check is set"""
from litellm.proxy.proxy_cli import run_server

mock_subprocess_run.return_value = MagicMock(returncode=0)
Expand Down Expand Up @@ -717,7 +717,7 @@ def test_startup_fails_when_db_setup_fails(

with pytest.raises(SystemExit) as exc_info:
run_server.main(
["--local", "--skip_server_startup"], standalone_mode=False
["--local", "--skip_server_startup", "--enforce_prisma_migration_check"], standalone_mode=False
)
assert exc_info.value.code == 1
mock_setup_database.assert_called_once_with(use_migrate=True)
Expand Down
59 changes: 59 additions & 0 deletions tests/test_litellm/test_cost_calculator.py
Original file line number Diff line number Diff line change
Expand Up @@ -388,6 +388,65 @@ def test_custom_pricing_cost_calc_uses_router_model_id_from_litellm_metadata():
assert custom_model_id not in (selected_model_no_custom or "")


def test_per_request_custom_pricing_with_router():
    """When custom pricing is passed as per-request kwargs (not in model_list),
    _select_model_name_for_cost_calc should fall back to the model name
    (where register_model stored the pricing) instead of the router_model_id
    (which has no pricing data).

    Regression test for the bug where response._hidden_params["response_cost"]
    returned 0.0 for per-request custom pricing via Router.
    """
    from litellm import Router
    from litellm.cost_calculator import _select_model_name_for_cost_calc

    router = Router(
        model_list=[
            {
                "model_name": "openai/gpt-3.5-turbo",
                "litellm_params": {
                    "model": "openai/gpt-3.5-turbo",
                    "api_key": "test_api_key",
                },
            },
        ]
    )

    # Get the deployment's model_id (hash) that the router registered
    deployment = router.model_list[0]
    router_model_id = deployment["model_info"]["id"]

    # The router registered this hash in model_cost but without custom pricing
    assert router_model_id in litellm.model_cost
    entry = litellm.model_cost[router_model_id]
    # No custom pricing was set in model_list, so these should be None
    assert entry.get("input_cost_per_token") is None

    # Snapshot the global pricing entry we are about to mutate so the test
    # restores litellm.model_cost and doesn't leak fake pricing into other tests.
    saved_entry = litellm.model_cost.get("openai/gpt-3.5-turbo")
    saved_entry = dict(saved_entry) if saved_entry is not None else None

    try:
        # Now simulate what completion() does: register custom pricing under the model name
        litellm.register_model(
            {
                "openai/gpt-3.5-turbo": {
                    "input_cost_per_token": 2.0,
                    "output_cost_per_token": 2.0,
                    "litellm_provider": "openai",
                }
            }
        )

        # _select_model_name_for_cost_calc should pick the model name (which has pricing),
        # NOT the router_model_id (which has no pricing)
        selected = _select_model_name_for_cost_calc(
            model="openai/gpt-3.5-turbo",
            completion_response=None,
            custom_pricing=True,
            custom_llm_provider="openai",
            router_model_id=router_model_id,
        )
        assert selected is not None
        assert router_model_id not in selected
        assert "gpt-3.5-turbo" in selected
    finally:
        # Undo the register_model() mutation of the global cost map.
        if saved_entry is None:
            litellm.model_cost.pop("openai/gpt-3.5-turbo", None)
        else:
            litellm.model_cost["openai/gpt-3.5-turbo"] = saved_entry


def test_azure_realtime_cost_calculator():
os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
litellm.model_cost = litellm.get_model_cost_map(url="")
Expand Down
Loading