Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3218,6 +3218,7 @@ jobs:
-e DD_API_KEY=$DD_API_KEY \
-e DD_SITE=$DD_SITE \
-e LITELLM_LICENSE=$LITELLM_LICENSE \
-e LITELLM_USE_CHAT_COMPLETIONS_URL_FOR_ANTHROPIC_MESSAGES=true \
--add-host host.docker.internal:host-gateway \
--name my-app \
-v $(pwd)/litellm/proxy/example_config_yaml/pass_through_config.yaml:/app/config.yaml \
Expand Down
2 changes: 1 addition & 1 deletion CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -156,4 +156,4 @@ LiteLLM is a unified interface for 100+ LLM providers with two main components:
**Fix options:**
1. **Create a Prisma migration** (permanent) — run `prisma migrate dev --name <description>` in the worktree. The generated file will be picked up by `prisma migrate deploy` on next startup.
2. **Apply manually for local dev** — `psql -d litellm -c "ALTER TABLE ... ADD COLUMN IF NOT EXISTS ..."` after each proxy start. Fine for dev, not for production.
3. **Update litellm-proxy-extras** — if the package is installed from PyPI, its migration directory must include the new file. Either update the package or run the migration manually until the next release ships it.
3. **Update litellm-proxy-extras** — if the package is installed from PyPI, its migration directory must include the new file. Either update the package or run the migration manually until the next release ships it.
2 changes: 1 addition & 1 deletion ci_cd/security_scans.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ echo "Starting security scans for LiteLLM..."
install_trivy() {
echo "Installing Trivy and required tools..."
sudo apt-get update
sudo apt-get install -y wget apt-transport-https gnupg lsb-release jq curl
sudo apt-get install -y wget apt-transport-https gnupg lsb-release jq curl bsdmainutils
wget -qO - https://aquasecurity.github.io/trivy-repo/deb/public.key | sudo apt-key add -
echo "deb https://aquasecurity.github.io/trivy-repo/deb $(lsb_release -sc) main" | sudo tee -a /etc/apt/sources.list.d/trivy.list
sudo apt-get update
Expand Down
9 changes: 8 additions & 1 deletion litellm/cost_calculator.py
Original file line number Diff line number Diff line change
Expand Up @@ -660,7 +660,14 @@ def _select_model_name_for_cost_calc(

if custom_pricing is True:
if router_model_id is not None and router_model_id in litellm.model_cost:
return_model = router_model_id
entry = litellm.model_cost[router_model_id]
if (
entry.get("input_cost_per_token") is not None
or entry.get("input_cost_per_second") is not None
):
return_model = router_model_id
else:
return_model = model
Comment on lines +664 to +670
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Pricing check may miss non-token cost types

The guard for whether the router_model_id entry has real pricing only checks input_cost_per_token and input_cost_per_second. There are other pricing dimensions in model_prices_and_context_window.json — e.g. input_cost_per_audio_token, input_cost_per_image_token, input_cost_per_character — that would cause the condition to evaluate to False even when the entry does have meaningful custom pricing, silently falling back to the model name and potentially returning the wrong cost.

Consider broadening the guard to cover all known input cost fields:

PRICING_FIELDS = (
    "input_cost_per_token",
    "input_cost_per_second",
    "input_cost_per_audio_token",
    "input_cost_per_image_token",
    "input_cost_per_character",
)
if any(entry.get(f) is not None for f in PRICING_FIELDS):
    return_model = router_model_id
else:
    return_model = model

else:
return_model = model

Expand Down
3 changes: 2 additions & 1 deletion litellm/proxy/example_config_yaml/custom_auth_basic.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from fastapi import Request

from litellm.proxy._types import UserAPIKeyAuth
from litellm.proxy._types import LitellmUserRoles, UserAPIKeyAuth


async def user_api_key_auth(request: Request, api_key: str) -> UserAPIKeyAuth:
Expand All @@ -9,6 +9,7 @@ async def user_api_key_auth(request: Request, api_key: str) -> UserAPIKeyAuth:
api_key="best-api-key-ever",
user_id="best-user-id-ever",
team_id="best-team-id-ever",
user_role=LitellmUserRoles.PROXY_ADMIN,
)
except Exception:
raise Exception
7 changes: 7 additions & 0 deletions litellm/proxy/proxy_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -340,9 +340,16 @@ def _maybe_setup_prometheus_multiproc_dir(
return

# Check if prometheus is in any callback list
# Each setting can be a list or a single string; normalize to list
callbacks = litellm_settings.get("callbacks") or []
success_callbacks = litellm_settings.get("success_callback") or []
failure_callbacks = litellm_settings.get("failure_callback") or []
if isinstance(callbacks, str):
callbacks = [callbacks]
if isinstance(success_callbacks, str):
success_callbacks = [success_callbacks]
if isinstance(failure_callbacks, str):
failure_callbacks = [failure_callbacks]
all_callbacks = callbacks + success_callbacks + failure_callbacks
if "prometheus" not in all_callbacks:
return
Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "litellm"
version = "1.82.2"
version = "1.82.3"
description = "Library to easily interface with LLM API providers"
authors = ["BerriAI"]
license = "MIT"
Expand Down Expand Up @@ -183,7 +183,7 @@ requires = ["poetry-core", "wheel"]
build-backend = "poetry.core.masonry.api"

[tool.commitizen]
version = "1.82.2"
version = "1.82.3"
version_files = [
"pyproject.toml:^version"
]
Expand Down
3 changes: 2 additions & 1 deletion tests/local_testing/test_router_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,7 @@ def test_router_get_model_info_wildcard_routes():


@pytest.mark.asyncio
@pytest.mark.flaky(retries=3, delay=1)
Comment on lines 200 to +202
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@pytest.mark.flaky masks a timing-dependent test

Adding retries=3, delay=1 addresses the symptom but not the root cause. The test relies on a Redis/in-memory counter being updated after router.acompletion() completes — a time-based race condition. The sleep increase from 1 s → 2 s is a better signal here, but retrying a flaky test in a shared CI environment is still fragile and can hide real regressions.

Consider instead waiting on the side-effect deterministically, e.g. polling get_model_group_usage until it reflects the expected value, or mocking the underlying cache update.

async def test_router_get_model_group_usage_wildcard_routes():
os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
litellm.model_cost = litellm.get_model_cost_map(url="")
Expand All @@ -219,7 +220,7 @@ async def test_router_get_model_group_usage_wildcard_routes():
)
print(resp)

await asyncio.sleep(1)
await asyncio.sleep(2)

tpm, rpm = await router.get_model_group_usage(model_group="gemini/gemini-1.5-flash")

Expand Down
3 changes: 3 additions & 0 deletions tests/mcp_tests/test_mcp_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -395,6 +395,7 @@ async def test_mcp_http_transport_tool_not_found():
@pytest.mark.asyncio
async def test_streamable_http_mcp_handler_mock():
"""Test the streamable HTTP MCP handler functionality"""
from litellm.proxy._types import UserAPIKeyAuth

# Mock the session manager and its methods
mock_session_manager = AsyncMock()
Expand Down Expand Up @@ -425,6 +426,8 @@ async def test_streamable_http_mcp_handler_mock():
), patch(
"litellm.proxy._experimental.mcp_server.server.extract_mcp_auth_context",
AsyncMock(return_value=mock_auth_context),
), patch(
"litellm.proxy._experimental.mcp_server.server.set_auth_context",
):
from litellm.proxy._experimental.mcp_server.server import (
handle_streamable_http_mcp,
Expand Down
24 changes: 24 additions & 0 deletions tests/test_litellm/proxy/test_prometheus_cleanup.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,30 @@ def test_respects_existing_env_var(self, tmp_path):
assert os.environ["PROMETHEUS_MULTIPROC_DIR"] == custom_dir
assert os.path.isdir(custom_dir)

@pytest.mark.parametrize(
"litellm_settings",
[
{"callbacks": "prometheus"},
{"success_callback": "prometheus"},
{"failure_callback": "prometheus"},
{"callbacks": "custom_callback"}, # string but not prometheus
],
)
def test_handles_string_callbacks(self, litellm_settings):
"""When callbacks are specified as a string instead of a list, should not crash."""
with patch.dict(os.environ, {}, clear=False):
os.environ.pop("PROMETHEUS_MULTIPROC_DIR", None)
os.environ.pop("prometheus_multiproc_dir", None)

# Should not raise TypeError
ProxyInitializationHelpers._maybe_setup_prometheus_multiproc_dir(
num_workers=4,
litellm_settings=litellm_settings,
)

# Cleanup
os.environ.pop("PROMETHEUS_MULTIPROC_DIR", None)

@pytest.mark.parametrize(
"num_workers, litellm_settings",
[
Expand Down
4 changes: 2 additions & 2 deletions tests/test_litellm/proxy/test_proxy_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -677,7 +677,7 @@ def test_startup_fails_when_db_setup_fails(
mock_atexit_register,
mock_subprocess_run,
):
"""Test that proxy exits with code 1 when PrismaManager.setup_database returns False"""
"""Test that proxy exits with code 1 when PrismaManager.setup_database returns False and --enforce_prisma_migration_check is set"""
from litellm.proxy.proxy_cli import run_server

mock_subprocess_run.return_value = MagicMock(returncode=0)
Expand Down Expand Up @@ -717,7 +717,7 @@ def test_startup_fails_when_db_setup_fails(

with pytest.raises(SystemExit) as exc_info:
run_server.main(
["--local", "--skip_server_startup"], standalone_mode=False
["--local", "--skip_server_startup", "--enforce_prisma_migration_check"], standalone_mode=False
)
assert exc_info.value.code == 1
mock_setup_database.assert_called_once_with(use_migrate=True)
Expand Down
59 changes: 59 additions & 0 deletions tests/test_litellm/test_cost_calculator.py
Original file line number Diff line number Diff line change
Expand Up @@ -388,6 +388,65 @@ def test_custom_pricing_cost_calc_uses_router_model_id_from_litellm_metadata():
assert custom_model_id not in (selected_model_no_custom or "")


def test_per_request_custom_pricing_with_router():
    """When custom pricing arrives as per-request kwargs (not in model_list),
    _select_model_name_for_cost_calc should fall back to the model name
    (where register_model stored the pricing) instead of the router_model_id
    (which carries no pricing data).

    Regression test for the bug where response._hidden_params["response_cost"]
    returned 0.0 for per-request custom pricing via Router.
    """
    from litellm import Router
    from litellm.cost_calculator import _select_model_name_for_cost_calc

    router = Router(
        model_list=[
            {
                "model_name": "openai/gpt-3.5-turbo",
                "litellm_params": {
                    "model": "openai/gpt-3.5-turbo",
                    "api_key": "test_api_key",
                },
            },
        ]
    )

    # The router hashes each deployment into an id and registers that id
    # in litellm.model_cost — but without any pricing attached.
    first_deployment = router.model_list[0]
    router_model_id = first_deployment["model_info"]["id"]
    assert router_model_id in litellm.model_cost

    registered_entry = litellm.model_cost[router_model_id]
    # model_list carried no custom pricing, so the hash entry has none either.
    assert registered_entry.get("input_cost_per_token") is None

    # Simulate completion()'s behavior: per-request pricing gets registered
    # under the *model name*, not under the router's hashed id.
    litellm.register_model(
        {
            "openai/gpt-3.5-turbo": {
                "input_cost_per_token": 2.0,
                "output_cost_per_token": 2.0,
                "litellm_provider": "openai",
            }
        }
    )

    # The selector must prefer the name that actually has pricing attached.
    chosen = _select_model_name_for_cost_calc(
        model="openai/gpt-3.5-turbo",
        completion_response=None,
        custom_pricing=True,
        custom_llm_provider="openai",
        router_model_id=router_model_id,
    )
    assert chosen is not None
    assert router_model_id not in chosen
    assert "gpt-3.5-turbo" in chosen


def test_azure_realtime_cost_calculator():
os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
litellm.model_cost = litellm.get_model_cost_map(url="")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ export function useLogFilterLogic({
const filteredLogs: PaginatedResponse = useMemo(() => {
if (hasBackendFilters) {
// Prefer backend result if present; otherwise fall back to latest logs
if (backendFilteredLogs && backendFilteredLogs.data && backendFilteredLogs.data.length > 0) {
if (backendFilteredLogs && backendFilteredLogs.data) {
return backendFilteredLogs;
}
return (
Expand Down
Loading