50 changes: 36 additions & 14 deletions litellm/proxy/common_request_processing.py
@@ -249,6 +249,7 @@ def _override_openai_response_model(
response_obj: Any,
requested_model: str,
log_context: str,
upstream_model: Optional[str] = None,
) -> None:
"""
Force the OpenAI-compatible `model` field in the response to match what the client requested.
@@ -289,12 +290,21 @@ def _override_openai_response_model(
if isinstance(response_obj, dict):
downstream_model = response_obj.get("model")
if downstream_model != requested_model:
verbose_proxy_logger.debug(
"%s: response model mismatch - requested=%r downstream=%r. Overriding response['model'] to requested model.",
log_context,
requested_model,
downstream_model,
)
if upstream_model and downstream_model == upstream_model:
verbose_proxy_logger.debug(
"%s: response model is known alias - requested=%r upstream=%r downstream=%r. Overriding response['model'].",
log_context,
requested_model,
upstream_model,
downstream_model,
)
else:
verbose_proxy_logger.warning(
"%s: response model mismatch - requested=%r downstream=%r. Overriding response['model'] to requested model.",
log_context,
requested_model,
downstream_model,
)
response_obj["model"] = requested_model
return

@@ -308,12 +318,21 @@ def _override_openai_response_model(

downstream_model = getattr(response_obj, "model", None)
if downstream_model != requested_model:
verbose_proxy_logger.debug(
"%s: response model mismatch - requested=%r downstream=%r. Overriding response.model to requested model.",
log_context,
requested_model,
downstream_model,
)
if upstream_model and downstream_model == upstream_model:
verbose_proxy_logger.debug(
"%s: response model is known alias - requested=%r upstream=%r downstream=%r. Overriding response.model.",
log_context,
requested_model,
upstream_model,
downstream_model,
)
else:
verbose_proxy_logger.warning(
"%s: response model mismatch - requested=%r downstream=%r. Overriding response.model to requested model.",
log_context,
requested_model,
downstream_model,
)
Comment on lines +329 to +335
Contributor

Re-introduces WARNING that was intentionally removed

PR #20994 (commit a2e9e73b6) deliberately changed all model-mismatch logs from WARNING to DEBUG because high-traffic customers using model aliases were generating millions of warnings per day, flooding logs and causing disk space issues.

This PR re-introduces WARNING for the non-alias mismatch case (when upstream_model is None or doesn't match downstream_model). However, many legitimate scenarios produce this mismatch without an upstream_model being available; e.g., the streaming path in proxy_server.py:5059 doesn't pass upstream_model at all.

Consider whether the WARNING reintroduction here is intentional, given the history of #20994. If it is, the streaming path in proxy_server.py should also be updated for consistency.
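
For readers weighing this trade-off, here is a minimal, self-contained sketch of the branching this hunk introduces; it is not LiteLLM code. It uses the standard logging module in place of verbose_proxy_logger, and the helper name classify_model_mismatch is invented purely for illustration.

```python
import logging
from typing import Optional


def classify_model_mismatch(
    requested_model: str,
    downstream_model: Optional[str],
    upstream_model: Optional[str] = None,
) -> Optional[int]:
    """Return the log level the new branching would pick, or None when nothing is logged."""
    if downstream_model == requested_model:
        # No mismatch: the model field is already what the client asked for.
        return None
    if upstream_model and downstream_model == upstream_model:
        # Known alias / internal deployment name: quiet DEBUG, matching the intent of #20994.
        return logging.DEBUG
    # Unknown mismatch, or no upstream_model available (e.g. the streaming path):
    # this is the case that now escalates to WARNING.
    return logging.WARNING


if __name__ == "__main__":
    cases = [
        ("my-alias", "hosted_vllm/meta-llama/Llama-3-8b", "hosted_vllm/meta-llama/Llama-3-8b"),
        ("my-alias", "some-other-model", "hosted_vllm/meta-llama/Llama-3-8b"),
        ("gpt-4", "gpt-3.5-turbo", None),
    ]
    for requested, downstream, upstream in cases:
        level = classify_model_mismatch(requested, downstream, upstream)
        print(requested, "->", logging.getLevelName(level) if level is not None else "no log")
```

Under this split, the third case (no upstream_model, as in the streaming path mentioned above) lands on WARNING, which is the regression risk described here.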


try:
setattr(response_obj, "model", requested_model)
@@ -978,10 +997,12 @@ async def base_process_llm_request(
# Always return the client-requested model name (not provider-prefixed internal identifiers)
# for OpenAI-compatible responses.
if requested_model_from_client:
_upstream_model = getattr(logging_obj, "model", None)
_override_openai_response_model(
response_obj=response,
requested_model=requested_model_from_client,
log_context=f"litellm_call_id={logging_obj.litellm_call_id}",
upstream_model=_upstream_model,
)

hidden_params = (
Expand Down Expand Up @@ -1198,11 +1219,12 @@ async def _handle_llm_api_exception(
elif isinstance(e, httpx.HTTPStatusError):
# Handle httpx.HTTPStatusError - extract actual error from response
# This matches the original behavior before the refactor in commit 511d435f6f
error_body = await e.response.aread()
http_status_error: httpx.HTTPStatusError = e
error_body = await http_status_error.response.aread()
error_text = error_body.decode("utf-8")

raise HTTPException(
status_code=e.response.status_code,
status_code=http_status_error.response.status_code,
detail={"error": error_text},
)
error_msg = f"{str(e)}"
202 changes: 198 additions & 4 deletions tests/test_litellm/proxy/test_common_request_processing.py
@@ -1229,7 +1229,7 @@ def test_override_model_no_requested_model(self):
without modifying the response.
"""
fallback_model = "gpt-3.5-turbo"

# Create a mock response object
response_obj = MagicMock()
response_obj.model = fallback_model
@@ -1238,14 +1238,14 @@ def test_override_model_no_requested_model(self):
"x-litellm-attempted-fallbacks": 1
}
}

# Call the function with None requested_model
_override_openai_response_model(
response_obj=response_obj,
requested_model=None,
log_context="test_context",
)

# Verify the model was not changed
assert response_obj.model == fallback_model

Expand All @@ -1255,8 +1255,202 @@ def test_override_model_no_requested_model(self):
requested_model="",
log_context="test_context",
)

# Verify the model was not changed
assert response_obj.model == fallback_model

def test_override_model_known_alias_logs_debug_not_warning(self):
"""
When downstream_model matches upstream_model (a known alias/internal name),
the function should log at DEBUG level, not WARNING, and still override
response.model to the client-requested model.
"""
from unittest.mock import patch

requested_model = "my-alias"
upstream_model = "hosted_vllm/meta-llama/Llama-3-8b"

response_obj = MagicMock()
response_obj.model = upstream_model # downstream == upstream (known alias)
response_obj._hidden_params = {}

with patch(
"litellm.proxy.common_request_processing.verbose_proxy_logger"
) as mock_logger:
_override_openai_response_model(
response_obj=response_obj,
requested_model=requested_model,
log_context="test_context",
upstream_model=upstream_model,
)

# Model must still be overridden to the client-requested value
assert response_obj.model == requested_model

# debug() should have been called (alias path)
mock_logger.debug.assert_called()
# warning() must NOT have been called: this is a known alias, not a real mismatch
mock_logger.warning.assert_not_called()

def test_override_model_unknown_mismatch_logs_warning(self):
"""
When downstream_model differs from both requested_model and upstream_model,
the function should log at WARNING level to surface unexpected mismatches.
"""
from unittest.mock import patch

requested_model = "my-alias"
upstream_model = "hosted_vllm/meta-llama/Llama-3-8b"
downstream_model = "some-other-model" # Unexpected β€” matches neither

response_obj = MagicMock()
response_obj.model = downstream_model
response_obj._hidden_params = {}

with patch(
"litellm.proxy.common_request_processing.verbose_proxy_logger"
) as mock_logger:
_override_openai_response_model(
response_obj=response_obj,
requested_model=requested_model,
log_context="test_context",
upstream_model=upstream_model,
)

assert response_obj.model == requested_model

mock_logger.warning.assert_called()
mock_logger.debug.assert_not_called()

def test_override_model_no_upstream_model_logs_warning(self):
"""
When upstream_model is not provided (None) and downstream_model differs
from requested_model, the function should log at WARNING level.
"""
from unittest.mock import patch

requested_model = "gpt-4"
downstream_model = "gpt-3.5-turbo"

response_obj = MagicMock()
response_obj.model = downstream_model
response_obj._hidden_params = {}

with patch(
"litellm.proxy.common_request_processing.verbose_proxy_logger"
) as mock_logger:
_override_openai_response_model(
response_obj=response_obj,
requested_model=requested_model,
log_context="test_context",
# upstream_model omitted (defaults to None)
)

assert response_obj.model == requested_model

mock_logger.warning.assert_called()
mock_logger.debug.assert_not_called()

def test_override_model_no_mismatch_no_logging(self):
"""
When downstream_model already equals requested_model, no mismatch logging
should occur at all (neither debug nor warning).
"""
from unittest.mock import patch

requested_model = "gpt-4"

response_obj = MagicMock()
response_obj.model = requested_model # Already correct
response_obj._hidden_params = {}

with patch(
"litellm.proxy.common_request_processing.verbose_proxy_logger"
) as mock_logger:
_override_openai_response_model(
response_obj=response_obj,
requested_model=requested_model,
log_context="test_context",
upstream_model="hosted_vllm/gpt-4",
)

assert response_obj.model == requested_model
mock_logger.warning.assert_not_called()
mock_logger.debug.assert_not_called()

def test_override_model_dict_known_alias_logs_debug_not_warning(self):
"""
Dict branch: when downstream_model matches upstream_model (a known alias),
the function should log at DEBUG, not WARNING, and override response["model"].
"""
from unittest.mock import patch

requested_model = "my-alias"
upstream_model = "hosted_vllm/meta-llama/Llama-3-8b"
response_obj = {"model": upstream_model, "choices": []}

with patch(
"litellm.proxy.common_request_processing.verbose_proxy_logger"
) as mock_logger:
_override_openai_response_model(
response_obj=response_obj,
requested_model=requested_model,
log_context="test_context",
upstream_model=upstream_model,
)

assert response_obj["model"] == requested_model
mock_logger.debug.assert_called()
mock_logger.warning.assert_not_called()

def test_override_model_dict_unknown_mismatch_logs_warning(self):
"""
Dict branch: when downstream_model differs from both requested_model and
upstream_model, the function should log at WARNING level.
"""
from unittest.mock import patch

requested_model = "my-alias"
upstream_model = "hosted_vllm/meta-llama/Llama-3-8b"
downstream_model = "some-other-model"
response_obj = {"model": downstream_model, "choices": []}

with patch(
"litellm.proxy.common_request_processing.verbose_proxy_logger"
) as mock_logger:
_override_openai_response_model(
response_obj=response_obj,
requested_model=requested_model,
log_context="test_context",
upstream_model=upstream_model,
)

assert response_obj["model"] == requested_model
mock_logger.warning.assert_called()
mock_logger.debug.assert_not_called()

def test_override_model_dict_no_upstream_model_logs_warning(self):
"""
Dict branch: when upstream_model is not provided (None) and downstream_model
differs from requested_model, the function should log at WARNING level.
"""
from unittest.mock import patch

requested_model = "gpt-4"
downstream_model = "gpt-3.5-turbo"
response_obj = {"model": downstream_model, "choices": []}

with patch(
"litellm.proxy.common_request_processing.verbose_proxy_logger"
) as mock_logger:
_override_openai_response_model(
response_obj=response_obj,
requested_model=requested_model,
log_context="test_context",
)

assert response_obj["model"] == requested_model
mock_logger.warning.assert_called()
mock_logger.debug.assert_not_called()

