From 4b3cb5e419a7304814b7059585a593ba104f365c Mon Sep 17 00:00:00 2001
From: Elliot
Date: Sat, 21 Feb 2026 00:16:59 -0800
Subject: [PATCH 1/2] fix(proxy): log alias model mismatch at debug, not warning

Also fix a pre-existing pyright type narrowing error in
_handle_llm_api_exception for httpx.HTTPStatusError.
---
 litellm/proxy/common_request_processing.py |  29 ++--
 .../proxy/test_common_request_processing.py | 127 +++++++++++++++++-
 2 files changed, 144 insertions(+), 12 deletions(-)

diff --git a/litellm/proxy/common_request_processing.py b/litellm/proxy/common_request_processing.py
index 12f7fe2c3cf..1b383a34935 100644
--- a/litellm/proxy/common_request_processing.py
+++ b/litellm/proxy/common_request_processing.py
@@ -249,6 +249,7 @@ def _override_openai_response_model(
     response_obj: Any,
     requested_model: str,
     log_context: str,
+    upstream_model: Optional[str] = None,
 ) -> None:
     """
     Force the OpenAI-compatible `model` field in the response to match what the client requested.
@@ -308,12 +309,21 @@
     downstream_model = getattr(response_obj, "model", None)
 
     if downstream_model != requested_model:
-        verbose_proxy_logger.debug(
-            "%s: response model mismatch - requested=%r downstream=%r. Overriding response.model to requested model.",
-            log_context,
-            requested_model,
-            downstream_model,
-        )
+        if upstream_model and downstream_model == upstream_model:
+            verbose_proxy_logger.debug(
+                "%s: response model is known alias - requested=%r upstream=%r downstream=%r. Overriding response.model.",
+                log_context,
+                requested_model,
+                upstream_model,
+                downstream_model,
+            )
+        else:
+            verbose_proxy_logger.warning(
+                "%s: response model mismatch - requested=%r downstream=%r. Overriding response.model to requested model.",
+                log_context,
+                requested_model,
+                downstream_model,
+            )
 
     try:
         setattr(response_obj, "model", requested_model)
@@ -978,10 +988,12 @@
         # Always return the client-requested model name (not provider-prefixed internal identifiers)
         # for OpenAI-compatible responses.
         if requested_model_from_client:
+            _upstream_model = getattr(logging_obj, "model", None)
             _override_openai_response_model(
                 response_obj=response,
                 requested_model=requested_model_from_client,
                 log_context=f"litellm_call_id={logging_obj.litellm_call_id}",
+                upstream_model=_upstream_model,
             )
 
         hidden_params = (
@@ -1198,11 +1210,12 @@
         elif isinstance(e, httpx.HTTPStatusError):
             # Handle httpx.HTTPStatusError - extract actual error from response
             # This matches the original behavior before the refactor in commit 511d435f6f
-            error_body = await e.response.aread()
+            http_status_error: httpx.HTTPStatusError = e
+            error_body = await http_status_error.response.aread()
             error_text = error_body.decode("utf-8")
 
             raise HTTPException(
-                status_code=e.response.status_code,
+                status_code=http_status_error.response.status_code,
                 detail={"error": error_text},
             )
         error_msg = f"{str(e)}"
diff --git a/tests/test_litellm/proxy/test_common_request_processing.py b/tests/test_litellm/proxy/test_common_request_processing.py
index 7bebe00d61e..ebcab79f19c 100644
--- a/tests/test_litellm/proxy/test_common_request_processing.py
+++ b/tests/test_litellm/proxy/test_common_request_processing.py
@@ -1229,7 +1229,7 @@ def test_override_model_no_requested_model(self):
         without modifying the response.
""" fallback_model = "gpt-3.5-turbo" - + # Create a mock object response response_obj = MagicMock() response_obj.model = fallback_model @@ -1238,14 +1238,14 @@ def test_override_model_no_requested_model(self): "x-litellm-attempted-fallbacks": 1 } } - + # Call the function with None requested_model _override_openai_response_model( response_obj=response_obj, requested_model=None, log_context="test_context", ) - + # Verify the model was not changed assert response_obj.model == fallback_model @@ -1255,8 +1255,127 @@ def test_override_model_no_requested_model(self): requested_model="", log_context="test_context", ) - + # Verify the model was not changed assert response_obj.model == fallback_model + def test_override_model_known_alias_logs_debug_not_warning(self): + """ + When downstream_model matches upstream_model (a known alias/internal name), + the function should log at DEBUG level — not WARNING — and still override + response.model to the client-requested model. + """ + from unittest.mock import patch + + requested_model = "my-alias" + upstream_model = "hosted_vllm/meta-llama/Llama-3-8b" + + response_obj = MagicMock() + response_obj.model = upstream_model # downstream == upstream (known alias) + response_obj._hidden_params = {} + + with patch( + "litellm.proxy.common_request_processing.verbose_proxy_logger" + ) as mock_logger: + _override_openai_response_model( + response_obj=response_obj, + requested_model=requested_model, + log_context="test_context", + upstream_model=upstream_model, + ) + + # Model must still be overridden to the client-requested value + assert response_obj.model == requested_model + + # debug() should have been called (alias path) + mock_logger.debug.assert_called() + # warning() must NOT have been called — this is a known alias, not a real mismatch + mock_logger.warning.assert_not_called() + + def test_override_model_unknown_mismatch_logs_warning(self): + """ + When downstream_model differs from both requested_model and upstream_model, + the function should log at WARNING level to surface unexpected mismatches. + """ + from unittest.mock import patch + + requested_model = "my-alias" + upstream_model = "hosted_vllm/meta-llama/Llama-3-8b" + downstream_model = "some-other-model" # Unexpected — matches neither + + response_obj = MagicMock() + response_obj.model = downstream_model + response_obj._hidden_params = {} + + with patch( + "litellm.proxy.common_request_processing.verbose_proxy_logger" + ) as mock_logger: + _override_openai_response_model( + response_obj=response_obj, + requested_model=requested_model, + log_context="test_context", + upstream_model=upstream_model, + ) + + assert response_obj.model == requested_model + + mock_logger.warning.assert_called() + mock_logger.debug.assert_not_called() + + def test_override_model_no_upstream_model_logs_warning(self): + """ + When upstream_model is not provided (None) and downstream_model differs + from requested_model, the function should log at WARNING level. 
+        """
+        from unittest.mock import patch
+
+        requested_model = "gpt-4"
+        downstream_model = "gpt-3.5-turbo"
+
+        response_obj = MagicMock()
+        response_obj.model = downstream_model
+        response_obj._hidden_params = {}
+
+        with patch(
+            "litellm.proxy.common_request_processing.verbose_proxy_logger"
+        ) as mock_logger:
+            _override_openai_response_model(
+                response_obj=response_obj,
+                requested_model=requested_model,
+                log_context="test_context",
+                # upstream_model omitted (defaults to None)
+            )
+
+        assert response_obj.model == requested_model
+
+        mock_logger.warning.assert_called()
+        mock_logger.debug.assert_not_called()
+
+    def test_override_model_no_mismatch_no_logging(self):
+        """
+        When downstream_model already equals requested_model, no mismatch logging
+        should occur at all (neither debug nor warning).
+        """
+        from unittest.mock import patch
+
+        requested_model = "gpt-4"
+
+        response_obj = MagicMock()
+        response_obj.model = requested_model  # Already correct
+        response_obj._hidden_params = {}
+
+        with patch(
+            "litellm.proxy.common_request_processing.verbose_proxy_logger"
+        ) as mock_logger:
+            _override_openai_response_model(
+                response_obj=response_obj,
+                requested_model=requested_model,
+                log_context="test_context",
+                upstream_model="hosted_vllm/gpt-4",
+            )
+
+        assert response_obj.model == requested_model
+        mock_logger.warning.assert_not_called()
+        mock_logger.debug.assert_not_called()
+

From 902ddf762fd04adce47b79d8c0d51797e77161f2 Mon Sep 17 00:00:00 2001
From: Elliot
Date: Sat, 21 Feb 2026 00:44:19 -0800
Subject: [PATCH 2/2] fix(proxy): apply upstream_model alias check to dict response branch

---
 litellm/proxy/common_request_processing.py | 21 ++++--
 .../proxy/test_common_request_processing.py | 75 +++++++++++++++++++
 2 files changed, 90 insertions(+), 6 deletions(-)

diff --git a/litellm/proxy/common_request_processing.py b/litellm/proxy/common_request_processing.py
index 1b383a34935..099880eb51f 100644
--- a/litellm/proxy/common_request_processing.py
+++ b/litellm/proxy/common_request_processing.py
@@ -290,12 +290,21 @@ def _override_openai_response_model(
     if isinstance(response_obj, dict):
         downstream_model = response_obj.get("model")
         if downstream_model != requested_model:
-            verbose_proxy_logger.debug(
-                "%s: response model mismatch - requested=%r downstream=%r. Overriding response['model'] to requested model.",
-                log_context,
-                requested_model,
-                downstream_model,
-            )
+            if upstream_model and downstream_model == upstream_model:
+                verbose_proxy_logger.debug(
+                    "%s: response model is known alias - requested=%r upstream=%r downstream=%r. Overriding response['model'].",
+                    log_context,
+                    requested_model,
+                    upstream_model,
+                    downstream_model,
+                )
+            else:
+                verbose_proxy_logger.warning(
+                    "%s: response model mismatch - requested=%r downstream=%r. Overriding response['model'] to requested model.",
+                    log_context,
+                    requested_model,
+                    downstream_model,
+                )
 
         response_obj["model"] = requested_model
         return
diff --git a/tests/test_litellm/proxy/test_common_request_processing.py b/tests/test_litellm/proxy/test_common_request_processing.py
index ebcab79f19c..a265f9b220a 100644
--- a/tests/test_litellm/proxy/test_common_request_processing.py
+++ b/tests/test_litellm/proxy/test_common_request_processing.py
@@ -1378,4 +1378,79 @@ def test_override_model_no_mismatch_no_logging(self):
         mock_logger.warning.assert_not_called()
         mock_logger.debug.assert_not_called()
 
+    def test_override_model_dict_known_alias_logs_debug_not_warning(self):
+        """
+        Dict branch: when downstream_model matches upstream_model (a known alias),
+        the function should log at DEBUG, not WARNING, and override response["model"].
+        """
+        from unittest.mock import patch
+
+        requested_model = "my-alias"
+        upstream_model = "hosted_vllm/meta-llama/Llama-3-8b"
+        response_obj = {"model": upstream_model, "choices": []}
+
+        with patch(
+            "litellm.proxy.common_request_processing.verbose_proxy_logger"
+        ) as mock_logger:
+            _override_openai_response_model(
+                response_obj=response_obj,
+                requested_model=requested_model,
+                log_context="test_context",
+                upstream_model=upstream_model,
+            )
+
+        assert response_obj["model"] == requested_model
+        mock_logger.debug.assert_called()
+        mock_logger.warning.assert_not_called()
+
+    def test_override_model_dict_unknown_mismatch_logs_warning(self):
+        """
+        Dict branch: when downstream_model differs from both requested_model and
+        upstream_model, the function should log at WARNING level.
+        """
+        from unittest.mock import patch
+
+        requested_model = "my-alias"
+        upstream_model = "hosted_vllm/meta-llama/Llama-3-8b"
+        downstream_model = "some-other-model"
+        response_obj = {"model": downstream_model, "choices": []}
+
+        with patch(
+            "litellm.proxy.common_request_processing.verbose_proxy_logger"
+        ) as mock_logger:
+            _override_openai_response_model(
+                response_obj=response_obj,
+                requested_model=requested_model,
+                log_context="test_context",
+                upstream_model=upstream_model,
+            )
+
+        assert response_obj["model"] == requested_model
+        mock_logger.warning.assert_called()
+        mock_logger.debug.assert_not_called()
+
+    def test_override_model_dict_no_upstream_model_logs_warning(self):
+        """
+        Dict branch: when upstream_model is not provided (None) and downstream_model
+        differs from requested_model, the function should log at WARNING level.
+        """
+        from unittest.mock import patch
+
+        requested_model = "gpt-4"
+        downstream_model = "gpt-3.5-turbo"
+        response_obj = {"model": downstream_model, "choices": []}
+
+        with patch(
+            "litellm.proxy.common_request_processing.verbose_proxy_logger"
+        ) as mock_logger:
+            _override_openai_response_model(
+                response_obj=response_obj,
+                requested_model=requested_model,
+                log_context="test_context",
+            )
+
+        assert response_obj["model"] == requested_model
+        mock_logger.warning.assert_called()
+        mock_logger.debug.assert_not_called()
+
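-- 
Reviewer note, kept below the signature separator so git am ignores it and it
is not part of either patch: a minimal sketch of the behavior these commits
target, reusing the names from the tests above ("my-alias",
"hosted_vllm/meta-llama/Llama-3-8b", and the demo call id are illustrative
placeholders, not real config):

    from litellm.proxy.common_request_processing import (
        _override_openai_response_model,
    )

    # Dict-shaped OpenAI-compatible response whose "model" field carries the
    # upstream deployment name rather than the client-requested alias.
    response = {"model": "hosted_vllm/meta-llama/Llama-3-8b", "choices": []}

    _override_openai_response_model(
        response_obj=response,
        requested_model="my-alias",
        log_context="litellm_call_id=demo",
        upstream_model="hosted_vllm/meta-llama/Llama-3-8b",
    )

    # The client-facing name is restored; because downstream matched the known
    # upstream name, the mismatch is logged at DEBUG instead of WARNING.
    assert response["model"] == "my-alias"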