diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index 183c25ed463..cf5d133bc33 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -1224,6 +1224,7 @@ async def root_redirect(): RedisCache ] = None # redis cache used for tracking spend, tpm/rpm limits polling_via_cache_enabled: Union[Literal["all"], List[str], bool] = False +native_background_mode: List[str] = [] # Models that should use native provider background mode instead of polling polling_cache_ttl: int = 3600 # Default 1 hour TTL for polling cache user_custom_auth = None user_custom_key_generate = None @@ -2456,14 +2457,17 @@ async def load_config( # noqa: PLR0915 pass elif key == "responses": # Initialize global polling via cache settings - global polling_via_cache_enabled, polling_cache_ttl + global polling_via_cache_enabled, native_background_mode, polling_cache_ttl background_mode = value.get("background_mode", {}) polling_via_cache_enabled = background_mode.get( "polling_via_cache", False ) + native_background_mode = background_mode.get( + "native_background_mode", [] + ) polling_cache_ttl = background_mode.get("ttl", 3600) verbose_proxy_logger.debug( - f"{blue_color_code} Initialized polling via cache: enabled={polling_via_cache_enabled}, ttl={polling_cache_ttl}{reset_color_code}" + f"{blue_color_code} Initialized polling via cache: enabled={polling_via_cache_enabled}, native_background_mode={native_background_mode}, ttl={polling_cache_ttl}{reset_color_code}" ) elif key == "default_team_settings": for idx, team_setting in enumerate( diff --git a/litellm/proxy/response_api_endpoints/endpoints.py b/litellm/proxy/response_api_endpoints/endpoints.py index ec1bc5497bd..44e8c42b2c1 100644 --- a/litellm/proxy/response_api_endpoints/endpoints.py +++ b/litellm/proxy/response_api_endpoints/endpoints.py @@ -68,6 +68,7 @@ async def responses_api( _read_request_body, general_settings, llm_router, + native_background_mode, polling_cache_ttl, polling_via_cache_enabled, proxy_config, @@ -95,6 +96,7 @@ async def responses_api( redis_cache=redis_usage_cache, model=data.get("model", ""), llm_router=llm_router, + native_background_mode=native_background_mode, ) # If polling is enabled, use polling mode diff --git a/litellm/proxy/response_polling/polling_handler.py b/litellm/proxy/response_polling/polling_handler.py index c47578c8d7b..f0b850049bf 100644 --- a/litellm/proxy/response_polling/polling_handler.py +++ b/litellm/proxy/response_polling/polling_handler.py @@ -3,7 +3,7 @@ """ import json from datetime import datetime, timezone -from typing import Any, Dict, Optional +from typing import Any, Dict, List, Optional from litellm._logging import verbose_proxy_logger from litellm._uuid import uuid4 @@ -257,6 +257,7 @@ def should_use_polling_for_request( redis_cache, # RedisCache or None model: str, llm_router, # Router instance or None + native_background_mode: Optional[List[str]] = None, # List of models that should use native background mode ) -> bool: """ Determine if polling via cache should be used for a request. @@ -267,6 +268,8 @@ def should_use_polling_for_request( redis_cache: Redis cache instance (required for polling) model: Model name from the request (e.g., "gpt-5" or "openai/gpt-4o") llm_router: LiteLLM router instance for looking up model deployments + native_background_mode: List of model names that should use native provider + background mode instead of polling via cache Returns: True if polling should be used, False otherwise @@ -275,6 +278,13 @@ def should_use_polling_for_request( if not (background_mode and polling_via_cache_enabled and redis_cache): return False + # Check if model is in native_background_mode list - these use native provider background mode + if native_background_mode and model in native_background_mode: + verbose_proxy_logger.debug( + f"Model {model} is in native_background_mode list, skipping polling via cache" + ) + return False + # "all" enables polling for all providers if polling_via_cache_enabled == "all": return True diff --git a/tests/proxy_unit_tests/test_response_polling_handler.py b/tests/proxy_unit_tests/test_response_polling_handler.py index cb4cd0efe57..26f8ac24adc 100644 --- a/tests/proxy_unit_tests/test_response_polling_handler.py +++ b/tests/proxy_unit_tests/test_response_polling_handler.py @@ -1005,6 +1005,142 @@ def test_polling_with_router_lookup_no_match(self): assert result is False + # ==================== Native Background Mode Tests ==================== + + def test_polling_disabled_when_model_in_native_background_mode(self): + """Test that polling is disabled when model is in native_background_mode list""" + from litellm.proxy.response_polling.polling_handler import should_use_polling_for_request + + result = should_use_polling_for_request( + background_mode=True, + polling_via_cache_enabled="all", + redis_cache=Mock(), + model="o4-mini-deep-research", + llm_router=None, + native_background_mode=["o4-mini-deep-research", "o3-deep-research"], + ) + + assert result is False + + def test_polling_disabled_for_native_background_mode_with_provider_list(self): + """Test that native_background_mode takes precedence even when provider matches""" + from litellm.proxy.response_polling.polling_handler import should_use_polling_for_request + + result = should_use_polling_for_request( + background_mode=True, + polling_via_cache_enabled=["openai"], + redis_cache=Mock(), + model="openai/o4-mini-deep-research", + llm_router=None, + native_background_mode=["openai/o4-mini-deep-research"], + ) + + assert result is False + + def test_polling_enabled_when_model_not_in_native_background_mode(self): + """Test that polling is enabled when model is not in native_background_mode list""" + from litellm.proxy.response_polling.polling_handler import should_use_polling_for_request + + result = should_use_polling_for_request( + background_mode=True, + polling_via_cache_enabled="all", + redis_cache=Mock(), + model="gpt-4o", + llm_router=None, + native_background_mode=["o4-mini-deep-research"], + ) + + assert result is True + + def test_polling_enabled_when_native_background_mode_is_none(self): + """Test that polling works normally when native_background_mode is None""" + from litellm.proxy.response_polling.polling_handler import should_use_polling_for_request + + result = should_use_polling_for_request( + background_mode=True, + polling_via_cache_enabled="all", + redis_cache=Mock(), + model="gpt-4o", + llm_router=None, + native_background_mode=None, + ) + + assert result is True + + def test_polling_enabled_when_native_background_mode_is_empty_list(self): + """Test that polling works normally when native_background_mode is empty list""" + from litellm.proxy.response_polling.polling_handler import should_use_polling_for_request + + result = should_use_polling_for_request( + background_mode=True, + polling_via_cache_enabled="all", + redis_cache=Mock(), + model="gpt-4o", + llm_router=None, + native_background_mode=[], + ) + + assert result is True + + def test_native_background_mode_exact_match_required(self): + """Test that native_background_mode uses exact model name matching""" + from litellm.proxy.response_polling.polling_handler import should_use_polling_for_request + + # "o4-mini" should not match "o4-mini-deep-research" + result = should_use_polling_for_request( + background_mode=True, + polling_via_cache_enabled="all", + redis_cache=Mock(), + model="o4-mini", + llm_router=None, + native_background_mode=["o4-mini-deep-research"], + ) + + assert result is True + + def test_native_background_mode_with_provider_prefix_in_request(self): + """Test native_background_mode matching when request model has provider prefix""" + from litellm.proxy.response_polling.polling_handler import should_use_polling_for_request + + # Model in native_background_mode without provider prefix + # Request comes in with provider prefix - should not match + result = should_use_polling_for_request( + background_mode=True, + polling_via_cache_enabled=["openai"], + redis_cache=Mock(), + model="openai/o4-mini-deep-research", + llm_router=None, + native_background_mode=["o4-mini-deep-research"], # Without prefix + ) + + # Should return True because "openai/o4-mini-deep-research" != "o4-mini-deep-research" + assert result is True + + def test_native_background_mode_with_router_lookup(self): + """Test that native_background_mode works with router-resolved models""" + from litellm.proxy.response_polling.polling_handler import should_use_polling_for_request + + mock_router = Mock() + mock_router.model_name_to_deployment_indices = {"deep-research": [0]} + mock_router.model_list = [ + { + "model_name": "deep-research", + "litellm_params": {"model": "openai/o4-mini-deep-research"} + } + ] + + # Model alias "deep-research" is in native_background_mode + result = should_use_polling_for_request( + background_mode=True, + polling_via_cache_enabled=["openai"], + redis_cache=Mock(), + model="deep-research", + llm_router=mock_router, + native_background_mode=["deep-research"], + ) + + assert result is False + class TestStreamingEventParsing: """