Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions litellm/proxy/proxy_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -1224,6 +1224,7 @@ async def root_redirect():
RedisCache
] = None # redis cache used for tracking spend, tpm/rpm limits
polling_via_cache_enabled: Union[Literal["all"], List[str], bool] = False
native_background_mode: List[str] = [] # Models that should use native provider background mode instead of polling
polling_cache_ttl: int = 3600 # Default 1 hour TTL for polling cache
user_custom_auth = None
user_custom_key_generate = None
Expand Down Expand Up @@ -2456,14 +2457,17 @@ async def load_config( # noqa: PLR0915
pass
elif key == "responses":
# Initialize global polling via cache settings
global polling_via_cache_enabled, polling_cache_ttl
global polling_via_cache_enabled, native_background_mode, polling_cache_ttl
background_mode = value.get("background_mode", {})
polling_via_cache_enabled = background_mode.get(
"polling_via_cache", False
)
native_background_mode = background_mode.get(
"native_background_mode", []
)
polling_cache_ttl = background_mode.get("ttl", 3600)
verbose_proxy_logger.debug(
f"{blue_color_code} Initialized polling via cache: enabled={polling_via_cache_enabled}, ttl={polling_cache_ttl}{reset_color_code}"
f"{blue_color_code} Initialized polling via cache: enabled={polling_via_cache_enabled}, native_background_mode={native_background_mode}, ttl={polling_cache_ttl}{reset_color_code}"
)
elif key == "default_team_settings":
for idx, team_setting in enumerate(
Expand Down
2 changes: 2 additions & 0 deletions litellm/proxy/response_api_endpoints/endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ async def responses_api(
_read_request_body,
general_settings,
llm_router,
native_background_mode,
polling_cache_ttl,
polling_via_cache_enabled,
proxy_config,
Expand Down Expand Up @@ -95,6 +96,7 @@ async def responses_api(
redis_cache=redis_usage_cache,
model=data.get("model", ""),
llm_router=llm_router,
native_background_mode=native_background_mode,
)

# If polling is enabled, use polling mode
Expand Down
12 changes: 11 additions & 1 deletion litellm/proxy/response_polling/polling_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"""
import json
from datetime import datetime, timezone
from typing import Any, Dict, Optional
from typing import Any, Dict, List, Optional

from litellm._logging import verbose_proxy_logger
from litellm._uuid import uuid4
Expand Down Expand Up @@ -257,6 +257,7 @@ def should_use_polling_for_request(
redis_cache, # RedisCache or None
model: str,
llm_router, # Router instance or None
native_background_mode: Optional[List[str]] = None, # List of models that should use native background mode
) -> bool:
"""
Determine if polling via cache should be used for a request.
Expand All @@ -267,6 +268,8 @@ def should_use_polling_for_request(
redis_cache: Redis cache instance (required for polling)
model: Model name from the request (e.g., "gpt-5" or "openai/gpt-4o")
llm_router: LiteLLM router instance for looking up model deployments
native_background_mode: List of model names that should use native provider
background mode instead of polling via cache

Returns:
True if polling should be used, False otherwise
Expand All @@ -275,6 +278,13 @@ def should_use_polling_for_request(
if not (background_mode and polling_via_cache_enabled and redis_cache):
return False

# Check if model is in native_background_mode list - these use native provider background mode
if native_background_mode and model in native_background_mode:
verbose_proxy_logger.debug(
f"Model {model} is in native_background_mode list, skipping polling via cache"
)
return False

# "all" enables polling for all providers
if polling_via_cache_enabled == "all":
return True
Expand Down
136 changes: 136 additions & 0 deletions tests/proxy_unit_tests/test_response_polling_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -1005,6 +1005,142 @@ def test_polling_with_router_lookup_no_match(self):

assert result is False

# ==================== Native Background Mode Tests ====================

def test_polling_disabled_when_model_in_native_background_mode(self):
    """A model listed in native_background_mode must bypass polling via cache."""
    from litellm.proxy.response_polling.polling_handler import (
        should_use_polling_for_request,
    )

    # Even with polling globally enabled ("all"), membership in the
    # native_background_mode list wins and polling is skipped.
    assert (
        should_use_polling_for_request(
            background_mode=True,
            polling_via_cache_enabled="all",
            redis_cache=Mock(),
            model="o4-mini-deep-research",
            llm_router=None,
            native_background_mode=["o4-mini-deep-research", "o3-deep-research"],
        )
        is False
    )

def test_polling_disabled_for_native_background_mode_with_provider_list(self):
    """native_background_mode overrides a matching provider allow-list."""
    from litellm.proxy.response_polling.polling_handler import (
        should_use_polling_for_request,
    )

    # Provider "openai" is polling-enabled, but the exact model name is in
    # native_background_mode, so the native path takes precedence.
    use_polling = should_use_polling_for_request(
        background_mode=True,
        polling_via_cache_enabled=["openai"],
        redis_cache=Mock(),
        model="openai/o4-mini-deep-research",
        llm_router=None,
        native_background_mode=["openai/o4-mini-deep-research"],
    )

    assert use_polling is False

def test_polling_enabled_when_model_not_in_native_background_mode(self):
    """A model absent from native_background_mode still gets polling via cache."""
    from litellm.proxy.response_polling.polling_handler import (
        should_use_polling_for_request,
    )

    # "gpt-4o" is not in the native list, so the "all" setting applies.
    use_polling = should_use_polling_for_request(
        background_mode=True,
        polling_via_cache_enabled="all",
        redis_cache=Mock(),
        model="gpt-4o",
        llm_router=None,
        native_background_mode=["o4-mini-deep-research"],
    )

    assert use_polling is True

def test_polling_enabled_when_native_background_mode_is_none(self):
    """Passing native_background_mode=None must not disturb normal polling."""
    from litellm.proxy.response_polling.polling_handler import (
        should_use_polling_for_request,
    )

    assert (
        should_use_polling_for_request(
            background_mode=True,
            polling_via_cache_enabled="all",
            redis_cache=Mock(),
            model="gpt-4o",
            llm_router=None,
            native_background_mode=None,
        )
        is True
    )

def test_polling_enabled_when_native_background_mode_is_empty_list(self):
    """An empty native_background_mode list behaves the same as None."""
    from litellm.proxy.response_polling.polling_handler import (
        should_use_polling_for_request,
    )

    assert (
        should_use_polling_for_request(
            background_mode=True,
            polling_via_cache_enabled="all",
            redis_cache=Mock(),
            model="gpt-4o",
            llm_router=None,
            native_background_mode=[],
        )
        is True
    )

def test_native_background_mode_exact_match_required(self):
    """Membership in native_background_mode is an exact string match, not a prefix."""
    from litellm.proxy.response_polling.polling_handler import (
        should_use_polling_for_request,
    )

    # "o4-mini" is only a prefix of the listed "o4-mini-deep-research",
    # so it must NOT be treated as a native-background-mode model.
    use_polling = should_use_polling_for_request(
        background_mode=True,
        polling_via_cache_enabled="all",
        redis_cache=Mock(),
        model="o4-mini",
        llm_router=None,
        native_background_mode=["o4-mini-deep-research"],
    )

    assert use_polling is True

def test_native_background_mode_with_provider_prefix_in_request(self):
    """A provider-prefixed request model does not match an unprefixed list entry."""
    from litellm.proxy.response_polling.polling_handler import (
        should_use_polling_for_request,
    )

    # The list holds the bare model name; the request carries a provider
    # prefix. "openai/o4-mini-deep-research" != "o4-mini-deep-research",
    # so the native check does not fire and provider-list polling applies.
    use_polling = should_use_polling_for_request(
        background_mode=True,
        polling_via_cache_enabled=["openai"],
        redis_cache=Mock(),
        model="openai/o4-mini-deep-research",
        llm_router=None,
        native_background_mode=["o4-mini-deep-research"],  # no provider prefix
    )

    assert use_polling is True

def test_native_background_mode_with_router_lookup(self):
    """A router model alias listed in native_background_mode still disables polling."""
    from litellm.proxy.response_polling.polling_handler import (
        should_use_polling_for_request,
    )

    router = Mock()
    router.model_name_to_deployment_indices = {"deep-research": [0]}
    router.model_list = [
        {
            "model_name": "deep-research",
            "litellm_params": {"model": "openai/o4-mini-deep-research"},
        }
    ]

    # The alias itself ("deep-research") is in native_background_mode, so
    # polling must be skipped even though the router could resolve it to an
    # openai deployment covered by the provider allow-list.
    use_polling = should_use_polling_for_request(
        background_mode=True,
        polling_via_cache_enabled=["openai"],
        redis_cache=Mock(),
        model="deep-research",
        llm_router=router,
        native_background_mode=["deep-research"],
    )

    assert use_polling is False


class TestStreamingEventParsing:
"""
Expand Down
Loading