Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions litellm/proxy/proxy_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -1224,6 +1224,7 @@ async def root_redirect():
RedisCache
] = None # redis cache used for tracking spend, tpm/rpm limits
polling_via_cache_enabled: Union[Literal["all"], List[str], bool] = False
native_background_mode: List[str] = [] # Models that should use native provider background mode instead of polling
polling_cache_ttl: int = 3600 # Default 1 hour TTL for polling cache
user_custom_auth = None
user_custom_key_generate = None
Expand Down Expand Up @@ -2456,14 +2457,17 @@ async def load_config( # noqa: PLR0915
pass
elif key == "responses":
# Initialize global polling via cache settings
global polling_via_cache_enabled, polling_cache_ttl
global polling_via_cache_enabled, native_background_mode, polling_cache_ttl
background_mode = value.get("background_mode", {})
polling_via_cache_enabled = background_mode.get(
"polling_via_cache", False
)
native_background_mode = background_mode.get(
"native_background_mode", []
)
polling_cache_ttl = background_mode.get("ttl", 3600)
verbose_proxy_logger.debug(
f"{blue_color_code} Initialized polling via cache: enabled={polling_via_cache_enabled}, ttl={polling_cache_ttl}{reset_color_code}"
f"{blue_color_code} Initialized polling via cache: enabled={polling_via_cache_enabled}, native_background_mode={native_background_mode}, ttl={polling_cache_ttl}{reset_color_code}"
)
elif key == "default_team_settings":
for idx, team_setting in enumerate(
Expand Down
2 changes: 2 additions & 0 deletions litellm/proxy/response_api_endpoints/endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ async def responses_api(
_read_request_body,
general_settings,
llm_router,
native_background_mode,
polling_cache_ttl,
polling_via_cache_enabled,
proxy_config,
Expand Down Expand Up @@ -95,6 +96,7 @@ async def responses_api(
redis_cache=redis_usage_cache,
model=data.get("model", ""),
llm_router=llm_router,
native_background_mode=native_background_mode,
)

# If polling is enabled, use polling mode
Expand Down
12 changes: 11 additions & 1 deletion litellm/proxy/response_polling/polling_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"""
import json
from datetime import datetime, timezone
from typing import Any, Dict, Optional
from typing import Any, Dict, List, Optional

from litellm._logging import verbose_proxy_logger
from litellm._uuid import uuid4
Expand Down Expand Up @@ -257,6 +257,7 @@ def should_use_polling_for_request(
redis_cache, # RedisCache or None
model: str,
llm_router, # Router instance or None
native_background_mode: Optional[List[str]] = None, # List of models that should use native background mode
) -> bool:
"""
Determine if polling via cache should be used for a request.
Expand All @@ -267,6 +268,8 @@ def should_use_polling_for_request(
redis_cache: Redis cache instance (required for polling)
model: Model name from the request (e.g., "gpt-5" or "openai/gpt-4o")
llm_router: LiteLLM router instance for looking up model deployments
native_background_mode: List of model names that should use native provider
background mode instead of polling via cache

Returns:
True if polling should be used, False otherwise
Expand All @@ -275,6 +278,13 @@ def should_use_polling_for_request(
if not (background_mode and polling_via_cache_enabled and redis_cache):
return False

# Check if model is in native_background_mode list - these use native provider background mode
if native_background_mode and model in native_background_mode:
verbose_proxy_logger.debug(
f"Model {model} is in native_background_mode list, skipping polling via cache"
)
return False

# "all" enables polling for all providers
if polling_via_cache_enabled == "all":
return True
Expand Down
136 changes: 136 additions & 0 deletions tests/proxy_unit_tests/test_response_polling_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -1005,6 +1005,142 @@ def test_polling_with_router_lookup_no_match(self):

assert result is False

# ==================== Native Background Mode Tests ====================

def test_polling_disabled_when_model_in_native_background_mode(self):
    """A model listed in native_background_mode must bypass polling via cache."""
    from litellm.proxy.response_polling.polling_handler import (
        should_use_polling_for_request,
    )

    # Even with polling globally enabled ("all"), membership in the
    # native_background_mode list wins and polling is skipped.
    assert (
        should_use_polling_for_request(
            background_mode=True,
            polling_via_cache_enabled="all",
            redis_cache=Mock(),
            model="o4-mini-deep-research",
            llm_router=None,
            native_background_mode=["o4-mini-deep-research", "o3-deep-research"],
        )
        is False
    )

def test_polling_disabled_for_native_background_mode_with_provider_list(self):
    """native_background_mode overrides a matching provider allow-list."""
    from litellm.proxy.response_polling.polling_handler import (
        should_use_polling_for_request,
    )

    # Provider "openai" is polling-enabled, but the exact model name is in
    # native_background_mode, so the native path takes precedence.
    use_polling = should_use_polling_for_request(
        background_mode=True,
        polling_via_cache_enabled=["openai"],
        redis_cache=Mock(),
        model="openai/o4-mini-deep-research",
        llm_router=None,
        native_background_mode=["openai/o4-mini-deep-research"],
    )

    assert use_polling is False

def test_polling_enabled_when_model_not_in_native_background_mode(self):
    """A model absent from native_background_mode still gets polling via cache."""
    from litellm.proxy.response_polling.polling_handler import (
        should_use_polling_for_request,
    )

    # "gpt-4o" is not in the native list, so the "all" setting applies.
    use_polling = should_use_polling_for_request(
        background_mode=True,
        polling_via_cache_enabled="all",
        redis_cache=Mock(),
        model="gpt-4o",
        llm_router=None,
        native_background_mode=["o4-mini-deep-research"],
    )

    assert use_polling is True

def test_polling_enabled_when_native_background_mode_is_none(self):
    """Passing native_background_mode=None must not disturb normal polling."""
    from litellm.proxy.response_polling.polling_handler import (
        should_use_polling_for_request,
    )

    assert (
        should_use_polling_for_request(
            background_mode=True,
            polling_via_cache_enabled="all",
            redis_cache=Mock(),
            model="gpt-4o",
            llm_router=None,
            native_background_mode=None,
        )
        is True
    )

def test_polling_enabled_when_native_background_mode_is_empty_list(self):
    """An empty native_background_mode list behaves the same as None."""
    from litellm.proxy.response_polling.polling_handler import (
        should_use_polling_for_request,
    )

    assert (
        should_use_polling_for_request(
            background_mode=True,
            polling_via_cache_enabled="all",
            redis_cache=Mock(),
            model="gpt-4o",
            llm_router=None,
            native_background_mode=[],
        )
        is True
    )

def test_native_background_mode_exact_match_required(self):
    """Membership in native_background_mode is an exact string match, not a prefix."""
    from litellm.proxy.response_polling.polling_handler import (
        should_use_polling_for_request,
    )

    # "o4-mini" is only a prefix of the listed "o4-mini-deep-research",
    # so it must NOT be treated as a native-background-mode model.
    use_polling = should_use_polling_for_request(
        background_mode=True,
        polling_via_cache_enabled="all",
        redis_cache=Mock(),
        model="o4-mini",
        llm_router=None,
        native_background_mode=["o4-mini-deep-research"],
    )

    assert use_polling is True

def test_native_background_mode_with_provider_prefix_in_request(self):
    """A provider-prefixed request model does not match an unprefixed list entry."""
    from litellm.proxy.response_polling.polling_handler import (
        should_use_polling_for_request,
    )

    # The list holds the bare model name; the request carries a provider
    # prefix. "openai/o4-mini-deep-research" != "o4-mini-deep-research",
    # so the native check does not fire and provider-list polling applies.
    use_polling = should_use_polling_for_request(
        background_mode=True,
        polling_via_cache_enabled=["openai"],
        redis_cache=Mock(),
        model="openai/o4-mini-deep-research",
        llm_router=None,
        native_background_mode=["o4-mini-deep-research"],  # no provider prefix
    )

    assert use_polling is True

def test_native_background_mode_with_router_lookup(self):
    """A router model alias listed in native_background_mode still disables polling."""
    from litellm.proxy.response_polling.polling_handler import (
        should_use_polling_for_request,
    )

    router = Mock()
    router.model_name_to_deployment_indices = {"deep-research": [0]}
    router.model_list = [
        {
            "model_name": "deep-research",
            "litellm_params": {"model": "openai/o4-mini-deep-research"},
        }
    ]

    # The alias itself ("deep-research") is in native_background_mode, so
    # polling must be skipped even though the router could resolve it to an
    # openai deployment covered by the provider allow-list.
    use_polling = should_use_polling_for_request(
        background_mode=True,
        polling_via_cache_enabled=["openai"],
        redis_cache=Mock(),
        model="deep-research",
        llm_router=router,
        native_background_mode=["deep-research"],
    )

    assert use_polling is False


class TestStreamingEventParsing:
"""
Expand Down
Loading