Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
135 changes: 133 additions & 2 deletions litellm/integrations/custom_logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,9 @@

if TYPE_CHECKING:
from fastapi import HTTPException

from litellm.caching.caching import DualCache
from opentelemetry.trace import Span as _Span

from litellm.caching.caching import DualCache
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
from litellm.proxy._types import UserAPIKeyAuth
from litellm.types.mcp import (
Expand Down Expand Up @@ -484,6 +483,138 @@ async def async_post_mcp_tool_call_hook(
"""
return None

#########################################################
# AGENTIC LOOP HOOKS (for litellm.messages + future completion support)
#########################################################

async def async_should_run_agentic_loop(
    self,
    response: Any,
    model: str,
    messages: List[Dict],
    tools: Optional[List[Dict]],
    stream: bool,
    custom_llm_provider: str,
    kwargs: Dict,
) -> Tuple[bool, Dict]:
    """
    Decide whether a server-side agentic loop should run for this response.

    Invoked after the model's response is received and before it is returned
    to the caller. Subclasses inspect the response (e.g. for tool_use blocks)
    and signal whether LiteLLM should execute the tool server-side so the
    user gets a final answer from a single API call.

    Typical interceptions a subclass might implement:
    - WebSearch: run litellm.search() for providers without native web search
    - Code execution: run code in a sandbox and feed results back
    - Database / external API calls executed server-side

    End-to-end flow:
    1. User calls litellm.messages.acreate(tools=[...])
    2. Model replies with a tool_use block
    3. This hook decides whether the tool runs server-side
    4. If it returns True, async_run_agentic_loop executes the tool
    5. The user only ever sees the final answer

    Args:
        response: Model response (AnthropicMessagesResponse or AsyncIterator)
        model: Model name
        messages: Original request messages
        tools: Tool definitions from the request, if any
        stream: Whether the response is streaming
        custom_llm_provider: Provider name (e.g. "bedrock", "anthropic")
        kwargs: Remaining request parameters

    Returns:
        Tuple[bool, Dict]: ``(should_run, context)`` where ``should_run``
        indicates whether the loop should execute, and ``context`` carries
        the tool_calls/metadata needed by ``async_run_agentic_loop``.

    Example::

        if has_websearch_tool_use(response):
            return True, {
                "tool_calls": extract_tool_calls(response),
                "tool_type": "websearch",
            }
        return False, {}
    """
    # Base implementation never intercepts; override in a subclass to opt in.
    return False, {}

async def async_run_agentic_loop(
    self,
    tools: Dict,
    model: str,
    messages: List[Dict],
    response: Any,
    anthropic_messages_provider_config: Any,
    anthropic_messages_optional_request_params: Dict,
    logging_obj: "LiteLLMLoggingObj",
    stream: bool,
    kwargs: Dict,
) -> Any:
    """
    Execute the agentic loop using the context produced by the should-run hook.

    Called only if ``async_should_run_agentic_loop`` returns True.

    USE CASE: Execute server-side tools and orchestrate the agentic loop to
    return a complete answer to the user in a single API call.

    What a subclass implementation typically does:
    1. Extract tool calls from the ``tools`` dict
    2. Execute the tools (litellm.search, code execution, DB queries, etc.)
    3. Build an assistant message with the tool_use blocks
    4. Build a user message with tool_result blocks containing the results
    5. Make a follow-up litellm.messages.acreate() call with the results
    6. Return the final response

    Args:
        tools: Context dict returned by ``async_should_run_agentic_loop``;
            contains tool_calls and metadata for execution
        model: Model name
        messages: Original messages sent to the model
        response: Original response from the model (with tool_use)
        anthropic_messages_provider_config: Provider config for making requests
        anthropic_messages_optional_request_params: Request parameters (tools, etc.)
        logging_obj: LiteLLM logging object
        stream: Whether the response is streaming
        kwargs: Additional request parameters

    Returns:
        Final response after executing the agentic loop
        (AnthropicMessagesResponse with the final answer), or ``None`` from
        this base no-op implementation.

    Example::

        # Extract tool calls from the context produced by the should-run hook
        tool_calls = tools["tool_calls"]

        # Execute searches in parallel
        search_results = await asyncio.gather(
            *[litellm.asearch(tc["input"]["query"]) for tc in tool_calls]
        )

        # Build messages with tool results
        assistant_msg = {"role": "assistant", "content": [...tool_use blocks...]}
        user_msg = {"role": "user", "content": [...tool_result blocks...]}

        # Make follow-up request
        from litellm.anthropic_interface import messages
        final_response = await messages.acreate(
            model=model,
            messages=messages + [assistant_msg, user_msg],
            max_tokens=anthropic_messages_optional_request_params.get("max_tokens"),
            **anthropic_messages_optional_request_params
        )

        return final_response
    """
    # Base implementation is a no-op; subclasses override to run real tools.
    return None

# Useful helpers for custom logger classes

def truncate_standard_logging_payload_content(
Expand Down
182 changes: 182 additions & 0 deletions litellm/integrations/websearch_interception/ARCHITECTURE.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
# WebSearch Interception Architecture

Server-side WebSearch tool execution for models that don't natively support it (e.g., Bedrock/Claude).

## How It Works

User makes **ONE** `litellm.messages.acreate()` call → Gets final answer with search results.
The agentic loop happens transparently on the server.

---

## Request Flow

### Without Interception (Client-Side)
User manually handles tool execution:
1. User calls `litellm.messages.acreate()` → Gets `tool_use` response
2. User executes `litellm.asearch()`
3. User calls `litellm.messages.acreate()` again with results
4. User gets final answer

**Result**: 2 API calls, manual tool execution

### With Interception (Server-Side)
Server handles tool execution automatically:

```mermaid
sequenceDiagram
participant User
participant Messages as litellm.messages.acreate()
participant Handler as llm_http_handler.py
participant Logger as WebSearchInterceptionLogger
participant Router as proxy_server.llm_router
participant Search as litellm.asearch()
participant Provider as Bedrock API

User->>Messages: acreate(tools=[WebSearch])
Messages->>Handler: async_anthropic_messages_handler()
Handler->>Provider: Request
Provider-->>Handler: Response (tool_use)
Handler->>Logger: async_should_run_agentic_loop()
Logger->>Logger: Detect WebSearch tool_use
Logger-->>Handler: (True, tools)
Handler->>Logger: async_run_agentic_loop(tools)
Logger->>Router: Get search_provider from search_tools
Router-->>Logger: search_provider
Logger->>Search: asearch(query, provider)
Search-->>Logger: Search results
Logger->>Logger: Build tool_result message
Logger->>Messages: acreate() with results
Messages->>Provider: Request with search results
Provider-->>Messages: Final answer
Messages-->>Logger: Final response
Logger-->>Handler: Final response
Handler-->>User: Final answer (with search results)
```

**Result**: 1 API call from user, server handles agentic loop

---

## Key Components

| Component | File | Purpose |
|-----------|------|---------|
| **WebSearchInterceptionLogger** | `handler.py` | CustomLogger that implements agentic loop hooks |
| **Transformation Logic** | `transformation.py` | Detect tool_use, build tool_result messages, format search responses |
| **Agentic Loop Hooks** | `integrations/custom_logger.py` | Base hooks: `async_should_run_agentic_loop()`, `async_run_agentic_loop()` |
| **Hook Orchestration** | `llms/custom_httpx/llm_http_handler.py` | `_call_agentic_completion_hooks()` - calls hooks after response |
| **Router Search Tools** | `proxy/proxy_server.py` | `llm_router.search_tools` - configured search providers |
| **Search Endpoints** | `proxy/search_endpoints/endpoints.py` | Router logic for selecting search provider |

---

## Configuration

```python
from litellm.integrations.websearch_interception import WebSearchInterceptionLogger
from litellm.types.utils import LlmProviders

# Enable for Bedrock with specific search tool
litellm.callbacks = [
WebSearchInterceptionLogger(
enabled_providers=[LlmProviders.BEDROCK],
search_tool_name="my-perplexity-tool" # Optional: uses router's first tool if None
)
]

# Make request (streaming or non-streaming both work)
response = await litellm.messages.acreate(
model="bedrock/us.anthropic.claude-3-5-sonnet-20241022-v2:0",
messages=[{"role": "user", "content": "What is LiteLLM?"}],
tools=[{"name": "WebSearch", ...}],
max_tokens=1024,
stream=True # Streaming is automatically converted to non-streaming for WebSearch
)
```

---

## Streaming Support

WebSearch interception works transparently with both streaming and non-streaming requests.

**How streaming is handled:**
1. User makes request with `stream=True` and WebSearch tool
2. Before API call, `anthropic_messages()` detects WebSearch + interception enabled
3. Converts `stream=True` → `stream=False` internally
4. Agentic loop executes with non-streaming responses
5. Final response returned to user (non-streaming)

**Why this approach:**
- Server-side agentic loops require consuming full responses to detect tool_use
- User opts into this behavior by enabling WebSearch interception
- Provides seamless experience without client changes

**Testing:**
- **Non-streaming**: `test_websearch_interception_e2e.py`
- **Streaming**: `test_websearch_interception_streaming_e2e.py`

---

## Search Provider Selection

1. If `search_tool_name` specified → Look up in `llm_router.search_tools`
2. If not found or None → Use first available search tool
3. If no router or no tools → Fallback to `perplexity`

Example router config:
```yaml
search_tools:
- search_tool_name: "my-perplexity-tool"
litellm_params:
search_provider: "perplexity"
- search_tool_name: "my-tavily-tool"
litellm_params:
search_provider: "tavily"
```

---

## Message Flow

### Initial Request
```python
messages = [{"role": "user", "content": "What is LiteLLM?"}]
tools = [{"name": "WebSearch", ...}]
```

### First API Call (Internal)
**Response**: `tool_use` with `name="WebSearch"`, `input={"query": "what is litellm"}`

### Server Processing
1. Logger detects WebSearch tool_use
2. Looks up search provider from router
3. Executes `litellm.asearch(query="what is litellm", search_provider="perplexity")`
4. Gets results: `"Title: LiteLLM Docs\nURL: docs.litellm.ai\n..."`

### Follow-Up Request (Internal)
```python
messages = [
{"role": "user", "content": "What is LiteLLM?"},
{"role": "assistant", "content": [{"type": "tool_use", ...}]},
{"role": "user", "content": [{"type": "tool_result", "content": "search results..."}]}
]
```

### User Receives
```python
response.content[0].text
# "Based on the search results, LiteLLM is a unified interface..."
```

---

## Testing

**E2E Tests**:
- `test_websearch_interception_e2e.py` - Non-streaming real API calls to Bedrock
- `test_websearch_interception_streaming_e2e.py` - Streaming real API calls to Bedrock

**Unit Tests**: `test_websearch_interception.py`
Mocked tests for tool detection, provider filtering, edge cases.
12 changes: 12 additions & 0 deletions litellm/integrations/websearch_interception/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
"""
WebSearch Interception Module

Provides server-side WebSearch tool execution for models that don't natively
support server-side tool calling (e.g., Bedrock/Claude).
"""

from litellm.integrations.websearch_interception.handler import (
WebSearchInterceptionLogger,
)

__all__ = ["WebSearchInterceptionLogger"]
Loading
Loading