Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
135 changes: 133 additions & 2 deletions litellm/integrations/custom_logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,9 @@

if TYPE_CHECKING:
from fastapi import HTTPException

from litellm.caching.caching import DualCache
from opentelemetry.trace import Span as _Span

from litellm.caching.caching import DualCache
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
from litellm.proxy._types import UserAPIKeyAuth
from litellm.types.mcp import (
Expand Down Expand Up @@ -484,6 +483,138 @@ async def async_post_mcp_tool_call_hook(
"""
return None

#########################################################
# AGENTIC LOOP HOOKS (for litellm.messages + future completion support)
#########################################################

async def async_should_run_agentic_loop(
    self,
    response: Any,
    model: str,
    messages: List[Dict],
    tools: Optional[List[Dict]],
    stream: bool,
    custom_llm_provider: str,
    kwargs: Dict,
) -> Tuple[bool, Dict]:
    """
    Decide whether a server-side agentic loop should run for this response.

    Invoked after the model's response is received and before it is returned
    to the caller. Subclasses inspect the response (e.g. for tool_use blocks)
    and signal whether LiteLLM should execute the tool server-side so the
    user gets a final answer from a single API call.

    Typical interceptions a subclass might implement:
    - WebSearch: run litellm.search() for providers without native web search
    - Code execution: run code in a sandbox and feed results back
    - Database / external API calls executed server-side

    End-to-end flow:
    1. User calls litellm.messages.acreate(tools=[...])
    2. Model replies with a tool_use block
    3. This hook decides whether the tool runs server-side
    4. If it returns True, async_run_agentic_loop executes the tool
    5. The user only ever sees the final answer

    Args:
        response: Model response (AnthropicMessagesResponse or AsyncIterator)
        model: Model name
        messages: Original request messages
        tools: Tool definitions from the request, if any
        stream: Whether the response is streaming
        custom_llm_provider: Provider name (e.g. "bedrock", "anthropic")
        kwargs: Remaining request parameters

    Returns:
        Tuple[bool, Dict]: ``(should_run, context)`` where ``should_run``
        indicates whether the loop should execute, and ``context`` carries
        the tool_calls/metadata needed by ``async_run_agentic_loop``.

    Example::

        if has_websearch_tool_use(response):
            return True, {
                "tool_calls": extract_tool_calls(response),
                "tool_type": "websearch",
            }
        return False, {}
    """
    # Base implementation never intercepts; override in a subclass to opt in.
    return False, {}

async def async_run_agentic_loop(
    self,
    tools: Dict,
    model: str,
    messages: List[Dict],
    response: Any,
    anthropic_messages_provider_config: Any,
    anthropic_messages_optional_request_params: Dict,
    logging_obj: "LiteLLMLoggingObj",
    stream: bool,
    kwargs: Dict,
) -> Any:
    """
    Execute the agentic loop using the context produced by the should-run hook.

    Called only if ``async_should_run_agentic_loop`` returns True.

    USE CASE: Execute server-side tools and orchestrate the agentic loop to
    return a complete answer to the user in a single API call.

    What a subclass implementation typically does:
    1. Extract tool calls from the ``tools`` dict
    2. Execute the tools (litellm.search, code execution, DB queries, etc.)
    3. Build an assistant message with the tool_use blocks
    4. Build a user message with tool_result blocks containing the results
    5. Make a follow-up litellm.messages.acreate() call with the results
    6. Return the final response

    Args:
        tools: Context dict returned by ``async_should_run_agentic_loop``;
            contains tool_calls and metadata for execution
        model: Model name
        messages: Original messages sent to the model
        response: Original response from the model (with tool_use)
        anthropic_messages_provider_config: Provider config for making requests
        anthropic_messages_optional_request_params: Request parameters (tools, etc.)
        logging_obj: LiteLLM logging object
        stream: Whether the response is streaming
        kwargs: Additional request parameters

    Returns:
        Final response after executing the agentic loop
        (AnthropicMessagesResponse with the final answer), or ``None`` from
        this base no-op implementation.

    Example::

        # Extract tool calls from the context produced by the should-run hook
        tool_calls = tools["tool_calls"]

        # Execute searches in parallel
        search_results = await asyncio.gather(
            *[litellm.asearch(tc["input"]["query"]) for tc in tool_calls]
        )

        # Build messages with tool results
        assistant_msg = {"role": "assistant", "content": [...tool_use blocks...]}
        user_msg = {"role": "user", "content": [...tool_result blocks...]}

        # Make follow-up request
        from litellm.anthropic_interface import messages
        final_response = await messages.acreate(
            model=model,
            messages=messages + [assistant_msg, user_msg],
            max_tokens=anthropic_messages_optional_request_params.get("max_tokens"),
            **anthropic_messages_optional_request_params
        )

        return final_response
    """
    # Base implementation is a no-op; subclasses override to run real tools.
    return None

# Useful helpers for custom logger classes

def truncate_standard_logging_payload_content(
Expand Down
182 changes: 182 additions & 0 deletions litellm/integrations/websearch_interception/ARCHITECTURE.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
# WebSearch Interception Architecture

Server-side WebSearch tool execution for models that don't natively support it (e.g., Bedrock/Claude).

## How It Works

User makes **ONE** `litellm.messages.acreate()` call → Gets final answer with search results.
The agentic loop happens transparently on the server.

---

## Request Flow

### Without Interception (Client-Side)
User manually handles tool execution:
1. User calls `litellm.messages.acreate()` → Gets `tool_use` response
2. User executes `litellm.asearch()`
3. User calls `litellm.messages.acreate()` again with results
4. User gets final answer

**Result**: 2 API calls, manual tool execution

### With Interception (Server-Side)
Server handles tool execution automatically:

```mermaid
sequenceDiagram
participant User
participant Messages as litellm.messages.acreate()
participant Handler as llm_http_handler.py
participant Logger as WebSearchInterceptionLogger
participant Router as proxy_server.llm_router
participant Search as litellm.asearch()
participant Provider as Bedrock API

User->>Messages: acreate(tools=[WebSearch])
Messages->>Handler: async_anthropic_messages_handler()
Handler->>Provider: Request
Provider-->>Handler: Response (tool_use)
Handler->>Logger: async_should_run_agentic_loop()
Logger->>Logger: Detect WebSearch tool_use
Logger-->>Handler: (True, tools)
Handler->>Logger: async_run_agentic_loop(tools)
Logger->>Router: Get search_provider from search_tools
Router-->>Logger: search_provider
Logger->>Search: asearch(query, provider)
Search-->>Logger: Search results
Logger->>Logger: Build tool_result message
Logger->>Messages: acreate() with results
Messages->>Provider: Request with search results
Provider-->>Messages: Final answer
Messages-->>Logger: Final response
Logger-->>Handler: Final response
Handler-->>User: Final answer (with search results)
```

**Result**: 1 API call from user, server handles agentic loop

---

## Key Components

| Component | File | Purpose |
|-----------|------|---------|
| **WebSearchInterceptionLogger** | `handler.py` | CustomLogger that implements agentic loop hooks |
| **Transformation Logic** | `transformation.py` | Detect tool_use, build tool_result messages, format search responses |
| **Agentic Loop Hooks** | `integrations/custom_logger.py` | Base hooks: `async_should_run_agentic_loop()`, `async_run_agentic_loop()` |
| **Hook Orchestration** | `llms/custom_httpx/llm_http_handler.py` | `_call_agentic_completion_hooks()` - calls hooks after response |
| **Router Search Tools** | `proxy/proxy_server.py` | `llm_router.search_tools` - configured search providers |
| **Search Endpoints** | `proxy/search_endpoints/endpoints.py` | Router logic for selecting search provider |

---

## Configuration

```python
from litellm.integrations.websearch_interception import WebSearchInterceptionLogger
from litellm.types.utils import LlmProviders

# Enable for Bedrock with specific search tool
litellm.callbacks = [
WebSearchInterceptionLogger(
enabled_providers=[LlmProviders.BEDROCK],
search_tool_name="my-perplexity-tool" # Optional: uses router's first tool if None
)
]

# Make request (streaming or non-streaming both work)
response = await litellm.messages.acreate(
model="bedrock/us.anthropic.claude-3-5-sonnet-20241022-v2:0",
messages=[{"role": "user", "content": "What is LiteLLM?"}],
tools=[{"name": "WebSearch", ...}],
max_tokens=1024,
stream=True # Streaming is automatically converted to non-streaming for WebSearch
)
```

---

## Streaming Support

WebSearch interception works transparently with both streaming and non-streaming requests.

**How streaming is handled:**
1. User makes request with `stream=True` and WebSearch tool
2. Before API call, `anthropic_messages()` detects WebSearch + interception enabled
3. Converts `stream=True` → `stream=False` internally
4. Agentic loop executes with non-streaming responses
5. Final response returned to user (non-streaming)

**Why this approach:**
- Server-side agentic loops require consuming full responses to detect tool_use
- User opts into this behavior by enabling WebSearch interception
- Provides seamless experience without client changes

**Testing:**
- **Non-streaming**: `test_websearch_interception_e2e.py`
- **Streaming**: `test_websearch_interception_streaming_e2e.py`

---

## Search Provider Selection

1. If `search_tool_name` specified → Look up in `llm_router.search_tools`
2. If not found or None → Use first available search tool
3. If no router or no tools → Fallback to `perplexity`

Example router config:
```yaml
search_tools:
- search_tool_name: "my-perplexity-tool"
litellm_params:
search_provider: "perplexity"
- search_tool_name: "my-tavily-tool"
litellm_params:
search_provider: "tavily"
```

---

## Message Flow

### Initial Request
```python
messages = [{"role": "user", "content": "What is LiteLLM?"}]
tools = [{"name": "WebSearch", ...}]
```

### First API Call (Internal)
**Response**: `tool_use` with `name="WebSearch"`, `input={"query": "what is litellm"}`

### Server Processing
1. Logger detects WebSearch tool_use
2. Looks up search provider from router
3. Executes `litellm.asearch(query="what is litellm", search_provider="perplexity")`
4. Gets results: `"Title: LiteLLM Docs\nURL: docs.litellm.ai\n..."`

### Follow-Up Request (Internal)
```python
messages = [
{"role": "user", "content": "What is LiteLLM?"},
{"role": "assistant", "content": [{"type": "tool_use", ...}]},
{"role": "user", "content": [{"type": "tool_result", "content": "search results..."}]}
]
```

### User Receives
```python
response.content[0].text
# "Based on the search results, LiteLLM is a unified interface..."
```

---

## Testing

**E2E Tests**:
- `test_websearch_interception_e2e.py` - Non-streaming real API calls to Bedrock
- `test_websearch_interception_streaming_e2e.py` - Streaming real API calls to Bedrock

**Unit Tests**: `test_websearch_interception.py`
Mocked tests for tool detection, provider filtering, edge cases.
12 changes: 12 additions & 0 deletions litellm/integrations/websearch_interception/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
"""
WebSearch Interception Module

Provides server-side WebSearch tool execution for models that don't natively
support server-side tool calling (e.g., Bedrock/Claude).
"""

from litellm.integrations.websearch_interception.handler import (
WebSearchInterceptionLogger,
)

__all__ = ["WebSearchInterceptionLogger"]
Loading
Loading