Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 47 additions & 1 deletion litellm/integrations/websearch_interception/handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -299,12 +299,54 @@ async def async_should_run_agentic_loop(
f"WebSearchInterception: Detected {len(tool_calls)} WebSearch tool call(s), executing agentic loop"
)

# Return tools dict with tool calls
# Extract thinking blocks from response content.
# When extended thinking is enabled, the model response includes
# thinking/redacted_thinking blocks that must be preserved and
# prepended to the follow-up assistant message.
thinking_blocks: List[Dict] = []
if isinstance(response, dict):
content = response.get("content", [])
else:
content = getattr(response, "content", []) or []

for block in content:
if isinstance(block, dict):
block_type = block.get("type")
else:
block_type = getattr(block, "type", None)

if block_type in ("thinking", "redacted_thinking"):
if isinstance(block, dict):
thinking_blocks.append(block)
else:
# Convert object to dict using getattr, matching the
# pattern in _detect_from_non_streaming_response
thinking_block_dict: Dict = {"type": block_type}
if block_type == "thinking":
thinking_block_dict["thinking"] = getattr(
block, "thinking", ""
)
thinking_block_dict["signature"] = getattr(
block, "signature", ""
)
else: # redacted_thinking
thinking_block_dict["data"] = getattr(
block, "data", ""
)
thinking_blocks.append(thinking_block_dict)
Comment on lines +322 to +336
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missing `cache_control` field when converting a thinking block object to a dict.

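Thinking blocks can include an optional `cache_control` field (see `ChatCompletionThinkingBlock` and `ChatCompletionRedactedThinkingBlock` in `types/llms/openai.py`), but this conversion only copies the `type`, `thinking`, `signature`, and `data` fields. As a result, any `cache_control` set on an object-style block is dropped when the block is converted to a dict, whereas dict-style blocks are appended unchanged and keep it.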

Suggested change
# Convert object to dict using getattr, matching the
# pattern in _detect_from_non_streaming_response
thinking_block_dict: Dict = {"type": block_type}
if block_type == "thinking":
thinking_block_dict["thinking"] = getattr(
block, "thinking", ""
)
thinking_block_dict["signature"] = getattr(
block, "signature", ""
)
else: # redacted_thinking
thinking_block_dict["data"] = getattr(
block, "data", ""
)
thinking_blocks.append(thinking_block_dict)
# Convert object to dict using getattr, matching the
# pattern in _detect_from_non_streaming_response
thinking_block_dict: Dict = {"type": block_type}
if block_type == "thinking":
thinking_block_dict["thinking"] = getattr(
block, "thinking", ""
)
thinking_block_dict["signature"] = getattr(
block, "signature", ""
)
else: # redacted_thinking
thinking_block_dict["data"] = getattr(
block, "data", ""
)
# Preserve cache_control if present
cache_control = getattr(block, "cache_control", None)
if cache_control is not None:
thinking_block_dict["cache_control"] = cache_control
thinking_blocks.append(thinking_block_dict)


if thinking_blocks:
verbose_logger.debug(
f"WebSearchInterception: Extracted {len(thinking_blocks)} thinking block(s) from response"
)

# Return tools dict with tool calls and thinking blocks
tools_dict = {
"tool_calls": tool_calls,
"tool_type": "websearch",
"provider": custom_llm_provider,
"response_format": "anthropic",
"thinking_blocks": thinking_blocks,
}
return True, tools_dict

Expand Down Expand Up @@ -387,6 +429,7 @@ async def async_run_agentic_loop(
"""

tool_calls = tools["tool_calls"]
thinking_blocks = tools.get("thinking_blocks", [])

verbose_logger.debug(
f"WebSearchInterception: Executing agentic loop for {len(tool_calls)} search(es)"
Expand All @@ -396,6 +439,7 @@ async def async_run_agentic_loop(
model=model,
messages=messages,
tool_calls=tool_calls,
thinking_blocks=thinking_blocks,
anthropic_messages_optional_request_params=anthropic_messages_optional_request_params,
logging_obj=logging_obj,
stream=stream,
Expand Down Expand Up @@ -442,6 +486,7 @@ async def _execute_agentic_loop(
model: str,
messages: List[Dict],
tool_calls: List[Dict],
thinking_blocks: List[Dict],
anthropic_messages_optional_request_params: Dict,
logging_obj: Any,
stream: bool,
Expand Down Expand Up @@ -495,6 +540,7 @@ async def _execute_agentic_loop(
assistant_message, user_message = WebSearchTransformation.transform_response(
tool_calls=tool_calls,
search_results=final_search_results,
thinking_blocks=thinking_blocks,
)

# Make follow-up request with search results
Expand Down
34 changes: 27 additions & 7 deletions litellm/integrations/websearch_interception/transformation.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
Transforms between Anthropic/OpenAI tool_use format and LiteLLM search format.
"""
import json
from typing import Any, Dict, List, Tuple, Union
from typing import Any, Dict, List, Optional, Tuple, Union

from litellm._logging import verbose_logger
from litellm.constants import LITELLM_WEB_SEARCH_TOOL_NAME
Expand Down Expand Up @@ -224,6 +224,7 @@ def transform_response(
tool_calls: List[Dict],
search_results: List[str],
response_format: str = "anthropic",
thinking_blocks: Optional[List[Dict]] = None,
) -> Tuple[Dict, Union[Dict, List[Dict]]]:
"""
Transform LiteLLM search results to Anthropic/OpenAI tool_result format.
Expand All @@ -235,6 +236,10 @@ def transform_response(
tool_calls: List of tool_use/tool_calls dicts from transform_request
search_results: List of search result strings (one per tool_call)
response_format: Response format - "anthropic" or "openai" (default: "anthropic")
thinking_blocks: Optional list of thinking/redacted_thinking blocks
from the model's response. When present, prepended to the
assistant message content (required by Anthropic API when
thinking is enabled).

Returns:
(assistant_message, user_or_tool_messages):
Expand All @@ -247,27 +252,42 @@ def transform_response(
)
else:
return WebSearchTransformation._transform_response_anthropic(
tool_calls, search_results
tool_calls, search_results, thinking_blocks=thinking_blocks
)

@staticmethod
def _transform_response_anthropic(
tool_calls: List[Dict],
search_results: List[str],
thinking_blocks: Optional[List[Dict]] = None,
) -> Tuple[Dict, Dict]:
"""Transform to Anthropic format (single user message with tool_result blocks)"""
# Build assistant message with tool_use blocks
assistant_message = {
"role": "assistant",
"content": [
# Build assistant message content
assistant_content: List[Dict] = []

# Prepend thinking blocks if present.
# When extended thinking is enabled, Anthropic requires the assistant
# message to start with thinking/redacted_thinking blocks before any
# tool_use blocks. Same pattern as anthropic_messages_pt in factory.py.
if thinking_blocks:
assistant_content.extend(thinking_blocks)

# Add tool_use blocks
assistant_content.extend(
[
{
"type": "tool_use",
"id": tc["id"],
"name": tc["name"],
"input": tc["input"],
}
for tc in tool_calls
],
]
)

assistant_message = {
"role": "assistant",
"content": assistant_content,
}

# Build user message with tool_result blocks
Expand Down
Loading
Loading