diff --git a/litellm/integrations/websearch_interception/handler.py b/litellm/integrations/websearch_interception/handler.py
index d7858d71eb3..bef8925e8e9 100644
--- a/litellm/integrations/websearch_interception/handler.py
+++ b/litellm/integrations/websearch_interception/handler.py
@@ -299,12 +299,54 @@ async def async_should_run_agentic_loop(
             f"WebSearchInterception: Detected {len(tool_calls)} WebSearch tool call(s), executing agentic loop"
         )
 
-        # Return tools dict with tool calls
+        # Extract thinking blocks from response content.
+        # When extended thinking is enabled, the model response includes
+        # thinking/redacted_thinking blocks that must be preserved and
+        # prepended to the follow-up assistant message.
+        thinking_blocks: List[Dict] = []
+        if isinstance(response, dict):
+            content = response.get("content") or []
+        else:
+            content = getattr(response, "content", []) or []
+
+        for block in content:
+            if isinstance(block, dict):
+                block_type = block.get("type")
+            else:
+                block_type = getattr(block, "type", None)
+
+            if block_type in ("thinking", "redacted_thinking"):
+                if isinstance(block, dict):
+                    thinking_blocks.append(block)
+                else:
+                    # Convert object to dict using getattr, matching the
+                    # pattern in _detect_from_non_streaming_response
+                    thinking_block_dict: Dict = {"type": block_type}
+                    if block_type == "thinking":
+                        thinking_block_dict["thinking"] = getattr(
+                            block, "thinking", ""
+                        )
+                        thinking_block_dict["signature"] = getattr(
+                            block, "signature", ""
+                        )
+                    else:  # redacted_thinking
+                        thinking_block_dict["data"] = getattr(
+                            block, "data", ""
+                        )
+                    thinking_blocks.append(thinking_block_dict)
+
+        if thinking_blocks:
+            verbose_logger.debug(
+                f"WebSearchInterception: Extracted {len(thinking_blocks)} thinking block(s) from response"
+            )
+
+        # Return tools dict with tool calls and thinking blocks
         tools_dict = {
             "tool_calls": tool_calls,
             "tool_type": "websearch",
             "provider": custom_llm_provider,
             "response_format": "anthropic",
+            "thinking_blocks": thinking_blocks,
         }
         return True, tools_dict
 
@@ -387,6 +429,7 @@ async def async_run_agentic_loop(
         """
 
         tool_calls = tools["tool_calls"]
+        thinking_blocks = tools.get("thinking_blocks", [])
 
         verbose_logger.debug(
             f"WebSearchInterception: Executing agentic loop for {len(tool_calls)} search(es)"
@@ -396,6 +439,7 @@ async def async_run_agentic_loop(
             model=model,
             messages=messages,
             tool_calls=tool_calls,
+            thinking_blocks=thinking_blocks,
             anthropic_messages_optional_request_params=anthropic_messages_optional_request_params,
             logging_obj=logging_obj,
             stream=stream,
@@ -442,6 +486,7 @@ async def _execute_agentic_loop(
         model: str,
         messages: List[Dict],
         tool_calls: List[Dict],
+        thinking_blocks: List[Dict],
         anthropic_messages_optional_request_params: Dict,
         logging_obj: Any,
         stream: bool,
@@ -495,6 +540,7 @@ async def _execute_agentic_loop(
         assistant_message, user_message = WebSearchTransformation.transform_response(
             tool_calls=tool_calls,
             search_results=final_search_results,
+            thinking_blocks=thinking_blocks,
         )
 
         # Make follow-up request with search results
diff --git a/litellm/integrations/websearch_interception/transformation.py b/litellm/integrations/websearch_interception/transformation.py
index e44ec35c3a2..e016899e0c3 100644
--- a/litellm/integrations/websearch_interception/transformation.py
+++ b/litellm/integrations/websearch_interception/transformation.py
@@ -4,7 +4,7 @@ Transforms between Anthropic/OpenAI tool_use format and LiteLLM search format.
 """
 
 import json
-from typing import Any, Dict, List, Tuple, Union
+from typing import Any, Dict, List, Optional, Tuple, Union
 
 from litellm._logging import verbose_logger
 from litellm.constants import LITELLM_WEB_SEARCH_TOOL_NAME
@@ -224,6 +224,7 @@ def transform_response(
         tool_calls: List[Dict],
         search_results: List[str],
         response_format: str = "anthropic",
+        thinking_blocks: Optional[List[Dict]] = None,
     ) -> Tuple[Dict, Union[Dict, List[Dict]]]:
         """
         Transform LiteLLM search results to Anthropic/OpenAI tool_result format.
@@ -235,6 +236,10 @@ def transform_response(
             tool_calls: List of tool_use/tool_calls dicts from transform_request
             search_results: List of search result strings (one per tool_call)
             response_format: Response format - "anthropic" or "openai" (default: "anthropic")
+            thinking_blocks: Optional list of thinking/redacted_thinking blocks
+                from the model's response. When present, prepended to the
+                assistant message content (required by Anthropic API when
+                thinking is enabled).
 
         Returns:
             (assistant_message, user_or_tool_messages):
@@ -247,19 +252,29 @@ def transform_response(
             )
         else:
             return WebSearchTransformation._transform_response_anthropic(
-                tool_calls, search_results
+                tool_calls, search_results, thinking_blocks=thinking_blocks
             )
 
     @staticmethod
     def _transform_response_anthropic(
         tool_calls: List[Dict],
         search_results: List[str],
+        thinking_blocks: Optional[List[Dict]] = None,
     ) -> Tuple[Dict, Dict]:
         """Transform to Anthropic format (single user message with tool_result blocks)"""
-        # Build assistant message with tool_use blocks
-        assistant_message = {
-            "role": "assistant",
-            "content": [
+        # Build assistant message content
+        assistant_content: List[Dict] = []
+
+        # Prepend thinking blocks if present.
+        # When extended thinking is enabled, Anthropic requires the assistant
+        # message to start with thinking/redacted_thinking blocks before any
+        # tool_use blocks. Same pattern as anthropic_messages_pt in factory.py.
+        if thinking_blocks:
+            assistant_content.extend(thinking_blocks)
+
+        # Add tool_use blocks
+        assistant_content.extend(
+            [
                 {
                     "type": "tool_use",
                     "id": tc["id"],
@@ -267,7 +282,12 @@ def _transform_response_anthropic(
                     "input": tc["input"],
                 }
                 for tc in tool_calls
-            ],
+            ]
+        )
+
+        assistant_message = {
+            "role": "assistant",
+            "content": assistant_content,
         }
 
         # Build user message with tool_result blocks
diff --git a/tests/test_litellm/integrations/websearch_interception/test_websearch_interception_thinking.py b/tests/test_litellm/integrations/websearch_interception/test_websearch_interception_thinking.py
new file mode 100644
index 00000000000..8093ce6fc12
--- /dev/null
+++ b/tests/test_litellm/integrations/websearch_interception/test_websearch_interception_thinking.py
@@ -0,0 +1,327 @@
+"""
+Unit tests for WebSearch Interception with Extended Thinking
+
+Tests that the websearch interception agentic loop correctly handles
+thinking/redacted_thinking blocks when extended thinking is enabled.
+"""
+
+from unittest.mock import Mock
+
+import pytest
+
+from litellm.integrations.websearch_interception.handler import (
+    WebSearchInterceptionLogger,
+)
+from litellm.integrations.websearch_interception.transformation import (
+    WebSearchTransformation,
+)
+
+
+class TestTransformResponseWithThinking:
+    """Tests for _transform_response_anthropic with thinking blocks."""
+
+    def test_thinking_blocks_prepended_to_assistant_message(self):
+        """Test that thinking blocks are prepended before tool_use blocks."""
+        tool_calls = [
+            {
+                "id": "toolu_01",
+                "type": "tool_use",
+                "name": "litellm_web_search",
+                "input": {"query": "latest news"},
+            }
+        ]
+        search_results = [
+            "Title: News\nURL: https://example.com\nSnippet: Latest news"
+        ]
+        thinking_blocks = [
+            {
+                "type": "thinking",
+                "thinking": "Let me search for that.",
+                "signature": "sig123",
+            },
+            {"type": "redacted_thinking", "data": "abc123"},
+        ]
+
+        assistant_msg, user_msg = (
+            WebSearchTransformation._transform_response_anthropic(
+                tool_calls=tool_calls,
+                search_results=search_results,
+                thinking_blocks=thinking_blocks,
+            )
+        )
+
+        # Verify thinking blocks come first
+        content = assistant_msg["content"]
+        assert len(content) == 3  # 2 thinking + 1 tool_use
+        assert content[0]["type"] == "thinking"
+        assert content[0]["thinking"] == "Let me search for that."
+        assert content[1]["type"] == "redacted_thinking"
+        assert content[1]["data"] == "abc123"
+        assert content[2]["type"] == "tool_use"
+        assert content[2]["id"] == "toolu_01"
+
+    def test_no_thinking_blocks_backward_compat(self):
+        """Test that transform works without thinking blocks (backward compat)."""
+        tool_calls = [
+            {
+                "id": "toolu_01",
+                "type": "tool_use",
+                "name": "litellm_web_search",
+                "input": {"query": "test"},
+            }
+        ]
+        search_results = ["Search result text"]
+
+        # No thinking_blocks param (default None)
+        assistant_msg, _ = (
+            WebSearchTransformation._transform_response_anthropic(
+                tool_calls=tool_calls,
+                search_results=search_results,
+            )
+        )
+
+        content = assistant_msg["content"]
+        assert len(content) == 1
+        assert content[0]["type"] == "tool_use"
+
+    def test_empty_thinking_blocks_list(self):
+        """Test that an empty thinking_blocks list behaves like None."""
+        tool_calls = [
+            {
+                "id": "toolu_01",
+                "type": "tool_use",
+                "name": "litellm_web_search",
+                "input": {"query": "test"},
+            }
+        ]
+        search_results = ["Search result text"]
+
+        assistant_msg, _ = (
+            WebSearchTransformation._transform_response_anthropic(
+                tool_calls=tool_calls,
+                search_results=search_results,
+                thinking_blocks=[],
+            )
+        )
+
+        content = assistant_msg["content"]
+        assert len(content) == 1
+        assert content[0]["type"] == "tool_use"
+
+    def test_transform_response_passes_thinking_to_anthropic(self):
+        """Test that transform_response routes thinking_blocks correctly."""
+        tool_calls = [
+            {
+                "id": "toolu_01",
+                "type": "tool_use",
+                "name": "litellm_web_search",
+                "input": {"query": "test"},
+            }
+        ]
+        search_results = ["Search result"]
+        thinking_blocks = [
+            {
+                "type": "thinking",
+                "thinking": "Reasoning here.",
+                "signature": "sig",
+            },
+        ]
+
+        assistant_msg, _ = WebSearchTransformation.transform_response(
+            tool_calls=tool_calls,
+            search_results=search_results,
+            response_format="anthropic",
+            thinking_blocks=thinking_blocks,
+        )
+
+        content = assistant_msg["content"]
+        assert content[0]["type"] == "thinking"
+        assert content[1]["type"] == "tool_use"
+
+    def test_transform_response_openai_ignores_thinking(self):
+        """Test that OpenAI format is unaffected by thinking_blocks param."""
+        tool_calls = [
+            {
+                "id": "call_01",
+                "type": "function",
+                "name": "litellm_web_search",
+                "function": {
+                    "name": "litellm_web_search",
+                    "arguments": {"query": "test"},
+                },
+                "input": {"query": "test"},
+            }
+        ]
+        search_results = ["Search result"]
+        thinking_blocks = [
+            {
+                "type": "thinking",
+                "thinking": "Should not appear.",
+                "signature": "sig",
+            },
+        ]
+
+        assistant_msg, _ = WebSearchTransformation.transform_response(
+            tool_calls=tool_calls,
+            search_results=search_results,
+            response_format="openai",
+            thinking_blocks=thinking_blocks,
+        )
+
+        # OpenAI format uses tool_calls key, not content — thinking is irrelevant
+        assert "tool_calls" in assistant_msg
+        assert "content" not in assistant_msg
+
+
+class TestAgenticLoopThinkingExtraction:
+    """Tests for thinking block extraction in async_should_run_agentic_loop."""
+
+    @pytest.mark.asyncio
+    async def test_extracts_thinking_blocks_from_dict_response(self):
+        """Test extraction of thinking blocks from dict-style response."""
+        logger = WebSearchInterceptionLogger(enabled_providers=["bedrock"])
+
+        response = {
+            "content": [
+                {
+                    "type": "thinking",
+                    "thinking": "Let me think...",
+                    "signature": "sig1",
+                },
+                {"type": "redacted_thinking", "data": "redacted_data"},
+                {
+                    "type": "tool_use",
+                    "id": "toolu_01",
+                    "name": "litellm_web_search",
+                    "input": {"query": "latest news"},
+                },
+            ]
+        }
+
+        should_run, tools_dict = await logger.async_should_run_agentic_loop(
+            response=response,
+            model="bedrock/claude",
+            messages=[],
+            tools=[{"name": "WebSearch"}],
+            stream=False,
+            custom_llm_provider="bedrock",
+            kwargs={},
+        )
+
+        assert should_run is True
+        assert len(tools_dict["tool_calls"]) == 1
+        assert len(tools_dict["thinking_blocks"]) == 2
+        assert tools_dict["thinking_blocks"][0]["type"] == "thinking"
+        assert tools_dict["thinking_blocks"][0]["thinking"] == "Let me think..."
+        assert tools_dict["thinking_blocks"][1]["type"] == "redacted_thinking"
+        assert tools_dict["thinking_blocks"][1]["data"] == "redacted_data"
+
+    @pytest.mark.asyncio
+    async def test_extracts_thinking_blocks_from_object_response(self):
+        """Test extraction of thinking blocks from non-dict response objects.
+
+        In practice, the Anthropic pass-through always returns plain dicts
+        (TypedDict(**raw_json) produces a dict). This test covers the safety
+        branch for non-dict response objects.
+        """
+        logger = WebSearchInterceptionLogger(enabled_providers=["bedrock"])
+
+        # Simulate object-style response blocks
+        thinking_block = Mock()
+        thinking_block.type = "thinking"
+        thinking_block.thinking = "Reasoning..."
+        thinking_block.signature = "sig"
+
+        redacted_block = Mock()
+        redacted_block.type = "redacted_thinking"
+        redacted_block.data = "abc"
+
+        tool_block = Mock()
+        tool_block.type = "tool_use"
+        tool_block.name = "litellm_web_search"
+        tool_block.id = "toolu_01"
+        tool_block.input = {"query": "test"}
+
+        response = Mock()
+        response.content = [thinking_block, redacted_block, tool_block]
+
+        should_run, tools_dict = await logger.async_should_run_agentic_loop(
+            response=response,
+            model="bedrock/claude",
+            messages=[],
+            tools=[{"name": "WebSearch"}],
+            stream=False,
+            custom_llm_provider="bedrock",
+            kwargs={},
+        )
+
+        assert should_run is True
+        assert len(tools_dict["thinking_blocks"]) == 2
+        # Verify getattr-based conversion produced correct dicts
+        assert tools_dict["thinking_blocks"][0] == {
+            "type": "thinking",
+            "thinking": "Reasoning...",
+            "signature": "sig",
+        }
+        assert tools_dict["thinking_blocks"][1] == {
+            "type": "redacted_thinking",
+            "data": "abc",
+        }
+
+    @pytest.mark.asyncio
+    async def test_no_thinking_blocks_when_thinking_disabled(self):
+        """Test that thinking_blocks is empty when response has no thinking."""
+        logger = WebSearchInterceptionLogger(enabled_providers=["bedrock"])
+
+        response = {
+            "content": [
+                {
+                    "type": "tool_use",
+                    "id": "toolu_01",
+                    "name": "litellm_web_search",
+                    "input": {"query": "test"},
+                },
+            ]
+        }
+
+        should_run, tools_dict = await logger.async_should_run_agentic_loop(
+            response=response,
+            model="bedrock/claude",
+            messages=[],
+            tools=[{"name": "WebSearch"}],
+            stream=False,
+            custom_llm_provider="bedrock",
+            kwargs={},
+        )
+
+        assert should_run is True
+        assert tools_dict["thinking_blocks"] == []
+
+    @pytest.mark.asyncio
+    async def test_thinking_blocks_not_extracted_when_no_tool_calls(self):
+        """Test that no extraction happens when no websearch tool calls found."""
+        logger = WebSearchInterceptionLogger(enabled_providers=["bedrock"])
+
+        response = {
+            "content": [
+                {
+                    "type": "thinking",
+                    "thinking": "Just thinking...",
+                    "signature": "sig",
+                },
+                {"type": "text", "text": "Here is my response."},
+            ]
+        }
+
+        should_run, tools_dict = await logger.async_should_run_agentic_loop(
+            response=response,
+            model="bedrock/claude",
+            messages=[],
+            tools=[{"name": "WebSearch"}],
+            stream=False,
+            custom_llm_provider="bedrock",
+            kwargs={},
+        )
+
+        assert should_run is False
+        assert tools_dict == {}