BerriAI · Sameerlite · Feb 26, 2026 · Feb 19, 2026 · greptile-apps · Feb 19, 2026
diff --git a/litellm/integrations/websearch_interception/handler.py b/litellm/integrations/websearch_interception/handler.py
@@ -299,12 +299,54 @@ async def async_should_run_agentic_loop(
             f"WebSearchInterception: Detected {len(tool_calls)} WebSearch tool call(s), executing agentic loop"
         )
 
-        # Return tools dict with tool calls
+        # Extract thinking blocks from response content.
+        # When extended thinking is enabled, the model response includes
+        # thinking/redacted_thinking blocks that must be preserved and
+        # prepended to the follow-up assistant message.
+        thinking_blocks: List[Dict] = []
+        if isinstance(response, dict):
+            content = response.get("content", [])
+        else:
+            content = getattr(response, "content", []) or []
+
+        for block in content:
+            if isinstance(block, dict):
+                block_type = block.get("type")
+            else:
+                block_type = getattr(block, "type", None)
+
+            if block_type in ("thinking", "redacted_thinking"):
+                if isinstance(block, dict):
+                    thinking_blocks.append(block)
+                else:
+                    # Convert object to dict using getattr, matching the
+                    # pattern in _detect_from_non_streaming_response
+                    thinking_block_dict: Dict = {"type": block_type}
+                    if block_type == "thinking":
+                        thinking_block_dict["thinking"] = getattr(
+                            block, "thinking", ""
+                        )
+                        thinking_block_dict["signature"] = getattr(
+                            block, "signature", ""
+                        )
+                    else:  # redacted_thinking
+                        thinking_block_dict["data"] = getattr(
+                            block, "data", ""
+                        )
+                    thinking_blocks.append(thinking_block_dict)
-                    # Convert object to dict using getattr, matching the
-                    # pattern in _detect_from_non_streaming_response
-                    thinking_block_dict: Dict = {"type": block_type}
-                    if block_type == "thinking":
-                        thinking_block_dict["thinking"] = getattr(
-                            block, "thinking", ""
-                        )
-                        thinking_block_dict["signature"] = getattr(
-                            block, "signature", ""
-                        )
-                    else:  # redacted_thinking
-                        thinking_block_dict["data"] = getattr(
-                            block, "data", ""
-                        )
-                    thinking_blocks.append(thinking_block_dict)
+                    # Convert object to dict using getattr, matching the
+                    # pattern in _detect_from_non_streaming_response
+                    thinking_block_dict: Dict = {"type": block_type}
+                    if block_type == "thinking":
+                        thinking_block_dict["thinking"] = getattr(
+                            block, "thinking", ""
+                        )
+                        thinking_block_dict["signature"] = getattr(
+                            block, "signature", ""
+                        )
+                    else:  # redacted_thinking
+                        thinking_block_dict["data"] = getattr(
+                            block, "data", ""
+                        )
+                    # Preserve cache_control if present
+                    cache_control = getattr(block, "cache_control", None)
+                    if cache_control is not None:
+                        thinking_block_dict["cache_control"] = cache_control
+                    thinking_blocks.append(thinking_block_dict)
-                    # Convert object to dict using getattr, matching the
-                    # pattern in _detect_from_non_streaming_response
-                    thinking_block_dict: Dict = {"type": block_type}
-                    if block_type == "thinking":
-                        thinking_block_dict["thinking"] = getattr(
-                            block, "thinking", ""
-                        )
-                        thinking_block_dict["signature"] = getattr(
-                            block, "signature", ""
-                        )
-                    else:  # redacted_thinking
-                        thinking_block_dict["data"] = getattr(
-                            block, "data", ""
-                        )
-                    thinking_blocks.append(thinking_block_dict)
+                    # Convert object to dict using getattr, matching the
+                    # pattern in _detect_from_non_streaming_response
+                    thinking_block_dict: Dict = {"type": block_type}
+                    if block_type == "thinking":
+                        thinking_block_dict["thinking"] = getattr(
+                            block, "thinking", ""
+                        )
+                        thinking_block_dict["signature"] = getattr(
+                            block, "signature", ""
+                        )
+                    else:  # redacted_thinking
+                        thinking_block_dict["data"] = getattr(
+                            block, "data", ""
+                        )
+                    # Preserve cache_control if present
+                    cache_control = getattr(block, "cache_control", None)
+                    if cache_control is not None:
+                        thinking_block_dict["cache_control"] = cache_control
+                    thinking_blocks.append(thinking_block_dict)
+
+        if thinking_blocks:
+            verbose_logger.debug(
+                f"WebSearchInterception: Extracted {len(thinking_blocks)} thinking block(s) from response"
+            )
+
+        # Return tools dict with tool calls and thinking blocks
         tools_dict = {
             "tool_calls": tool_calls,
             "tool_type": "websearch",
             "provider": custom_llm_provider,
             "response_format": "anthropic",
+            "thinking_blocks": thinking_blocks,
         }
         return True, tools_dict
 
@@ -387,6 +429,7 @@ async def async_run_agentic_loop(
         """
 
         tool_calls = tools["tool_calls"]
+        thinking_blocks = tools.get("thinking_blocks", [])
 
         verbose_logger.debug(
             f"WebSearchInterception: Executing agentic loop for {len(tool_calls)} search(es)"
@@ -396,6 +439,7 @@ async def async_run_agentic_loop(
             model=model,
             messages=messages,
             tool_calls=tool_calls,
+            thinking_blocks=thinking_blocks,
             anthropic_messages_optional_request_params=anthropic_messages_optional_request_params,
             logging_obj=logging_obj,
             stream=stream,
@@ -442,6 +486,7 @@ async def _execute_agentic_loop(
         model: str,
         messages: List[Dict],
         tool_calls: List[Dict],
+        thinking_blocks: List[Dict],
         anthropic_messages_optional_request_params: Dict,
         logging_obj: Any,
         stream: bool,
@@ -495,6 +540,7 @@ async def _execute_agentic_loop(
         assistant_message, user_message = WebSearchTransformation.transform_response(
             tool_calls=tool_calls,
             search_results=final_search_results,
+            thinking_blocks=thinking_blocks,
         )
 
         # Make follow-up request with search results

diff --git a/litellm/integrations/websearch_interception/transformation.py b/litellm/integrations/websearch_interception/transformation.py
@@ -4,7 +4,7 @@
 Transforms between Anthropic/OpenAI tool_use format and LiteLLM search format.
 """
 import json
-from typing import Any, Dict, List, Tuple, Union
+from typing import Any, Dict, List, Optional, Tuple, Union
 
 from litellm._logging import verbose_logger
 from litellm.constants import LITELLM_WEB_SEARCH_TOOL_NAME
@@ -224,6 +224,7 @@ def transform_response(
         tool_calls: List[Dict],
         search_results: List[str],
         response_format: str = "anthropic",
+        thinking_blocks: Optional[List[Dict]] = None,
     ) -> Tuple[Dict, Union[Dict, List[Dict]]]:
         """
         Transform LiteLLM search results to Anthropic/OpenAI tool_result format.
@@ -235,6 +236,10 @@ def transform_response(
             tool_calls: List of tool_use/tool_calls dicts from transform_request
             search_results: List of search result strings (one per tool_call)
             response_format: Response format - "anthropic" or "openai" (default: "anthropic")
+            thinking_blocks: Optional list of thinking/redacted_thinking blocks
+                from the model's response. When present, prepended to the
+                assistant message content (required by Anthropic API when
+                thinking is enabled).
 
         Returns:
             (assistant_message, user_or_tool_messages):
@@ -247,27 +252,42 @@ def transform_response(
             )
         else:
             return WebSearchTransformation._transform_response_anthropic(
-                tool_calls, search_results
+                tool_calls, search_results, thinking_blocks=thinking_blocks
             )
 
     @staticmethod
     def _transform_response_anthropic(
         tool_calls: List[Dict],
         search_results: List[str],
+        thinking_blocks: Optional[List[Dict]] = None,
     ) -> Tuple[Dict, Dict]:
         """Transform to Anthropic format (single user message with tool_result blocks)"""
-        # Build assistant message with tool_use blocks
-        assistant_message = {
-            "role": "assistant",
-            "content": [
+        # Build assistant message content
+        assistant_content: List[Dict] = []
+
+        # Prepend thinking blocks if present.
+        # When extended thinking is enabled, Anthropic requires the assistant
+        # message to start with thinking/redacted_thinking blocks before any
+        # tool_use blocks. Same pattern as anthropic_messages_pt in factory.py.
+        if thinking_blocks:
+            assistant_content.extend(thinking_blocks)
+
+        # Add tool_use blocks
+        assistant_content.extend(
+            [
                 {
                     "type": "tool_use",
                     "id": tc["id"],
                     "name": tc["name"],
                     "input": tc["input"],
                 }
                 for tc in tool_calls
-            ],
+            ]
+        )
+
+        assistant_message = {
+            "role": "assistant",
+            "content": assistant_content,
         }
 
         # Build user message with tool_result blocks