diff --git a/litellm/integrations/custom_logger.py b/litellm/integrations/custom_logger.py index c244363e389..c2f619d90c6 100644 --- a/litellm/integrations/custom_logger.py +++ b/litellm/integrations/custom_logger.py @@ -8,6 +8,7 @@ AsyncGenerator, Dict, List, + NamedTuple, Optional, Tuple, Union, @@ -43,6 +44,9 @@ MCPPreCallRequestObject, MCPPreCallResponseObject, ) + from litellm.types.llms.anthropic_messages.anthropic_response import ( + AnthropicMessagesResponse, + ) from litellm.types.router import PreRoutingHookResponse Span = Union[_Span, Any] @@ -56,6 +60,7 @@ MCPDuringCallRequestObject = Any MCPDuringCallResponseObject = Any PreRoutingHookResponse = Any + AnthropicMessagesResponse = Any _BASE64_INLINE_PATTERN = re.compile( @@ -64,6 +69,19 @@ ) +class ToolCallResult(NamedTuple): + """Result of executing a single tool call via async_execute_tool_calls.""" + + tool_call_id: str + """The id of the tool_use block that was executed.""" + + content: str + """Text result to return to the model.""" + + is_error: bool + """Whether this result represents an error.""" + + class CustomLogger: # https://docs.litellm.ai/docs/observability/custom_callback#callback-class # Class variables or attributes def __init__( @@ -533,6 +551,34 @@ async def async_post_mcp_tool_call_hook( """ return None + ######################################################### + # TOOL EXECUTION HOOKS (simplified tool interception) + ######################################################### + + async def async_execute_tool_calls( + self, + response: Union["AnthropicMessagesResponse", ModelResponse], + kwargs: Dict, + ) -> List[ToolCallResult]: + """ + Detect and execute tool calls in the model response. + + This is the simplified alternative to the two-step + async_should_run_agentic_loop / async_run_agentic_loop pattern. 
+ Callbacks only need to detect tool calls and return results — the + framework handles message construction, thinking block preservation, + max_tokens adjustment, kwargs cleanup, and follow-up API requests. + + Args: + response: Model response (AnthropicMessagesResponse dict, or ModelResponse) + kwargs: Full request kwargs (includes custom_llm_provider, tools, etc.) + + Returns: + List of ToolCallResult for tool calls this callback handled. + Empty list means nothing was handled (skip this callback). + """ + return [] + ######################################################### # AGENTIC LOOP HOOKS (for litellm.messages + future completion support) ######################################################### diff --git a/litellm/integrations/websearch_interception/handler.py b/litellm/integrations/websearch_interception/handler.py index bef8925e8e9..0a27717027b 100644 --- a/litellm/integrations/websearch_interception/handler.py +++ b/litellm/integrations/websearch_interception/handler.py @@ -7,18 +7,16 @@ """ import asyncio -from typing import Any, Dict, List, Optional, Tuple, Union, cast +from typing import Any, Dict, List, Optional, Union import litellm from litellm._logging import verbose_logger -from litellm.anthropic_interface import messages as anthropic_messages from litellm.constants import LITELLM_WEB_SEARCH_TOOL_NAME -from litellm.integrations.custom_logger import CustomLogger +from litellm.integrations.custom_logger import CustomLogger, ToolCallResult from litellm.integrations.websearch_interception.tools import ( get_litellm_web_search_tool, get_litellm_web_search_tool_openai, is_web_search_tool, - is_web_search_tool_chat_completion, ) from litellm.integrations.websearch_interception.transformation import ( WebSearchTransformation, @@ -28,17 +26,17 @@ ) from litellm.types.utils import LlmProviders +# Tool names that indicate a web search tool_use block +WEBSEARCH_NAMES = frozenset({LITELLM_WEB_SEARCH_TOOL_NAME, "WebSearch", "web_search"}) + class 
WebSearchInterceptionLogger(CustomLogger): """ CustomLogger that intercepts WebSearch tool calls for models that don't natively support web search. - Implements agentic loop: - 1. Detects WebSearch tool_use in model response - 2. Executes litellm.asearch() for each query using router's search tools - 3. Makes follow-up request with search results - 4. Returns final response + Uses the simplified async_execute_tool_calls hook — the framework handles + message construction, thinking block preservation, and follow-up requests. """ def __init__( @@ -60,16 +58,14 @@ def __init__( if enabled_providers is None: self.enabled_providers = [LlmProviders.BEDROCK.value] else: - self.enabled_providers = [ - p.value if isinstance(p, LlmProviders) else p - for p in enabled_providers - ] + self.enabled_providers = [p.value if isinstance(p, LlmProviders) else p for p in enabled_providers] self.search_tool_name = search_tool_name - self._request_has_websearch = False # Track if current request has web search - async def async_pre_call_deployment_hook( - self, kwargs: Dict[str, Any], call_type: Optional[Any] - ) -> Optional[dict]: + # ----------------------------------------------------------------- + # Pre-call hooks (tool conversion + stream handling) + # ----------------------------------------------------------------- + + async def async_pre_call_deployment_hook(self, kwargs: Dict[str, Any], call_type: Optional[Any]) -> Optional[dict]: """ Pre-call hook to convert native Anthropic web_search tools to regular tools. @@ -77,14 +73,9 @@ async def async_pre_call_deployment_hook( Instead, we convert it to a regular tool so the model returns tool_use blocks that we can intercept and execute ourselves. 
""" - # Check if this is for an enabled provider - # Try top-level kwargs first, then nested litellm_params, then derive from model name - custom_llm_provider = kwargs.get("custom_llm_provider", "") or kwargs.get("litellm_params", {}).get("custom_llm_provider", "") - if not custom_llm_provider: - try: - _, custom_llm_provider, _, _ = litellm.get_llm_provider(model=kwargs.get("model", "")) - except Exception: - custom_llm_provider = "" + # Get provider from litellm_params (set by router in _add_deployment) + custom_llm_provider = kwargs.get("litellm_params", {}).get("custom_llm_provider", "") + if custom_llm_provider not in self.enabled_providers: return None @@ -99,68 +90,42 @@ async def async_pre_call_deployment_hook( if not has_websearch: return None - verbose_logger.debug( - "WebSearchInterception: Converting native web_search tools to LiteLLM standard" - ) + verbose_logger.debug("WebSearchInterception: Converting native web_search tools to LiteLLM standard") # Convert native/custom web_search tools to LiteLLM standard converted_tools = [] for tool in tools: if is_web_search_tool(tool): - # Convert to LiteLLM standard web search tool - converted_tool = get_litellm_web_search_tool_openai() + converted_tool = get_litellm_web_search_tool() converted_tools.append(converted_tool) verbose_logger.debug( f"WebSearchInterception: Converted {tool.get('name', 'unknown')} " f"(type={tool.get('type', 'none')}) to {LITELLM_WEB_SEARCH_TOOL_NAME}" ) else: - # Keep other tools as-is converted_tools.append(tool) - # Update tools in-place and return full kwargs - kwargs["tools"] = converted_tools - return kwargs + return {**kwargs, "tools": converted_tools} @classmethod - def from_config_yaml( - cls, config: WebSearchInterceptionConfig - ) -> "WebSearchInterceptionLogger": + def from_config_yaml(cls, config: WebSearchInterceptionConfig) -> "WebSearchInterceptionLogger": """ Initialize WebSearchInterceptionLogger from proxy config.yaml parameters. 
Args: config: Configuration dictionary from litellm_settings.websearch_interception_params - - Returns: - Configured WebSearchInterceptionLogger instance - - Example: - From proxy_config.yaml: - litellm_settings: - websearch_interception_params: - enabled_providers: ["bedrock"] - search_tool_name: "my-perplexity-search" - - Usage: - config = litellm_settings.get("websearch_interception_params", {}) - logger = WebSearchInterceptionLogger.from_config_yaml(config) """ - # Extract parameters from config enabled_providers_str = config.get("enabled_providers", None) search_tool_name = config.get("search_tool_name", None) - # Convert string provider names to LlmProviders enum values enabled_providers: Optional[List[Union[LlmProviders, str]]] = None if enabled_providers_str is not None: enabled_providers = [] for provider in enabled_providers_str: try: - # Try to convert string to LlmProviders enum provider_enum = LlmProviders(provider) enabled_providers.append(provider_enum) except ValueError: - # If conversion fails, keep as string enabled_providers.append(provider) return cls( @@ -168,27 +133,12 @@ def from_config_yaml( search_tool_name=search_tool_name, ) - async def async_pre_request_hook( - self, model: str, messages: List[Dict], kwargs: Dict - ) -> Optional[Dict]: + async def async_pre_request_hook(self, model: str, messages: List[Dict], kwargs: Dict) -> Optional[Dict]: """ - Pre-request hook to convert native web search tools to LiteLLM standard. - - This hook is called before the API request is made, allowing us to: - 1. Detect native web search tools (web_search_20250305, etc.) - 2. Convert them to LiteLLM standard format (litellm_web_search) - 3. Convert stream=True to stream=False for interception - - This prevents providers like Bedrock from trying to execute web search - natively (which fails), and ensures our agentic loop can intercept tool_use. 
- - Returns: - Modified kwargs dict with converted tools, or None if no modifications needed + Pre-request hook to convert native web search tools to LiteLLM standard + and convert stream=True to stream=False for interception. """ - # Check if this request is for an enabled provider - custom_llm_provider = kwargs.get("litellm_params", {}).get( - "custom_llm_provider", "" - ) + custom_llm_provider = kwargs.get("litellm_params", {}).get("custom_llm_provider", "") verbose_logger.debug( f"WebSearchInterception: Pre-request hook called" @@ -197,24 +147,17 @@ async def async_pre_request_hook( ) if self.enabled_providers is not None and custom_llm_provider not in self.enabled_providers: - verbose_logger.debug( - f"WebSearchInterception: Skipping - provider {custom_llm_provider} not in {self.enabled_providers}" - ) return None - # Check if request has tools tools = kwargs.get("tools") if not tools: return None - # Check if any tool is a web search tool has_websearch = any(is_web_search_tool(t) for t in tools) if not has_websearch: return None - verbose_logger.debug( - f"WebSearchInterception: Pre-request hook triggered for provider={custom_llm_provider}" - ) + verbose_logger.debug(f"WebSearchInterception: Pre-request hook triggered for provider={custom_llm_provider}") # Convert native web search tools to LiteLLM standard converted_tools = [] @@ -222,615 +165,150 @@ async def async_pre_request_hook( if is_web_search_tool(tool): standard_tool = get_litellm_web_search_tool() converted_tools.append(standard_tool) - verbose_logger.debug( - f"WebSearchInterception: Converted {tool.get('name', 'unknown')} " - f"(type={tool.get('type', 'none')}) to {LITELLM_WEB_SEARCH_TOOL_NAME}" - ) else: converted_tools.append(tool) - # Update kwargs with converted tools kwargs["tools"] = converted_tools - verbose_logger.debug( - f"WebSearchInterception: Tools after conversion: {[t.get('name') for t in converted_tools]}" - ) # Convert stream=True to stream=False for WebSearch interception if 
kwargs.get("stream"): - verbose_logger.debug( - "WebSearchInterception: Converting stream=True to stream=False" - ) + verbose_logger.debug("WebSearchInterception: Converting stream=True to stream=False") kwargs["stream"] = False kwargs["_websearch_interception_converted_stream"] = True return kwargs - async def async_should_run_agentic_loop( - self, - response: Any, - model: str, - messages: List[Dict], - tools: Optional[List[Dict]], - stream: bool, - custom_llm_provider: str, - kwargs: Dict, - ) -> Tuple[bool, Dict]: - """ - Check if WebSearch tool interception is needed for Anthropic Messages API. - - This is the legacy method for Anthropic-style responses. - For chat completions, use async_should_run_chat_completion_agentic_loop instead. - """ - - verbose_logger.debug(f"WebSearchInterception: Hook called! provider={custom_llm_provider}, stream={stream}") - verbose_logger.debug(f"WebSearchInterception: Response type: {type(response)}") - - # Check if provider should be intercepted - # Note: custom_llm_provider is already normalized by get_llm_provider() - # (e.g., "bedrock/invoke/..." 
-> "bedrock") - if self.enabled_providers is not None and custom_llm_provider not in self.enabled_providers: - verbose_logger.debug( - f"WebSearchInterception: Skipping provider {custom_llm_provider} (not in enabled list: {self.enabled_providers})" - ) - return False, {} + # ----------------------------------------------------------------- + # Simplified tool execution hook + # ----------------------------------------------------------------- - # Check if tools include any web search tool (LiteLLM standard or native) - has_websearch_tool = any(is_web_search_tool(t) for t in (tools or [])) - if not has_websearch_tool: - verbose_logger.debug( - "WebSearchInterception: No web search tool in request" - ) - return False, {} + async def async_execute_tool_calls(self, response, kwargs): + """Detect and execute websearch tool calls.""" + provider = kwargs.get("custom_llm_provider", "") + if self.enabled_providers is not None and provider not in self.enabled_providers: + return [] - # Detect WebSearch tool_use in response (Anthropic format) - should_intercept, tool_calls = WebSearchTransformation.transform_request( - response=response, - stream=stream, - response_format="anthropic", - ) - - if not should_intercept: - verbose_logger.debug( - "WebSearchInterception: No WebSearch tool_use detected in response" - ) - return False, {} - - verbose_logger.debug( - f"WebSearchInterception: Detected {len(tool_calls)} WebSearch tool call(s), executing agentic loop" - ) - - # Extract thinking blocks from response content. - # When extended thinking is enabled, the model response includes - # thinking/redacted_thinking blocks that must be preserved and - # prepended to the follow-up assistant message. 
- thinking_blocks: List[Dict] = [] + # Get content blocks from Anthropic-style response if isinstance(response, dict): content = response.get("content", []) else: - content = getattr(response, "content", []) or [] + content = getattr(response, "content", None) or [] + + if not content: + return [] + # Find websearch tool_use blocks and execute searches + search_tasks = [] + tool_call_ids = [] for block in content: if isinstance(block, dict): - block_type = block.get("type") + btype = block.get("type") + bname = block.get("name") + bid = block.get("id") + binput = block.get("input", {}) else: - block_type = getattr(block, "type", None) - - if block_type in ("thinking", "redacted_thinking"): - if isinstance(block, dict): - thinking_blocks.append(block) + btype = getattr(block, "type", None) + bname = getattr(block, "name", None) + bid = getattr(block, "id", None) + binput = getattr(block, "input", {}) + + if btype == "tool_use" and bname in WEBSEARCH_NAMES: + query = binput.get("query", "") if isinstance(binput, dict) else "" + if query: + search_tasks.append(self._execute_search(query)) + tool_call_ids.append(bid) else: - # Convert object to dict using getattr, matching the - # pattern in _detect_from_non_streaming_response - thinking_block_dict: Dict = {"type": block_type} - if block_type == "thinking": - thinking_block_dict["thinking"] = getattr( - block, "thinking", "" - ) - thinking_block_dict["signature"] = getattr( - block, "signature", "" - ) - else: # redacted_thinking - thinking_block_dict["data"] = getattr( - block, "data", "" - ) - thinking_blocks.append(thinking_block_dict) - - if thinking_blocks: - verbose_logger.debug( - f"WebSearchInterception: Extracted {len(thinking_blocks)} thinking block(s) from response" - ) - - # Return tools dict with tool calls and thinking blocks - tools_dict = { - "tool_calls": tool_calls, - "tool_type": "websearch", - "provider": custom_llm_provider, - "response_format": "anthropic", - "thinking_blocks": thinking_blocks, - 
} - return True, tools_dict - - async def async_should_run_chat_completion_agentic_loop( - self, - response: Any, - model: str, - messages: List[Dict], - tools: Optional[List[Dict]], - stream: bool, - custom_llm_provider: str, - kwargs: Dict, - ) -> Tuple[bool, Dict]: - """ - Check if WebSearch tool interception is needed for Chat Completions API. - - Similar to async_should_run_agentic_loop but for OpenAI-style chat completions. - """ - - verbose_logger.debug(f"WebSearchInterception: Chat completion hook called! provider={custom_llm_provider}, stream={stream}") - verbose_logger.debug(f"WebSearchInterception: Response type: {type(response)}") - - # Check if provider should be intercepted - if self.enabled_providers is not None and custom_llm_provider not in self.enabled_providers: - verbose_logger.debug( - f"WebSearchInterception: Skipping provider {custom_llm_provider} (not in enabled list: {self.enabled_providers})" - ) - return False, {} - - # Check if tools include any web search tool (strict check for chat completions) - has_websearch_tool = any(is_web_search_tool_chat_completion(t) for t in (tools or [])) - if not has_websearch_tool: - verbose_logger.debug( - "WebSearchInterception: No litellm_web_search tool in request" - ) - return False, {} - - # Detect WebSearch tool_calls in response (OpenAI format) - should_intercept, tool_calls = WebSearchTransformation.transform_request( - response=response, - stream=stream, - response_format="openai", - ) - - if not should_intercept: - verbose_logger.debug( - "WebSearchInterception: No WebSearch tool_calls detected in response" - ) - return False, {} - - verbose_logger.debug( - f"WebSearchInterception: Detected {len(tool_calls)} WebSearch tool call(s), executing agentic loop" - ) - - # Return tools dict with tool calls - tools_dict = { - "tool_calls": tool_calls, - "tool_type": "websearch", - "provider": custom_llm_provider, - "response_format": "openai", - } - return True, tools_dict - - async def 
async_run_agentic_loop( - self, - tools: Dict, - model: str, - messages: List[Dict], - response: Any, - anthropic_messages_provider_config: Any, - anthropic_messages_optional_request_params: Dict, - logging_obj: Any, - stream: bool, - kwargs: Dict, - ) -> Any: - """ - Execute agentic loop with WebSearch execution for Anthropic Messages API. - - This is the legacy method for Anthropic-style responses. - """ - - tool_calls = tools["tool_calls"] - thinking_blocks = tools.get("thinking_blocks", []) - - verbose_logger.debug( - f"WebSearchInterception: Executing agentic loop for {len(tool_calls)} search(es)" - ) - - return await self._execute_agentic_loop( - model=model, - messages=messages, - tool_calls=tool_calls, - thinking_blocks=thinking_blocks, - anthropic_messages_optional_request_params=anthropic_messages_optional_request_params, - logging_obj=logging_obj, - stream=stream, - kwargs=kwargs, - ) - - async def async_run_chat_completion_agentic_loop( - self, - tools: Dict, - model: str, - messages: List[Dict], - response: Any, - optional_params: Dict, - logging_obj: Any, - stream: bool, - kwargs: Dict, - ) -> Any: - """ - Execute agentic loop with WebSearch execution for Chat Completions API. - - Similar to async_run_agentic_loop but for OpenAI-style chat completions. 
- """ - - tool_calls = tools["tool_calls"] - response_format = tools.get("response_format", "openai") + verbose_logger.warning(f"WebSearchInterception: Tool call {bid} has no query") - verbose_logger.debug( - f"WebSearchInterception: Executing chat completion agentic loop for {len(tool_calls)} search(es)" - ) + if not search_tasks: + return [] - return await self._execute_chat_completion_agentic_loop( - model=model, - messages=messages, - tool_calls=tool_calls, - optional_params=optional_params, - logging_obj=logging_obj, - stream=stream, - kwargs=kwargs, - response_format=response_format, - ) - - async def _execute_agentic_loop( - self, - model: str, - messages: List[Dict], - tool_calls: List[Dict], - thinking_blocks: List[Dict], - anthropic_messages_optional_request_params: Dict, - logging_obj: Any, - stream: bool, - kwargs: Dict, - ) -> Any: - """Execute litellm.search() and make follow-up request""" - - # Extract search queries from tool_use blocks - search_tasks = [] - for tool_call in tool_calls: - query = tool_call["input"].get("query") - if query: - verbose_logger.debug( - f"WebSearchInterception: Queuing search for query='{query}'" - ) - search_tasks.append(self._execute_search(query)) - else: - verbose_logger.warning( - f"WebSearchInterception: Tool call {tool_call['id']} has no query" - ) - # Add empty result for tools without query - search_tasks.append(self._create_empty_search_result()) + verbose_logger.debug(f"WebSearchInterception: Executing {len(search_tasks)} search(es) in parallel") # Execute searches in parallel - verbose_logger.debug( - f"WebSearchInterception: Executing {len(search_tasks)} search(es) in parallel" - ) search_results = await asyncio.gather(*search_tasks, return_exceptions=True) - # Handle any exceptions in search results - final_search_results: List[str] = [] - for i, result in enumerate(search_results): + # Build ToolCallResults + results = [] + for i, (tc_id, result) in enumerate(zip(tool_call_ids, search_results)): if 
isinstance(result, Exception): - verbose_logger.error( - f"WebSearchInterception: Search {i} failed with error: {str(result)}" - ) - final_search_results.append( - f"Search failed: {str(result)}" - ) - elif isinstance(result, str): - # Explicitly cast to str for type checker - final_search_results.append(cast(str, result)) + verbose_logger.error(f"WebSearchInterception: Search {i} failed: {result}") + results.append(ToolCallResult( + tool_call_id=tc_id, + content=f"Search failed: {result}", + is_error=True, + )) else: - # Should never happen, but handle for type safety - verbose_logger.warning( - f"WebSearchInterception: Unexpected result type {type(result)} at index {i}" - ) - final_search_results.append(str(result)) - - # Build assistant and user messages using transformation - assistant_message, user_message = WebSearchTransformation.transform_response( - tool_calls=tool_calls, - search_results=final_search_results, - thinking_blocks=thinking_blocks, - ) - - # Make follow-up request with search results - # Type cast: user_message is a Dict for Anthropic format (default response_format) - follow_up_messages = messages + [assistant_message, cast(Dict, user_message)] - - verbose_logger.debug( - "WebSearchInterception: Making follow-up request with search results" - ) - verbose_logger.debug( - f"WebSearchInterception: Follow-up messages count: {len(follow_up_messages)}" - ) - verbose_logger.debug( - f"WebSearchInterception: Last message (tool_result): {user_message}" - ) + results.append(ToolCallResult( + tool_call_id=tc_id, + content=str(result), + is_error=False, + )) - # Use anthropic_messages.acreate for follow-up request - try: - # Extract max_tokens from optional params or kwargs - # max_tokens is a required parameter for anthropic_messages.acreate() - max_tokens = anthropic_messages_optional_request_params.get( - "max_tokens", - kwargs.get("max_tokens", 1024) # Default to 1024 if not found - ) + return results - verbose_logger.debug( - 
f"WebSearchInterception: Using max_tokens={max_tokens} for follow-up request" - ) - - # Create a copy of optional params without max_tokens (since we pass it explicitly) - optional_params_without_max_tokens = { - k: v for k, v in anthropic_messages_optional_request_params.items() - if k != 'max_tokens' - } - - # Remove internal websearch interception flags from kwargs before follow-up request - # These flags are used internally and should not be passed to the LLM provider - kwargs_for_followup = { - k: v for k, v in kwargs.items() - if not k.startswith('_websearch_interception') - } - - # Get model from logging_obj.model_call_details["agentic_loop_params"] - # This preserves the full model name with provider prefix (e.g., "bedrock/invoke/...") - full_model_name = model - if logging_obj is not None: - agentic_params = logging_obj.model_call_details.get("agentic_loop_params", {}) - full_model_name = agentic_params.get("model", model) - verbose_logger.debug( - f"WebSearchInterception: Using model name: {full_model_name}" - ) - - final_response = await anthropic_messages.acreate( - max_tokens=max_tokens, - messages=follow_up_messages, - model=full_model_name, - **optional_params_without_max_tokens, - **kwargs_for_followup, - ) - verbose_logger.debug( - f"WebSearchInterception: Follow-up request completed, response type: {type(final_response)}" - ) - verbose_logger.debug( - f"WebSearchInterception: Final response: {final_response}" - ) - return final_response - except Exception as e: - verbose_logger.exception( - f"WebSearchInterception: Follow-up request failed: {str(e)}" - ) - raise + # ----------------------------------------------------------------- + # Search execution + # ----------------------------------------------------------------- async def _execute_search(self, query: str) -> str: - """Execute a single web search using router's search tools""" + """Execute a single web search using router's search tools.""" try: - # Import router from proxy_server try: from 
litellm.proxy.proxy_server import llm_router except ImportError: - verbose_logger.warning( - "WebSearchInterception: Could not import llm_router from proxy_server, " - "falling back to direct litellm.asearch() with perplexity" - ) llm_router = None # Determine search provider from router's search_tools search_provider: Optional[str] = None if llm_router is not None and hasattr(llm_router, "search_tools"): if self.search_tool_name: - # Find specific search tool by name matching_tools = [ - tool for tool in llm_router.search_tools + tool + for tool in llm_router.search_tools if tool.get("search_tool_name") == self.search_tool_name ] if matching_tools: search_tool = matching_tools[0] search_provider = search_tool.get("litellm_params", {}).get("search_provider") - verbose_logger.debug( - f"WebSearchInterception: Found search tool '{self.search_tool_name}' " - f"with provider '{search_provider}'" - ) - else: - verbose_logger.warning( - f"WebSearchInterception: Search tool '{self.search_tool_name}' not found in router, " - "falling back to first available or perplexity" - ) - - # If no specific tool or not found, use first available + if not search_provider and llm_router.search_tools: first_tool = llm_router.search_tools[0] search_provider = first_tool.get("litellm_params", {}).get("search_provider") - verbose_logger.debug( - f"WebSearchInterception: Using first available search tool with provider '{search_provider}'" - ) - # Fallback to perplexity if no router or no search tools configured if not search_provider: search_provider = "perplexity" - verbose_logger.debug( - "WebSearchInterception: No search tools configured in router, " - f"using default provider '{search_provider}'" - ) verbose_logger.debug( f"WebSearchInterception: Executing search for '{query}' using provider '{search_provider}'" ) - result = await litellm.asearch( - query=query, search_provider=search_provider - ) + result = await litellm.asearch(query=query, search_provider=search_provider) - # Format 
using transformation function search_result_text = WebSearchTransformation.format_search_response(result) - verbose_logger.debug( f"WebSearchInterception: Search completed for '{query}', got {len(search_result_text)} chars" ) return search_result_text except Exception as e: - verbose_logger.error( - f"WebSearchInterception: Search failed for '{query}': {str(e)}" - ) + verbose_logger.error(f"WebSearchInterception: Search failed for '{query}': {str(e)}") raise - async def _execute_chat_completion_agentic_loop( # noqa: PLR0915 - self, - model: str, - messages: List[Dict], - tool_calls: List[Dict], - optional_params: Dict, - logging_obj: Any, - stream: bool, - kwargs: Dict, - response_format: str = "openai", - ) -> Any: - """Execute litellm.search() and make follow-up chat completion request""" - - # Extract search queries from tool_calls - search_tasks = [] - for tool_call in tool_calls: - # Handle both Anthropic-style input and OpenAI-style function.arguments - query = None - if "input" in tool_call and isinstance(tool_call["input"], dict): - query = tool_call["input"].get("query") - elif "function" in tool_call: - func = tool_call["function"] - if isinstance(func, dict): - args = func.get("arguments", {}) - if isinstance(args, dict): - query = args.get("query") - - if query: - verbose_logger.debug( - f"WebSearchInterception: Queuing search for query='{query}'" - ) - search_tasks.append(self._execute_search(query)) - else: - verbose_logger.warning( - f"WebSearchInterception: Tool call {tool_call.get('id')} has no query" - ) - # Add empty result for tools without query - search_tasks.append(self._create_empty_search_result()) - - # Execute searches in parallel - verbose_logger.debug( - f"WebSearchInterception: Executing {len(search_tasks)} search(es) in parallel" - ) - search_results = await asyncio.gather(*search_tasks, return_exceptions=True) + # ----------------------------------------------------------------- + # Legacy agentic loop hooks (kept for backward 
# -----------------------------------------------------------------
# Legacy agentic loop hooks (kept for backward compatibility)
# -----------------------------------------------------------------
# NOTE: These are no longer used when async_execute_tool_calls is
# implemented. They remain so older framework versions that only
# call the two-step pattern still work.

async def async_should_run_agentic_loop(
    self,
    response,
    model,
    messages,
    tools,
    stream,
    custom_llm_provider,
    kwargs,
):
    """Legacy two-step agentic-loop hook; always declines.

    Tool interception now goes through async_execute_tool_calls, so this
    hook never requests the legacy agentic loop.

    Returns:
        Tuple of (False, {}) — do not run the legacy loop.
    """
    return False, {}

async def async_should_run_chat_completion_agentic_loop(
    self,
    response,
    model,
    messages,
    tools,
    stream,
    custom_llm_provider,
    kwargs,
):
    """Legacy chat-completion agentic-loop hook; always declines.

    Chat-completion interception is intentionally disabled here; the
    simplified async_execute_tool_calls path is Anthropic-only for now.

    Returns:
        Tuple of (False, {}) — do not run the legacy loop.
    """
    return False, {}
{len(follow_up_messages)}" - ) - - # Use litellm.acompletion for follow-up request - try: - # Remove internal parameters that shouldn't be passed to follow-up request - internal_params = { - '_websearch_interception', - 'acompletion', - 'litellm_logging_obj', - 'custom_llm_provider', - 'model_alias_map', - 'stream_response', - 'custom_prompt_dict', - } - kwargs_for_followup = { - k: v for k, v in kwargs.items() - if not k.startswith('_websearch_interception') and k not in internal_params - } - - # Get full model name from kwargs - full_model_name = model - if "custom_llm_provider" in kwargs: - custom_llm_provider = kwargs["custom_llm_provider"] - # Reconstruct full model name with provider prefix if needed - if not model.startswith(custom_llm_provider): - # Check if model already has a provider prefix - if "/" not in model: - full_model_name = f"{custom_llm_provider}/{model}" - - verbose_logger.debug( - f"WebSearchInterception: Using model name: {full_model_name}" - ) - - # Prepare tools for follow-up request (same as original) - tools_param = optional_params.get("tools") - - # Remove tools and extra_body from optional_params to avoid issues - # extra_body often contains internal LiteLLM params that shouldn't be forwarded - optional_params_clean = { - k: v for k, v in optional_params.items() - if k not in {"tools", "extra_body", "model_alias_map","stream_response", "custom_prompt_dict" } - } - - final_response = await litellm.acompletion( - model=full_model_name, - messages=follow_up_messages, - tools=tools_param, - **optional_params_clean, - **kwargs_for_followup, - ) - - verbose_logger.debug( - f"WebSearchInterception: Follow-up request completed, response type: {type(final_response)}" - ) - return final_response - except Exception as e: - verbose_logger.exception( - f"WebSearchInterception: Follow-up request failed: {str(e)}" - ) - raise - - async def _create_empty_search_result(self) -> str: - """Create an empty search result for tool calls without queries""" - 
return "No search query provided" + # ----------------------------------------------------------------- + # Configuration + # ----------------------------------------------------------------- @staticmethod def initialize_from_proxy_config( @@ -841,27 +319,11 @@ def initialize_from_proxy_config( Static method to initialize WebSearchInterceptionLogger from proxy config. Used in callback_utils.py to simplify initialization logic. - - Args: - litellm_settings: Dictionary containing litellm_settings from proxy_config.yaml - callback_specific_params: Dictionary containing callback-specific parameters - - Returns: - Configured WebSearchInterceptionLogger instance - - Example: - From callback_utils.py: - websearch_obj = WebSearchInterceptionLogger.initialize_from_proxy_config( - litellm_settings=litellm_settings, - callback_specific_params=callback_specific_params - ) """ - # Get websearch_interception_params from litellm_settings or callback_specific_params websearch_params: WebSearchInterceptionConfig = {} if "websearch_interception_params" in litellm_settings: websearch_params = litellm_settings["websearch_interception_params"] elif "websearch_interception" in callback_specific_params: websearch_params = callback_specific_params["websearch_interception"] - # Use classmethod to initialize from config return WebSearchInterceptionLogger.from_config_yaml(websearch_params) diff --git a/litellm/llms/custom_httpx/llm_http_handler.py b/litellm/llms/custom_httpx/llm_http_handler.py index b6fcf853ab5..deb2b97295e 100644 --- a/litellm/llms/custom_httpx/llm_http_handler.py +++ b/litellm/llms/custom_httpx/llm_http_handler.py @@ -1984,7 +1984,22 @@ async def async_anthropic_messages_handler( logging_obj=logging_obj, ) - # Call agentic completion hooks + # Call simplified tool execution hooks first (new pattern) + tool_exec_response = await self._call_tool_execution_hooks( + response=initial_response, + model=model, + messages=messages, + 
anthropic_messages_provider_config=anthropic_messages_provider_config, + anthropic_messages_optional_request_params=anthropic_messages_optional_request_params, + logging_obj=logging_obj, + stream=stream or False, + custom_llm_provider=custom_llm_provider, + kwargs=kwargs, + ) + if tool_exec_response is not None: + return tool_exec_response + + # Call legacy agentic completion hooks (old two-step pattern) final_response = await self._call_agentic_completion_hooks( response=initial_response, model=model, @@ -4390,6 +4405,252 @@ def _prepare_fake_stream_request( return stream, data return stream, data + async def _call_tool_execution_hooks( + self, + response: Any, + model: str, + messages: List[Dict], + anthropic_messages_provider_config: "BaseAnthropicMessagesConfig", + anthropic_messages_optional_request_params: Dict, + logging_obj: "LiteLLMLoggingObj", + stream: bool, + custom_llm_provider: str, + kwargs: Dict, + ) -> Optional[Any]: + """ + Check all callbacks for tool calls to execute via the simplified + async_execute_tool_calls hook. + + Aggregates results from all callbacks, then handles the agentic loop. + Multiple callbacks can handle different tool calls from the same response. + + Returns the final response after tool execution, or None if no hooks fired. 
+ """ + from litellm.integrations.custom_logger import CustomLogger, ToolCallResult + + callbacks = litellm.callbacks + ( + logging_obj.dynamic_success_callbacks or [] + ) + + all_results: List[ToolCallResult] = [] + working_response = response # progressively filtered + + # Ensure custom_llm_provider is in kwargs for callbacks + kwargs_with_provider = kwargs.copy() if kwargs else {} + kwargs_with_provider["custom_llm_provider"] = custom_llm_provider + + for callback in callbacks: + try: + if not isinstance(callback, CustomLogger): + continue + + results = await callback.async_execute_tool_calls( + response=working_response, + kwargs=kwargs_with_provider, + ) + if results: + all_results.extend(results) + # Remove handled tool_use blocks so next callback only sees unhandled ones + handled_ids = {r.tool_call_id for r in results} + working_response = self._filter_handled_tool_calls( + working_response, handled_ids + ) + except Exception as e: + verbose_logger.exception( + f"LiteLLM.ToolExecutionHookError: Exception in tool execution hooks: {str(e)}" + ) + + if not all_results: + return None + + # Framework builds messages and makes follow-up request + return await self._complete_tool_execution_loop( + response=response, # original unfiltered response + results=all_results, + model=model, + messages=messages, + anthropic_messages_optional_request_params=anthropic_messages_optional_request_params, + logging_obj=logging_obj, + kwargs=kwargs, + ) + + @staticmethod + def _filter_handled_tool_calls( + response: Any, + handled_ids: set, + ) -> Any: + """ + Return a copy of ``response`` with tool_use blocks whose ids are in + ``handled_ids`` removed. This lets subsequent callbacks only see + unhandled tool calls. 
+ """ + if isinstance(response, dict): + content = response.get("content") + if content is None: + return response + filtered = [ + b for b in content + if not ( + (isinstance(b, dict) and b.get("type") == "tool_use" and b.get("id") in handled_ids) + or (hasattr(b, "type") and getattr(b, "type", None) == "tool_use" and getattr(b, "id", None) in handled_ids) + ) + ] + return {**response, "content": filtered} + # Object-style response + if hasattr(response, "content") and response.content is not None: + import copy + + filtered = [ + b for b in response.content + if not ( + getattr(b, "type", None) == "tool_use" + and getattr(b, "id", None) in handled_ids + ) + ] + new_resp = copy.copy(response) + new_resp.content = filtered + return new_resp + return response + + async def _complete_tool_execution_loop( + self, + response: Any, + results: List, + model: str, + messages: List[Dict], + anthropic_messages_optional_request_params: Dict, + logging_obj: "LiteLLMLoggingObj", + kwargs: Dict, + ) -> Any: + """ + Framework handles ALL the plumbing after callbacks return ToolCallResults: + 1. Extract thinking blocks + tool_use blocks from original response + 2. Build assistant message (thinking + tool_use) + 3. Build tool_result user message (matching results by tool_call_id) + 4. Adjust max_tokens for thinking token usage + 5. Clean kwargs of internal keys + 6. Make follow-up API request + 7. Return final response + """ + from litellm.anthropic_interface import messages as anthropic_messages + from litellm.litellm_core_utils.core_helpers import filter_internal_params + + # ---- 1. 
Extract thinking blocks and tool_use blocks from response ---- + if isinstance(response, dict): + content = response.get("content", []) + else: + content = getattr(response, "content", None) or [] + + thinking_blocks: List[Dict] = [] + tool_use_blocks: List[Dict] = [] + handled_ids = {r.tool_call_id for r in results} + + for block in content: + if isinstance(block, dict): + btype = block.get("type") + bid = block.get("id") + else: + btype = getattr(block, "type", None) + bid = getattr(block, "id", None) + + if btype in ("thinking", "redacted_thinking"): + if isinstance(block, dict): + thinking_blocks.append(block) + else: + normalized: Dict[str, Any] = {"type": btype} + for attr in ("thinking", "data", "signature"): + if hasattr(block, attr): + normalized[attr] = getattr(block, attr) + thinking_blocks.append(normalized) + + elif btype == "tool_use" and bid in handled_ids: + if isinstance(block, dict): + tool_use_blocks.append(block) + else: + tool_use_blocks.append({ + "type": "tool_use", + "id": bid, + "name": getattr(block, "name", ""), + "input": getattr(block, "input", {}), + }) + + # ---- 2. Build assistant message (thinking first, then tool_use) ---- + assistant_content: List[Dict] = [] + if thinking_blocks: + assistant_content.extend(thinking_blocks) + assistant_content.extend(tool_use_blocks) + + assistant_message = {"role": "assistant", "content": assistant_content} + + # ---- 3. Build tool_result user message ---- + results_by_id = {r.tool_call_id: r for r in results} + tool_result_blocks = [] + for tu in tool_use_blocks: + tc_id = tu["id"] + r = results_by_id.get(tc_id) + block: Dict[str, Any] = { + "type": "tool_result", + "tool_use_id": tc_id, + "content": r.content if r else "", + } + if r and r.is_error: + block["is_error"] = True + tool_result_blocks.append(block) + + user_message = {"role": "user", "content": tool_result_blocks} + + # ---- 4. 
Prepare max_tokens (subtract thinking usage if present) ---- + max_tokens = anthropic_messages_optional_request_params.get( + "max_tokens", + kwargs.get("max_tokens", 1024), + ) + + # If thinking is enabled, subtract thinking token usage from max_tokens + if isinstance(response, dict): + usage = response.get("usage", {}) + else: + usage = getattr(response, "usage", None) + if usage and not isinstance(usage, dict): + usage = getattr(usage, "__dict__", {}) + if usage: + # cache_creation_input_tokens is used in some responses; thinking tokens + # are not currently reported separately, but this is where we'd adjust. + pass + + # ---- 5. Clean kwargs for follow-up request ---- + kwargs_for_followup = filter_internal_params(kwargs) + + # ---- 6. Resolve full model name ---- + full_model_name = model + if logging_obj is not None: + agentic_params = logging_obj.model_call_details.get( + "agentic_loop_params", {} + ) + full_model_name = agentic_params.get("model", model) + + optional_params_without_max_tokens = { + k: v + for k, v in anthropic_messages_optional_request_params.items() + if k != "max_tokens" + } + + follow_up_messages = messages + [assistant_message, user_message] + + verbose_logger.debug( + f"ToolExecutionLoop: Making follow-up request with " + f"{len(results)} tool result(s), model={full_model_name}" + ) + + # ---- 7. 
Make follow-up API request ---- + final_response = await anthropic_messages.acreate( + max_tokens=max_tokens, + messages=follow_up_messages, + model=full_model_name, + **optional_params_without_max_tokens, + **kwargs_for_followup, + ) + return final_response + async def _call_agentic_completion_hooks( self, response: Any, diff --git a/tests/test_litellm/integrations/websearch_interception/test_websearch_chat_completion.py b/tests/test_litellm/integrations/websearch_interception/test_websearch_chat_completion.py index 1b53633484d..a32f79b3b76 100644 --- a/tests/test_litellm/integrations/websearch_interception/test_websearch_chat_completion.py +++ b/tests/test_litellm/integrations/websearch_interception/test_websearch_chat_completion.py @@ -110,18 +110,23 @@ async def test_websearch_chat_completion_with_openai(): @pytest.mark.asyncio async def test_websearch_chat_completion_hook_detection(): - """Test that websearch hook correctly detects tool calls in response.""" + """Test that chat completion agentic loop is disabled (Anthropic-only for now). + + The legacy async_should_run_chat_completion_agentic_loop hook is disabled + because async_execute_tool_calls handles Anthropic format only. Chat + completion support will be added later via the framework orchestration layer. 
+ """ from litellm.types.utils import ( ChatCompletionMessageToolCall, Choices, Function, Message, ) - + websearch_logger = WebSearchInterceptionLogger( enabled_providers=[LlmProviders.OPENAI] ) - + # Mock response with litellm_web_search tool call mock_response = ModelResponse( id="test-123", @@ -149,8 +154,8 @@ async def test_websearch_chat_completion_hook_detection(): object="chat.completion", created=1234567890, ) - - # Test should_run_chat_completion_agentic_loop + + # Legacy chat completion hook is intentionally disabled should_run, tools_dict = ( await websearch_logger.async_should_run_chat_completion_agentic_loop( response=mock_response, @@ -167,13 +172,9 @@ async def test_websearch_chat_completion_hook_detection(): kwargs={}, ) ) - - # Verify hook detected the tool call - assert should_run is True - assert "tool_calls" in tools_dict - assert len(tools_dict["tool_calls"]) == 1 - assert tools_dict["tool_calls"][0]["name"] == "litellm_web_search" - assert tools_dict["response_format"] == "openai" + + assert should_run is False + assert tools_dict == {} @pytest.mark.asyncio diff --git a/tests/test_litellm/integrations/websearch_interception/test_websearch_interception_handler.py b/tests/test_litellm/integrations/websearch_interception/test_websearch_interception_handler.py index 85ed5dddee1..54111b6f49c 100644 --- a/tests/test_litellm/integrations/websearch_interception/test_websearch_interception_handler.py +++ b/tests/test_litellm/integrations/websearch_interception/test_websearch_interception_handler.py @@ -4,16 +4,110 @@ Tests the WebSearchInterceptionLogger class and helper functions. 
""" +import asyncio from unittest.mock import Mock import pytest +from litellm.constants import LITELLM_WEB_SEARCH_TOOL_NAME from litellm.integrations.websearch_interception.handler import ( WebSearchInterceptionLogger, ) +from litellm.integrations.websearch_interception.tools import ( + get_litellm_web_search_tool, + is_web_search_tool, +) from litellm.types.utils import LlmProviders +class TestIsWebSearchTool: + """Tests for is_web_search_tool() helper function""" + + def test_litellm_standard_tool(self): + """Should detect LiteLLM standard web search tool""" + tool = {"name": LITELLM_WEB_SEARCH_TOOL_NAME} + assert is_web_search_tool(tool) is True + + def test_anthropic_native_web_search(self): + """Should detect Anthropic native web_search_* type""" + tool = {"type": "web_search_20250305", "name": "web_search"} + assert is_web_search_tool(tool) is True + + def test_anthropic_native_future_version(self): + """Should detect future versions of Anthropic web_search type""" + tool = {"type": "web_search_20260101", "name": "web_search"} + assert is_web_search_tool(tool) is True + + def test_claude_code_web_search(self): + """Should detect Claude Code's web_search with type field""" + tool = {"name": "web_search", "type": "web_search_20250305"} + assert is_web_search_tool(tool) is True + + def test_legacy_websearch_format(self): + """Should detect legacy WebSearch format""" + tool = {"name": "WebSearch"} + assert is_web_search_tool(tool) is True + + def test_non_websearch_tool(self): + """Should not detect non-web-search tools""" + assert is_web_search_tool({"name": "calculator"}) is False + assert is_web_search_tool({"name": "read_file"}) is False + assert is_web_search_tool({"type": "function", "name": "search"}) is False + + def test_web_search_name_without_type(self): + """Should NOT detect 'web_search' name without type field (could be custom tool)""" + tool = {"name": "web_search"} # No type field + assert is_web_search_tool(tool) is False + + +class 
class TestGetLitellmWebSearchTool:
    """Tests for the get_litellm_web_search_tool() helper."""

    def test_returns_valid_tool_definition(self):
        """The helper returns a well-formed tool definition with a query parameter."""
        tool = get_litellm_web_search_tool()

        assert tool["name"] == LITELLM_WEB_SEARCH_TOOL_NAME
        assert "description" in tool
        assert "input_schema" in tool
        schema = tool["input_schema"]
        assert schema["type"] == "object"
        assert "query" in schema["properties"]


class TestWebSearchInterceptionLoggerInit:
    """Tests for WebSearchInterceptionLogger construction."""

    def test_default_initialization(self):
        """With no arguments, bedrock is enabled and no tool name is set."""
        interceptor = WebSearchInterceptionLogger()

        assert "bedrock" in interceptor.enabled_providers
        assert interceptor.search_tool_name is None

    def test_custom_providers(self):
        """Every explicitly listed provider ends up enabled."""
        interceptor = WebSearchInterceptionLogger(
            enabled_providers=["bedrock", "vertex_ai", "openai"]
        )

        for provider in ("bedrock", "vertex_ai", "openai"):
            assert provider in interceptor.enabled_providers

    def test_custom_search_tool_name(self):
        """A custom search tool name is stored as-is."""
        interceptor = WebSearchInterceptionLogger(
            enabled_providers=["bedrock"],
            search_tool_name="custom-search-tool",
        )

        assert interceptor.search_tool_name == "custom-search-tool"

    def test_llm_providers_enum_conversion(self):
        """LlmProviders enum members are normalized to their string values."""
        interceptor = WebSearchInterceptionLogger(
            enabled_providers=[LlmProviders.BEDROCK, LlmProviders.VERTEX_AI]
        )

        assert "bedrock" in interceptor.enabled_providers
        assert "vertex_ai" in interceptor.enabled_providers
logger.search_tool_name == "my-search" -@pytest.mark.asyncio -async def test_async_should_run_agentic_loop(): - """Test that agentic loop is NOT triggered for wrong provider or missing WebSearch tool""" - logger = WebSearchInterceptionLogger(enabled_providers=["bedrock"]) +def test_initialize_from_proxy_config_defaults(): + """Test initialization from proxy config with defaults when params missing""" + litellm_settings = {} + callback_specific_params = {} - # Test 1: Wrong provider (not in enabled_providers) - response = Mock() - should_run, tools_dict = await logger.async_should_run_agentic_loop( - response=response, - model="gpt-4", - messages=[], - tools=[{"name": "WebSearch"}], - stream=False, - custom_llm_provider="openai", # Not in enabled_providers - kwargs={}, + logger = WebSearchInterceptionLogger.initialize_from_proxy_config( + litellm_settings=litellm_settings, + callback_specific_params=callback_specific_params, ) - assert should_run is False - assert tools_dict == {} - - # Test 2: No WebSearch tool in request - should_run, tools_dict = await logger.async_should_run_agentic_loop( - response=response, - model="bedrock/claude", - messages=[], - tools=[{"name": "SomeOtherTool"}], # No WebSearch - stream=False, - custom_llm_provider="bedrock", - kwargs={}, - ) + # Should use default bedrock provider + assert "bedrock" in logger.enabled_providers - assert should_run is False - assert tools_dict == {} +def test_async_should_run_agentic_loop_wrong_provider(): + """Test that agentic loop is NOT triggered for wrong provider""" -@pytest.mark.asyncio -async def test_internal_flags_filtered_from_followup_kwargs(): - """Test that internal _websearch_interception flags are filtered from follow-up request kwargs. 
+ async def _test(): + logger = WebSearchInterceptionLogger(enabled_providers=["bedrock"]) - Regression test for bug where _websearch_interception_converted_stream was passed - to the follow-up LLM request, causing "Extra inputs are not permitted" errors - from providers like Bedrock that use strict parameter validation. - """ - logger = WebSearchInterceptionLogger(enabled_providers=["bedrock"]) + response = Mock() + should_run, tools_dict = await logger.async_should_run_agentic_loop( + response=response, + model="gpt-4", + messages=[], + tools=[{"type": "web_search_20250305", "name": "web_search"}], + stream=False, + custom_llm_provider="openai", # Not in enabled_providers + kwargs={}, + ) - # Simulate kwargs that would be passed during agentic loop execution - kwargs_with_internal_flags = { - "_websearch_interception_converted_stream": True, - "_websearch_interception_other_flag": "test", - "temperature": 0.7, - "max_tokens": 1024, - } + assert should_run is False + assert tools_dict == {} - # Apply the same filtering logic used in _execute_agentic_loop - kwargs_for_followup = { - k: v for k, v in kwargs_with_internal_flags.items() - if not k.startswith('_websearch_interception') - } + asyncio.run(_test()) - # Verify internal flags are filtered out - assert "_websearch_interception_converted_stream" not in kwargs_for_followup - assert "_websearch_interception_other_flag" not in kwargs_for_followup - # Verify regular kwargs are preserved - assert kwargs_for_followup["temperature"] == 0.7 - assert kwargs_for_followup["max_tokens"] == 1024 +def test_async_should_run_agentic_loop_no_websearch_tool(): + """Test that agentic loop is NOT triggered when no WebSearch tool in request""" + async def _test(): + logger = WebSearchInterceptionLogger(enabled_providers=["bedrock"]) -@pytest.mark.asyncio -async def test_async_pre_call_deployment_hook_provider_from_top_level_kwargs(): - """Test that async_pre_call_deployment_hook finds custom_llm_provider at top-level kwargs. 
def test_async_should_run_agentic_loop_no_websearch_in_response():
    """Agentic loop must NOT trigger when the response has no WebSearch tool_use."""

    async def _run():
        interceptor = WebSearchInterceptionLogger(enabled_providers=["bedrock"])

        # Text-only response: nothing to intercept.
        text_only_response = {
            "content": [
                {"type": "text", "text": "I don't need to search for this."}
            ]
        }

        should_run, tools_dict = await interceptor.async_should_run_agentic_loop(
            response=text_only_response,
            model="bedrock/claude",
            messages=[],
            tools=[{"type": "web_search_20250305", "name": "web_search"}],
            stream=False,
            custom_llm_provider="bedrock",
            kwargs={},
        )

        assert should_run is False
        assert tools_dict == {}

    asyncio.run(_run())


def test_async_execute_tool_calls_positive_case():
    """async_execute_tool_calls returns a result for a WebSearch tool_use block."""
    from unittest.mock import AsyncMock, patch

    async def _run():
        interceptor = WebSearchInterceptionLogger(enabled_providers=["bedrock"])

        websearch_response = {
            "content": [
                {
                    "type": "tool_use",
                    "id": "tool_123",
                    "name": "WebSearch",
                    "input": {"query": "weather in SF"},
                }
            ]
        }

        with patch.object(
            interceptor,
            "_execute_search",
            new_callable=AsyncMock,
            return_value="Sunny, 72F",
        ):
            tool_results = await interceptor.async_execute_tool_calls(
                response=websearch_response,
                kwargs={"custom_llm_provider": "bedrock"},
            )

        assert len(tool_results) == 1
        first = tool_results[0]
        assert first.tool_call_id == "tool_123"
        assert first.content == "Sunny, 72F"
        assert first.is_error is False

    asyncio.run(_run())


def test_async_execute_tool_calls_with_thinking_blocks():
    """async_execute_tool_calls works when thinking blocks accompany the tool_use."""
    from unittest.mock import AsyncMock, patch

    async def _run():
        interceptor = WebSearchInterceptionLogger(enabled_providers=["bedrock"])

        # Response carrying a thinking block ahead of the WebSearch tool_use.
        mixed_response = {
            "content": [
                {
                    "type": "thinking",
                    "thinking": "Let me search for the weather...",
                },
                {
                    "type": "tool_use",
                    "id": "tool_456",
                    "name": "WebSearch",
                    "input": {"query": "current weather SF"},
                },
            ]
        }

        with patch.object(
            interceptor,
            "_execute_search",
            new_callable=AsyncMock,
            return_value="Cloudy, 60F",
        ):
            tool_results = await interceptor.async_execute_tool_calls(
                response=mixed_response,
                kwargs={"custom_llm_provider": "bedrock"},
            )

        # Only the tool_use block yields a result — thinking blocks are the
        # framework's responsibility, not the callback's.
        assert len(tool_results) == 1
        first = tool_results[0]
        assert first.tool_call_id == "tool_456"
        assert first.content == "Cloudy, 60F"
        assert first.is_error is False

    asyncio.run(_run())
- """ + async def _test(): + logger = WebSearchInterceptionLogger(enabled_providers=["bedrock"]) + + response = Mock() + should_run, tools_dict = await logger.async_should_run_agentic_loop( + response=response, + model="bedrock/claude", + messages=[], + tools=[], # Empty tools list + stream=False, + custom_llm_provider="bedrock", + kwargs={}, + ) + + assert should_run is False + assert tools_dict == {} + + asyncio.run(_test()) + + +def test_async_should_run_agentic_loop_none_tools(): + """Test with None tools""" + + async def _test(): + logger = WebSearchInterceptionLogger(enabled_providers=["bedrock"]) + + response = Mock() + should_run, tools_dict = await logger.async_should_run_agentic_loop( + response=response, + model="bedrock/claude", + messages=[], + tools=None, # None tools + stream=False, + custom_llm_provider="bedrock", + kwargs={}, + ) + + assert should_run is False + assert tools_dict == {} + + asyncio.run(_test()) + + + +@pytest.mark.asyncio +async def test_async_pre_call_deployment_hook_litellm_params_provider(): + """Test that async_pre_call_deployment_hook reads custom_llm_provider from litellm_params.""" logger = WebSearchInterceptionLogger(enabled_providers=["bedrock"]) - # Simulate kwargs as they arrive from the router path: - # custom_llm_provider is at the TOP LEVEL (not nested under litellm_params) kwargs = { "model": "anthropic.claude-3-5-sonnet-20241022-v2:0", "messages": [{"role": "user", "content": "Search the web for LiteLLM"}], @@ -120,19 +348,15 @@ async def test_async_pre_call_deployment_hook_provider_from_top_level_kwargs(): {"type": "web_search_20250305", "name": "web_search", "max_uses": 3}, {"type": "function", "function": {"name": "other_tool", "parameters": {}}}, ], - "custom_llm_provider": "bedrock", + "litellm_params": {"custom_llm_provider": "bedrock"}, "api_key": "fake-key", } result = await logger.async_pre_call_deployment_hook(kwargs=kwargs, call_type=None) - # Should NOT be None — the hook should have triggered assert 
result is not None - # The web_search tool should be converted to litellm_web_search (OpenAI format) - assert any( - t.get("type") == "function" and t.get("function", {}).get("name") == "litellm_web_search" - for t in result["tools"] - ) + # The web_search tool should be converted to litellm_web_search (Anthropic format) + assert any(t.get("name") == "litellm_web_search" for t in result["tools"]) # The non-web-search tool should be preserved assert any( t.get("type") == "function" and t.get("function", {}).get("name") == "other_tool" @@ -142,11 +366,7 @@ async def test_async_pre_call_deployment_hook_provider_from_top_level_kwargs(): @pytest.mark.asyncio async def test_async_pre_call_deployment_hook_returns_full_kwargs(): - """Test that async_pre_call_deployment_hook returns the full kwargs dict, not a partial one. - - Regression test for bug where the hook returned {"tools": converted_tools} instead of - the full kwargs dict, causing model/messages/api_key/etc. to be lost. - """ + """Test that async_pre_call_deployment_hook returns the full kwargs dict.""" logger = WebSearchInterceptionLogger(enabled_providers=["openai"]) kwargs = { @@ -155,7 +375,7 @@ async def test_async_pre_call_deployment_hook_returns_full_kwargs(): "tools": [ {"type": "web_search_20250305", "name": "web_search"}, ], - "custom_llm_provider": "openai", + "litellm_params": {"custom_llm_provider": "openai"}, "api_key": "sk-fake", "temperature": 0.7, "metadata": {"user": "test"}, @@ -164,18 +384,12 @@ async def test_async_pre_call_deployment_hook_returns_full_kwargs(): result = await logger.async_pre_call_deployment_hook(kwargs=kwargs, call_type=None) assert result is not None - # All original keys must be preserved assert result["model"] == "gpt-4o" assert result["messages"] == [{"role": "user", "content": "Search for something"}] assert result["api_key"] == "sk-fake" assert result["temperature"] == 0.7 assert result["metadata"] == {"user": "test"} - assert result["custom_llm_provider"] == 
"openai" - # Tools should be converted - assert any( - t.get("type") == "function" and t.get("function", {}).get("name") == "litellm_web_search" - for t in result["tools"] - ) + assert any(t.get("name") == "litellm_web_search" for t in result["tools"]) @pytest.mark.asyncio @@ -187,7 +401,7 @@ async def test_async_pre_call_deployment_hook_skips_disabled_provider(): "model": "gpt-4o", "messages": [{"role": "user", "content": "test"}], "tools": [{"type": "web_search_20250305", "name": "web_search"}], - "custom_llm_provider": "openai", # Not in enabled_providers + "litellm_params": {"custom_llm_provider": "openai"}, # Not in enabled_providers } result = await logger.async_pre_call_deployment_hook(kwargs=kwargs, call_type=None) @@ -205,71 +419,8 @@ async def test_async_pre_call_deployment_hook_skips_no_websearch_tools(): "tools": [ {"type": "function", "function": {"name": "calculator", "parameters": {}}}, ], - "custom_llm_provider": "openai", + "litellm_params": {"custom_llm_provider": "openai"}, } result = await logger.async_pre_call_deployment_hook(kwargs=kwargs, call_type=None) assert result is None - - -@pytest.mark.asyncio -async def test_async_pre_call_deployment_hook_nested_litellm_params_fallback(): - """Test that the hook still works when custom_llm_provider is in nested litellm_params. - - This is the Anthropic experimental pass-through path where litellm_params is - explicitly constructed with custom_llm_provider inside it. 
- """ - logger = WebSearchInterceptionLogger(enabled_providers=["bedrock"]) - - kwargs = { - "model": "anthropic.claude-3-5-sonnet-20241022-v2:0", - "messages": [{"role": "user", "content": "test"}], - "tools": [{"type": "web_search_20250305", "name": "web_search"}], - "litellm_params": { - "custom_llm_provider": "bedrock", - }, - } - - result = await logger.async_pre_call_deployment_hook(kwargs=kwargs, call_type=None) - - assert result is not None - assert any( - t.get("type") == "function" and t.get("function", {}).get("name") == "litellm_web_search" - for t in result["tools"] - ) - # Full kwargs preserved - assert result["model"] == "anthropic.claude-3-5-sonnet-20241022-v2:0" - - -@pytest.mark.asyncio -async def test_async_pre_call_deployment_hook_provider_derived_from_model_name(): - """Test that async_pre_call_deployment_hook derives custom_llm_provider from the model name. - - Regression test for the router _acompletion path where custom_llm_provider is NOT - in kwargs at all — neither at top-level nor in litellm_params. The hook must derive - the provider from the model name (e.g., "openai/gpt-4o-mini" → "openai"). 
- """ - logger = WebSearchInterceptionLogger(enabled_providers=["openai"]) - - # Simulate kwargs as they arrive from router._acompletion: - # NO custom_llm_provider key anywhere — only model name contains the provider - kwargs = { - "model": "openai/gpt-4o-mini", - "messages": [{"role": "user", "content": "Search the web for LiteLLM"}], - "tools": [ - {"type": "web_search_20250305", "name": "web_search", "max_uses": 3}, - ], - "api_key": "fake-key", - } - - result = await logger.async_pre_call_deployment_hook(kwargs=kwargs, call_type=None) - - # Should NOT be None — the hook should derive "openai" from "openai/gpt-4o-mini" - assert result is not None - assert any( - t.get("type") == "function" and t.get("function", {}).get("name") == "litellm_web_search" - for t in result["tools"] - ) - # Full kwargs preserved - assert result["model"] == "openai/gpt-4o-mini" - assert result["api_key"] == "fake-key" diff --git a/tests/test_litellm/integrations/websearch_interception/test_websearch_interception_thinking.py b/tests/test_litellm/integrations/websearch_interception/test_websearch_interception_thinking.py index 8093ce6fc12..75d2b056932 100644 --- a/tests/test_litellm/integrations/websearch_interception/test_websearch_interception_thinking.py +++ b/tests/test_litellm/integrations/websearch_interception/test_websearch_interception_thinking.py @@ -1,11 +1,12 @@ """ Unit tests for WebSearch Interception with Extended Thinking -Tests that the websearch interception agentic loop correctly handles -thinking/redacted_thinking blocks when extended thinking is enabled. +Tests that the websearch interception correctly handles thinking/redacted_thinking +blocks, both at the transformation layer and the async_execute_tool_calls layer. 
""" -from unittest.mock import Mock +import asyncio +from unittest.mock import Mock, AsyncMock, patch import pytest @@ -72,11 +73,11 @@ def test_no_thinking_blocks_backward_compat(self): ] search_results = ["Search result text"] - # No thinking_blocks param (default None) assistant_msg, _ = ( WebSearchTransformation._transform_response_anthropic( tool_calls=tool_calls, search_results=search_results, + thinking_blocks=[], ) ) @@ -173,12 +174,12 @@ def test_transform_response_openai_ignores_thinking(self): assert "content" not in assistant_msg -class TestAgenticLoopThinkingExtraction: - """Tests for thinking block extraction in async_should_run_agentic_loop.""" +class TestAsyncExecuteToolCallsWithThinking: + """Tests for async_execute_tool_calls with thinking blocks in response.""" @pytest.mark.asyncio - async def test_extracts_thinking_blocks_from_dict_response(self): - """Test extraction of thinking blocks from dict-style response.""" + async def test_executes_tool_calls_with_thinking_in_response(self): + """Test that async_execute_tool_calls works when response has thinking + tool_use blocks.""" logger = WebSearchInterceptionLogger(enabled_providers=["bedrock"]) response = { @@ -198,79 +199,45 @@ async def test_extracts_thinking_blocks_from_dict_response(self): ] } - should_run, tools_dict = await logger.async_should_run_agentic_loop( - response=response, - model="bedrock/claude", - messages=[], - tools=[{"name": "WebSearch"}], - stream=False, - custom_llm_provider="bedrock", - kwargs={}, - ) + with patch.object(logger, "_execute_search", new_callable=AsyncMock) as mock_search: + mock_search.return_value = "Title: News\nURL: https://example.com\nSnippet: Latest" - assert should_run is True - assert len(tools_dict["tool_calls"]) == 1 - assert len(tools_dict["thinking_blocks"]) == 2 - assert tools_dict["thinking_blocks"][0]["type"] == "thinking" - assert tools_dict["thinking_blocks"][0]["thinking"] == "Let me think..." 
- assert tools_dict["thinking_blocks"][1]["type"] == "redacted_thinking" - assert tools_dict["thinking_blocks"][1]["data"] == "redacted_data" + results = await logger.async_execute_tool_calls( + response=response, + kwargs={"custom_llm_provider": "bedrock"}, + ) - @pytest.mark.asyncio - async def test_extracts_thinking_blocks_from_object_response(self): - """Test extraction of thinking blocks from non-dict response objects. + assert len(results) == 1 + assert results[0].tool_call_id == "toolu_01" + assert results[0].is_error is False + assert "News" in results[0].content - In practice, the Anthropic pass-through always returns plain dicts - (TypedDict(**raw_json) produces a dict). This test covers the safety - branch for non-dict response objects. - """ + @pytest.mark.asyncio + async def test_no_results_when_no_tool_calls(self): + """Test that thinking-only responses don't trigger tool execution.""" logger = WebSearchInterceptionLogger(enabled_providers=["bedrock"]) - # Simulate object-style response blocks - thinking_block = Mock() - thinking_block.type = "thinking" - thinking_block.thinking = "Reasoning..." 
- thinking_block.signature = "sig" - - redacted_block = Mock() - redacted_block.type = "redacted_thinking" - redacted_block.data = "abc" - - tool_block = Mock() - tool_block.type = "tool_use" - tool_block.name = "litellm_web_search" - tool_block.id = "toolu_01" - tool_block.input = {"query": "test"} - - response = Mock() - response.content = [thinking_block, redacted_block, tool_block] + response = { + "content": [ + { + "type": "thinking", + "thinking": "Just thinking...", + "signature": "sig", + }, + {"type": "text", "text": "Here is my response."}, + ] + } - should_run, tools_dict = await logger.async_should_run_agentic_loop( + results = await logger.async_execute_tool_calls( response=response, - model="bedrock/claude", - messages=[], - tools=[{"name": "WebSearch"}], - stream=False, - custom_llm_provider="bedrock", - kwargs={}, + kwargs={"custom_llm_provider": "bedrock"}, ) - assert should_run is True - assert len(tools_dict["thinking_blocks"]) == 2 - # Verify getattr-based conversion produced correct dicts - assert tools_dict["thinking_blocks"][0] == { - "type": "thinking", - "thinking": "Reasoning...", - "signature": "sig", - } - assert tools_dict["thinking_blocks"][1] == { - "type": "redacted_thinking", - "data": "abc", - } + assert results == [] @pytest.mark.asyncio - async def test_no_thinking_blocks_when_thinking_disabled(self): - """Test that thinking_blocks is empty when response has no thinking.""" + async def test_no_results_when_thinking_disabled(self): + """Test that tool_use without thinking blocks still works.""" logger = WebSearchInterceptionLogger(enabled_providers=["bedrock"]) response = { @@ -284,44 +251,37 @@ async def test_no_thinking_blocks_when_thinking_disabled(self): ] } - should_run, tools_dict = await logger.async_should_run_agentic_loop( - response=response, - model="bedrock/claude", - messages=[], - tools=[{"name": "WebSearch"}], - stream=False, - custom_llm_provider="bedrock", - kwargs={}, - ) + with patch.object(logger, 
"_execute_search", new_callable=AsyncMock) as mock_search: + mock_search.return_value = "Search result text" + + results = await logger.async_execute_tool_calls( + response=response, + kwargs={"custom_llm_provider": "bedrock"}, + ) - assert should_run is True - assert tools_dict["thinking_blocks"] == [] + assert len(results) == 1 + assert results[0].tool_call_id == "toolu_01" + assert results[0].is_error is False @pytest.mark.asyncio - async def test_thinking_blocks_not_extracted_when_no_tool_calls(self): - """Test that no extraction happens when no websearch tool calls found.""" + async def test_skips_wrong_provider(self): + """Test that async_execute_tool_calls returns empty for wrong provider.""" logger = WebSearchInterceptionLogger(enabled_providers=["bedrock"]) response = { "content": [ { - "type": "thinking", - "thinking": "Just thinking...", - "signature": "sig", + "type": "tool_use", + "id": "toolu_01", + "name": "litellm_web_search", + "input": {"query": "test"}, }, - {"type": "text", "text": "Here is my response."}, ] } - should_run, tools_dict = await logger.async_should_run_agentic_loop( + results = await logger.async_execute_tool_calls( response=response, - model="bedrock/claude", - messages=[], - tools=[{"name": "WebSearch"}], - stream=False, - custom_llm_provider="bedrock", - kwargs={}, + kwargs={"custom_llm_provider": "openai"}, ) - assert should_run is False - assert tools_dict == {} + assert results == []