From ebc9481b5bb59e2391ffb4926d3f16109f1f8c65 Mon Sep 17 00:00:00 2001
From: Michael Cusack <mcusack@altoslabs.com>
Date: Thu, 22 Jan 2026 09:38:56 -0800
Subject: [PATCH] fix(websearch_interception): filter internal kwargs before
 follow-up request

The websearch interception handler was passing internal flags like
`_websearch_interception_converted_stream` to the follow-up LLM request.
This caused "Extra inputs are not permitted" errors from providers like
Bedrock that use strict Pydantic validation.

Fix: Filter out all kwargs whose names start with the `_websearch_interception`
prefix before making the follow-up anthropic_messages.acreate() call.
---
 .../websearch_interception/handler.py         |  9 ++++-
 .../test_websearch_interception_handler.py    | 33 +++++++++++++++++++
 2 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/litellm/integrations/websearch_interception/handler.py b/litellm/integrations/websearch_interception/handler.py
index 943a2bb4f36..5d36b760afb 100644
--- a/litellm/integrations/websearch_interception/handler.py
+++ b/litellm/integrations/websearch_interception/handler.py
@@ -413,6 +413,13 @@ async def _execute_agentic_loop(
                 if k != 'max_tokens'
             }
 
+            # Remove internal websearch interception flags from kwargs before follow-up request
+            # These flags are used internally and should not be passed to the LLM provider
+            kwargs_for_followup = {
+                k: v for k, v in kwargs.items()
+                if not k.startswith('_websearch_interception')
+            }
+
             # Get model from logging_obj.model_call_details["agentic_loop_params"]
             # This preserves the full model name with provider prefix (e.g., "bedrock/invoke/...")
             full_model_name = model
@@ -428,7 +435,7 @@ async def _execute_agentic_loop(
                 messages=follow_up_messages,
                 model=full_model_name,
                 **optional_params_without_max_tokens,
-                **kwargs,
+                **kwargs_for_followup,
             )
             verbose_logger.debug(
                 f"WebSearchInterception: Follow-up request completed, response type: {type(final_response)}"
diff --git a/tests/test_litellm/integrations/websearch_interception/test_websearch_interception_handler.py b/tests/test_litellm/integrations/websearch_interception/test_websearch_interception_handler.py
index 8ac53315aa0..5abecb46c99 100644
--- a/tests/test_litellm/integrations/websearch_interception/test_websearch_interception_handler.py
+++ b/tests/test_litellm/integrations/websearch_interception/test_websearch_interception_handler.py
@@ -67,3 +67,36 @@ async def test_async_should_run_agentic_loop():
 
     assert should_run is False
     assert tools_dict == {}
+
+
+@pytest.mark.asyncio
+async def test_internal_flags_filtered_from_followup_kwargs():
+    """Check that keys prefixed with `_websearch_interception` are dropped from follow-up kwargs.
+
+    Regression test for a bug where `_websearch_interception_converted_stream` was passed
+    to the follow-up LLM request, causing "Extra inputs are not permitted" errors
+    from providers like Bedrock that use strict parameter validation.
+    """
+    logger = WebSearchInterceptionLogger(enabled_providers=["bedrock"])  # NOTE(review): constructed but never used below
+
+    # Sample kwargs mixing two internal interception flags with two ordinary request parameters
+    kwargs_with_internal_flags = {
+        "_websearch_interception_converted_stream": True,
+        "_websearch_interception_other_flag": "test",
+        "temperature": 0.7,
+        "max_tokens": 1024,
+    }
+
+    # NOTE(review): this repeats the prefix filter from _execute_agentic_loop inline; the handler itself is not called
+    kwargs_for_followup = {
+        k: v for k, v in kwargs_with_internal_flags.items()
+        if not k.startswith('_websearch_interception')
+    }
+
+    # Both prefixed keys must be absent after filtering
+    assert "_websearch_interception_converted_stream" not in kwargs_for_followup
+    assert "_websearch_interception_other_flag" not in kwargs_for_followup
+
+    # Keys without the prefix must pass through with their values unchanged
+    assert kwargs_for_followup["temperature"] == 0.7
+    assert kwargs_for_followup["max_tokens"] == 1024