diff --git a/litellm/llms/bedrock/chat/converse_handler.py b/litellm/llms/bedrock/chat/converse_handler.py
index 60a93b169c8..ec5b942ec1b 100644
--- a/litellm/llms/bedrock/chat/converse_handler.py
+++ b/litellm/llms/bedrock/chat/converse_handler.py
@@ -68,7 +68,7 @@ def make_sync_call(
             model_response=model_response, json_mode=json_mode
         )
     else:
-        decoder = AWSEventStreamDecoder(model=model)
+        decoder = AWSEventStreamDecoder(model=model, json_mode=json_mode)
         completion_stream = decoder.iter_bytes(response.iter_bytes(chunk_size=stream_chunk_size))
 
     # LOGGING
diff --git a/litellm/llms/bedrock/chat/converse_transformation.py b/litellm/llms/bedrock/chat/converse_transformation.py
index a0f2f65fb7f..d210f294c64 100644
--- a/litellm/llms/bedrock/chat/converse_transformation.py
+++ b/litellm/llms/bedrock/chat/converse_transformation.py
@@ -1779,6 +1779,92 @@ def _translate_message_content(self, content_blocks: List[ContentBlock]) -> Tupl
 
         return content_str, tools, reasoningContentBlocks, citationsContentBlocks
 
+    @staticmethod
+    def _unwrap_bedrock_properties(json_str: str) -> str:
+        """
+        Unwrap Bedrock's response_format JSON structure.
+
+        If the JSON has a single "properties" key, extract its value.
+        Otherwise, return the original string.
+
+        Args:
+            json_str: JSON string to unwrap
+
+        Returns:
+            Unwrapped JSON string or original if unwrapping not needed
+        """
+        try:
+            response_data = json.loads(json_str)
+            if (
+                isinstance(response_data, dict)
+                and "properties" in response_data
+                and len(response_data) == 1
+            ):
+                response_data = response_data["properties"]
+                return json.dumps(response_data)
+        except json.JSONDecodeError:
+            pass
+        return json_str
+
+    @staticmethod
+    def _filter_json_mode_tools(
+        json_mode: Optional[bool],
+        tools: List[ChatCompletionToolCallChunk],
+        chat_completion_message: ChatCompletionResponseMessage,
+    ) -> Optional[List[ChatCompletionToolCallChunk]]:
+        """
+        When json_mode is True, Bedrock may return the internal `json_tool_call`
+        tool alongside real user-defined tools. This method handles 3 scenarios:
+
+        1. Only json_tool_call present  -> convert to text content, return None
+        2. Mixed json_tool_call + real  -> filter out json_tool_call, return real tools
+        3. No json_tool_call / no json_mode -> return tools as-is
+        """
+        if not json_mode or not tools:
+            return tools if tools else None
+
+        json_tool_indices = [
+            i
+            for i, t in enumerate(tools)
+            if t["function"].get("name") == RESPONSE_FORMAT_TOOL_NAME
+        ]
+
+        if not json_tool_indices:
+            # No json_tool_call found, return tools unchanged
+            return tools
+
+        if len(json_tool_indices) == len(tools):
+            # All tools are json_tool_call — convert first one to content
+            verbose_logger.debug(
+                "Processing JSON tool call response for response_format"
+            )
+            json_mode_content_str: Optional[str] = tools[0]["function"].get(
+                "arguments"
+            )
+            if json_mode_content_str is not None:
+                json_mode_content_str = AmazonConverseConfig._unwrap_bedrock_properties(
+                    json_mode_content_str
+                )
+                chat_completion_message["content"] = json_mode_content_str
+            return None
+
+        # Mixed: filter out json_tool_call, keep real tools.
+        # Preserve the json_tool_call content as message text so the structured
+        # output from response_format is not silently lost.
+        first_idx = json_tool_indices[0]
+        json_mode_args = tools[first_idx]["function"].get("arguments")
+        if json_mode_args is not None:
+            json_mode_args = AmazonConverseConfig._unwrap_bedrock_properties(
+                json_mode_args
+            )
+            existing = chat_completion_message.get("content") or ""
+            chat_completion_message["content"] = (
+                existing + json_mode_args if existing else json_mode_args
+            )
+
+        real_tools = [t for i, t in enumerate(tools) if i not in json_tool_indices]
+        return real_tools if real_tools else None
+
     def _transform_response(  # noqa: PLR0915
         self,
         model: str,
@@ -1801,7 +1887,7 @@ def _transform_response(  # noqa: PLR0915
                 additional_args={"complete_input_dict": data},
             )
 
-        json_mode: Optional[bool] = optional_params.pop("json_mode", None)
+        json_mode: Optional[bool] = optional_params.get("json_mode", None)
         ## RESPONSE OBJECT
         try:
             completion_response = ConverseResponseBlock(**response.json())  # type: ignore
@@ -1885,37 +1971,13 @@ def _transform_response(  # noqa: PLR0915
                 self._transform_thinking_blocks(reasoningContentBlocks)
             )
         chat_completion_message["content"] = content_str
-        if (
-            json_mode is True
-            and tools is not None
-            and len(tools) == 1
-            and tools[0]["function"].get("name") == RESPONSE_FORMAT_TOOL_NAME
-        ):
-            verbose_logger.debug(
-                "Processing JSON tool call response for response_format"
-            )
-            json_mode_content_str: Optional[str] = tools[0]["function"].get("arguments")
-            if json_mode_content_str is not None:
-                # Bedrock returns the response wrapped in a "properties" object
-                # We need to extract the actual content from this wrapper
-                try:
-                    response_data = json.loads(json_mode_content_str)
-
-                    # If Bedrock wrapped the response in "properties", extract the content
-                    if (
-                        isinstance(response_data, dict)
-                        and "properties" in response_data
-                        and len(response_data) == 1
-                    ):
-                        response_data = response_data["properties"]
-                        json_mode_content_str = json.dumps(response_data)
-                except json.JSONDecodeError:
-                    # If parsing fails, use the original response
-                    pass
-
-                chat_completion_message["content"] = json_mode_content_str
-        elif tools:
-            chat_completion_message["tool_calls"] = tools
+        filtered_tools = self._filter_json_mode_tools(
+            json_mode=json_mode,
+            tools=tools,
+            chat_completion_message=chat_completion_message,
+        )
+        if filtered_tools:
+            chat_completion_message["tool_calls"] = filtered_tools
 
         ## CALCULATING USAGE - bedrock returns usage in the headers
         usage = self._transform_usage(completion_response["usage"])
diff --git a/litellm/llms/bedrock/chat/invoke_handler.py b/litellm/llms/bedrock/chat/invoke_handler.py
index 1c58a11eebe..88f7341ed08 100644
--- a/litellm/llms/bedrock/chat/invoke_handler.py
+++ b/litellm/llms/bedrock/chat/invoke_handler.py
@@ -22,6 +22,7 @@
 from litellm import verbose_logger
 from litellm._uuid import uuid
 from litellm.caching.caching import InMemoryCache
+from litellm.constants import RESPONSE_FORMAT_TOOL_NAME
 from litellm.litellm_core_utils.core_helpers import map_finish_reason
 from litellm.litellm_core_utils.litellm_logging import Logging
 from litellm.litellm_core_utils.logging_utils import track_llm_api_timing
@@ -252,7 +253,7 @@ async def make_call(
                 response.aiter_bytes(chunk_size=stream_chunk_size)
             )
         else:
-            decoder = AWSEventStreamDecoder(model=model)
+            decoder = AWSEventStreamDecoder(model=model, json_mode=json_mode)
             completion_stream = decoder.aiter_bytes(
                 response.aiter_bytes(chunk_size=stream_chunk_size)
             )
@@ -346,7 +347,7 @@ def make_sync_call(
                 response.iter_bytes(chunk_size=stream_chunk_size)
             )
         else:
-            decoder = AWSEventStreamDecoder(model=model)
+            decoder = AWSEventStreamDecoder(model=model, json_mode=json_mode)
             completion_stream = decoder.iter_bytes(
                 response.iter_bytes(chunk_size=stream_chunk_size)
             )
@@ -1282,7 +1283,7 @@ def get_response_stream_shape():
 
 
 class AWSEventStreamDecoder:
-    def __init__(self, model: str) -> None:
+    def __init__(self, model: str, json_mode: Optional[bool] = False) -> None:
         from botocore.parsers import EventStreamJSONParser
 
         self.model = model
@@ -1290,6 +1291,8 @@ def __init__(self, model: str) -> None:
         self.content_blocks: List[ContentBlockDeltaEvent] = []
         self.tool_calls_index: Optional[int] = None
         self.response_id: Optional[str] = None
+        self.json_mode = json_mode
+        self._current_tool_name: Optional[str] = None
 
     def check_empty_tool_call_args(self) -> bool:
         """
@@ -1391,6 +1394,16 @@ def _handle_converse_start_event(
                 response_tool_name = get_bedrock_tool_name(
                     response_tool_name=_response_tool_name
                 )
+                self._current_tool_name = response_tool_name
+
+                # When json_mode is True, suppress the internal json_tool_call
+                # and convert its content to text in delta events instead
+                if (
+                    self.json_mode is True
+                    and response_tool_name == RESPONSE_FORMAT_TOOL_NAME
+                ):
+                    return tool_use, provider_specific_fields, thinking_blocks
+
                 self.tool_calls_index = (
                     0 if self.tool_calls_index is None else self.tool_calls_index + 1
                 )
@@ -1445,19 +1458,27 @@ def _handle_converse_delta_event(
         if "text" in delta_obj:
             text = delta_obj["text"]
         elif "toolUse" in delta_obj:
-            tool_use = {
-                "id": None,
-                "type": "function",
-                "function": {
-                    "name": None,
-                    "arguments": delta_obj["toolUse"]["input"],
-                },
-                "index": (
-                    self.tool_calls_index
-                    if self.tool_calls_index is not None
-                    else index
-                ),
-            }
+            # When json_mode is True and this is the internal json_tool_call,
+            # convert tool input to text content instead of tool call arguments
+            if (
+                self.json_mode is True
+                and self._current_tool_name == RESPONSE_FORMAT_TOOL_NAME
+            ):
+                text = delta_obj["toolUse"]["input"]
+            else:
+                tool_use = {
+                    "id": None,
+                    "type": "function",
+                    "function": {
+                        "name": None,
+                        "arguments": delta_obj["toolUse"]["input"],
+                    },
+                    "index": (
+                        self.tool_calls_index
+                        if self.tool_calls_index is not None
+                        else index
+                    ),
+                }
         elif "reasoningContent" in delta_obj:
             provider_specific_fields = {
                 "reasoningContent": delta_obj["reasoningContent"],
@@ -1494,6 +1515,17 @@ def _handle_converse_stop_event(
     ) -> Optional[ChatCompletionToolCallChunk]:
         """Handle stop/contentBlockIndex event in converse chunk parsing."""
         tool_use: Optional[ChatCompletionToolCallChunk] = None
+
+        # If the ending block was the internal json_tool_call, skip emitting
+        # the empty-args tool chunk and reset tracking state
+        if (
+            self.json_mode is True
+            and self._current_tool_name == RESPONSE_FORMAT_TOOL_NAME
+        ):
+            self._current_tool_name = None
+            return tool_use
+
+        self._current_tool_name = None
         is_empty = self.check_empty_tool_call_args()
         if is_empty:
             tool_use = {
diff --git a/tests/test_litellm/llms/bedrock/chat/test_converse_transformation.py b/tests/test_litellm/llms/bedrock/chat/test_converse_transformation.py
index f6d3d3c12f7..345f3ae7c5d 100644
--- a/tests/test_litellm/llms/bedrock/chat/test_converse_transformation.py
+++ b/tests/test_litellm/llms/bedrock/chat/test_converse_transformation.py
@@ -3493,3 +3493,262 @@ def test_no_thinking_param_does_not_error(self):
             drop_params=False,
         )
         assert "thinking" not in result or result.get("thinking") is None
+
+def test_transform_response_with_both_json_tool_call_and_real_tool():
+    """
+    When Bedrock returns BOTH json_tool_call AND a real tool (get_weather),
+    only the real tool should remain in tool_calls. The json_tool_call should be filtered out.
+    Fixes https://github.com/BerriAI/litellm/issues/18381
+    """
+    from litellm.llms.bedrock.chat.converse_transformation import AmazonConverseConfig
+    from litellm.types.utils import ModelResponse
+
+    response_json = {
+        "metrics": {"latencyMs": 200},
+        "output": {
+            "message": {
+                "role": "assistant",
+                "content": [
+                    {
+                        "toolUse": {
+                            "toolUseId": "tooluse_json_001",
+                            "name": "json_tool_call",
+                            "input": {
+                                "Current_Temperature": 62,
+                                "Weather_Explanation": "Mild and cool.",
+                            },
+                        }
+                    },
+                    {
+                        "toolUse": {
+                            "toolUseId": "tooluse_weather_001",
+                            "name": "get_weather",
+                            "input": {
+                                "location": "San Francisco, CA",
+                                "unit": "fahrenheit",
+                            },
+                        }
+                    },
+                ],
+            }
+        },
+        "stopReason": "tool_use",
+        "usage": {
+            "inputTokens": 100,
+            "outputTokens": 50,
+            "totalTokens": 150,
+            "cacheReadInputTokenCount": 0,
+            "cacheReadInputTokens": 0,
+            "cacheWriteInputTokenCount": 0,
+            "cacheWriteInputTokens": 0,
+        },
+    }
+
+    class MockResponse:
+        def json(self):
+            return response_json
+
+        @property
+        def text(self):
+            return json.dumps(response_json)
+
+    config = AmazonConverseConfig()
+    model_response = ModelResponse()
+    optional_params = {"json_mode": True}
+
+    result = config._transform_response(
+        model="bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0",
+        response=MockResponse(),
+        model_response=model_response,
+        stream=False,
+        logging_obj=None,
+        optional_params=optional_params,
+        api_key=None,
+        data=None,
+        messages=[],
+        encoding=None,
+    )
+
+    # Only real tool should remain
+    assert result.choices[0].message.tool_calls is not None
+    assert len(result.choices[0].message.tool_calls) == 1
+    assert result.choices[0].message.tool_calls[0].function.name == "get_weather"
+    assert (
+        result.choices[0].message.tool_calls[0].function.arguments
+        == '{"location": "San Francisco, CA", "unit": "fahrenheit"}'
+    )
+
+    # json_tool_call content should be preserved as message text
+    content = result.choices[0].message.content
+    assert content is not None
+    parsed = json.loads(content)
+    assert parsed["Current_Temperature"] == 62
+    assert parsed["Weather_Explanation"] == "Mild and cool."
+
+
+def test_transform_response_does_not_mutate_optional_params():
+    """
+    Verify that optional_params still contains json_mode after _transform_response.
+    Previously, .pop() was used which mutated the caller's dict.
+    """
+    from litellm.llms.bedrock.chat.converse_transformation import AmazonConverseConfig
+    from litellm.types.utils import ModelResponse
+
+    response_json = {
+        "metrics": {"latencyMs": 50},
+        "output": {
+            "message": {
+                "role": "assistant",
+                "content": [
+                    {
+                        "toolUse": {
+                            "toolUseId": "tooluse_001",
+                            "name": "json_tool_call",
+                            "input": {"result": "ok"},
+                        }
+                    }
+                ],
+            }
+        },
+        "stopReason": "tool_use",
+        "usage": {
+            "inputTokens": 10,
+            "outputTokens": 5,
+            "totalTokens": 15,
+            "cacheReadInputTokenCount": 0,
+            "cacheReadInputTokens": 0,
+            "cacheWriteInputTokenCount": 0,
+            "cacheWriteInputTokens": 0,
+        },
+    }
+
+    class MockResponse:
+        def json(self):
+            return response_json
+
+        @property
+        def text(self):
+            return json.dumps(response_json)
+
+    config = AmazonConverseConfig()
+    model_response = ModelResponse()
+    optional_params = {"json_mode": True, "other_key": "value"}
+
+    config._transform_response(
+        model="bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0",
+        response=MockResponse(),
+        model_response=model_response,
+        stream=False,
+        logging_obj=None,
+        optional_params=optional_params,
+        api_key=None,
+        data=None,
+        messages=[],
+        encoding=None,
+    )
+
+    # json_mode should still be in optional_params (not popped)
+    assert "json_mode" in optional_params
+    assert optional_params["json_mode"] is True
+    assert optional_params["other_key"] == "value"
+
+
+def test_streaming_filters_json_tool_call_with_real_tools():
+    """
+    Simulate streaming chunks where both json_tool_call and a real tool arrive.
+    Verify json_tool_call chunks are converted to text content while real tool
+    chunks pass through normally.
+    """
+    from litellm.llms.bedrock.chat.invoke_handler import AWSEventStreamDecoder
+    from litellm.types.llms.bedrock import (
+        ContentBlockDeltaEvent,
+        ContentBlockStartEvent,
+    )
+
+    decoder = AWSEventStreamDecoder(model="test-model", json_mode=True)
+
+    # Chunk 1: json_tool_call start
+    json_start = ContentBlockStartEvent(
+        toolUse={
+            "toolUseId": "tooluse_json_001",
+            "name": "json_tool_call",
+        }
+    )
+    tool_use_1, _, _ = decoder._handle_converse_start_event(json_start)
+    # json_tool_call start should be suppressed (return None tool_use)
+    assert tool_use_1 is None
+    # tool_calls_index should NOT have been incremented
+    assert decoder.tool_calls_index is None
+
+    # Chunk 2: json_tool_call delta — should become text, not tool_use
+    json_delta = ContentBlockDeltaEvent(toolUse={"input": '{"temp": 62}'})
+    text_2, tool_use_2, _, _, _ = decoder._handle_converse_delta_event(
+        json_delta, index=0
+    )
+    assert text_2 == '{"temp": 62}'
+    assert tool_use_2 is None
+
+    # Chunk 3: json_tool_call stop
+    stop_tool = decoder._handle_converse_stop_event(index=0)
+    assert stop_tool is None
+    # _current_tool_name should be reset
+    assert decoder._current_tool_name is None
+
+    # Chunk 4: real tool start
+    real_start = ContentBlockStartEvent(
+        toolUse={
+            "toolUseId": "tooluse_weather_001",
+            "name": "get_weather",
+        }
+    )
+    tool_use_4, _, _ = decoder._handle_converse_start_event(real_start)
+    assert tool_use_4 is not None
+    assert tool_use_4["function"]["name"] == "get_weather"
+    assert decoder.tool_calls_index == 0
+
+    # Chunk 5: real tool delta
+    real_delta = ContentBlockDeltaEvent(
+        toolUse={"input": '{"location": "SF"}'}
+    )
+    text_5, tool_use_5, _, _, _ = decoder._handle_converse_delta_event(
+        real_delta, index=1
+    )
+    assert text_5 == ""
+    assert tool_use_5 is not None
+    assert tool_use_5["function"]["arguments"] == '{"location": "SF"}'
+
+
+def test_streaming_without_json_mode_passes_all_tools():
+    """
+    Verify backward compatibility: when json_mode=False, all tools
+    (including json_tool_call if present) pass through unchanged.
+    """
+    from litellm.llms.bedrock.chat.invoke_handler import AWSEventStreamDecoder
+    from litellm.types.llms.bedrock import (
+        ContentBlockDeltaEvent,
+        ContentBlockStartEvent,
+    )
+
+    decoder = AWSEventStreamDecoder(model="test-model", json_mode=False)
+
+    # json_tool_call start — should pass through when json_mode=False
+    json_start = ContentBlockStartEvent(
+        toolUse={
+            "toolUseId": "tooluse_json_001",
+            "name": "json_tool_call",
+        }
+    )
+    tool_use, _, _ = decoder._handle_converse_start_event(json_start)
+    assert tool_use is not None
+    assert tool_use["function"]["name"] == "json_tool_call"
+    assert decoder.tool_calls_index == 0
+
+    # json_tool_call delta — should be a tool_use, not text
+    json_delta = ContentBlockDeltaEvent(toolUse={"input": '{"data": 1}'})
+    text, tool_use_delta, _, _, _ = decoder._handle_converse_delta_event(
+        json_delta, index=0
+    )
+    assert text == ""
+    assert tool_use_delta is not None
+    assert tool_use_delta["function"]["arguments"] == '{"data": 1}'
+