diff --git a/litellm/llms/bedrock/chat/converse_handler.py b/litellm/llms/bedrock/chat/converse_handler.py index 60a93b169c8..ec5b942ec1b 100644 --- a/litellm/llms/bedrock/chat/converse_handler.py +++ b/litellm/llms/bedrock/chat/converse_handler.py @@ -68,7 +68,7 @@ def make_sync_call( model_response=model_response, json_mode=json_mode ) else: - decoder = AWSEventStreamDecoder(model=model) + decoder = AWSEventStreamDecoder(model=model, json_mode=json_mode) completion_stream = decoder.iter_bytes(response.iter_bytes(chunk_size=stream_chunk_size)) # LOGGING diff --git a/litellm/llms/bedrock/chat/converse_transformation.py b/litellm/llms/bedrock/chat/converse_transformation.py index a0f2f65fb7f..d210f294c64 100644 --- a/litellm/llms/bedrock/chat/converse_transformation.py +++ b/litellm/llms/bedrock/chat/converse_transformation.py @@ -1779,6 +1779,92 @@ def _translate_message_content(self, content_blocks: List[ContentBlock]) -> Tupl return content_str, tools, reasoningContentBlocks, citationsContentBlocks + @staticmethod + def _unwrap_bedrock_properties(json_str: str) -> str: + """ + Unwrap Bedrock's response_format JSON structure. + + If the JSON has a single "properties" key, extract its value. + Otherwise, return the original string. + + Args: + json_str: JSON string to unwrap + + Returns: + Unwrapped JSON string or original if unwrapping not needed + """ + try: + response_data = json.loads(json_str) + if ( + isinstance(response_data, dict) + and "properties" in response_data + and len(response_data) == 1 + ): + response_data = response_data["properties"] + return json.dumps(response_data) + except json.JSONDecodeError: + pass + return json_str + + @staticmethod + def _filter_json_mode_tools( + json_mode: Optional[bool], + tools: List[ChatCompletionToolCallChunk], + chat_completion_message: ChatCompletionResponseMessage, + ) -> Optional[List[ChatCompletionToolCallChunk]]: + """ + When json_mode is True, Bedrock may return the internal `json_tool_call` + tool alongside real user-defined tools. This method handles 3 scenarios: + + 1. Only json_tool_call present -> convert to text content, return None + 2. Mixed json_tool_call + real -> filter out json_tool_call, return real tools + 3. No json_tool_call / no json_mode -> return tools as-is + """ + if not json_mode or not tools: + return tools if tools else None + + json_tool_indices = [ + i + for i, t in enumerate(tools) + if t["function"].get("name") == RESPONSE_FORMAT_TOOL_NAME + ] + + if not json_tool_indices: + # No json_tool_call found, return tools unchanged + return tools + + if len(json_tool_indices) == len(tools): + # All tools are json_tool_call — convert first one to content + verbose_logger.debug( + "Processing JSON tool call response for response_format" + ) + json_mode_content_str: Optional[str] = tools[0]["function"].get( + "arguments" + ) + if json_mode_content_str is not None: + json_mode_content_str = AmazonConverseConfig._unwrap_bedrock_properties( + json_mode_content_str + ) + chat_completion_message["content"] = json_mode_content_str + return None + + # Mixed: filter out json_tool_call, keep real tools. + # Preserve the json_tool_call content as message text so the structured + # output from response_format is not silently lost. + first_idx = json_tool_indices[0] + json_mode_args = tools[first_idx]["function"].get("arguments") + if json_mode_args is not None: + json_mode_args = AmazonConverseConfig._unwrap_bedrock_properties( + json_mode_args + ) + existing = chat_completion_message.get("content") or "" + chat_completion_message["content"] = ( + existing + json_mode_args if existing else json_mode_args + ) + + real_tools = [t for i, t in enumerate(tools) if i not in json_tool_indices] + return real_tools if real_tools else None + def _transform_response( # noqa: PLR0915 self, model: str, @@ -1801,7 +1887,7 @@ def _transform_response( # noqa: PLR0915 additional_args={"complete_input_dict": data}, ) - json_mode: Optional[bool] = optional_params.pop("json_mode", None) + json_mode: Optional[bool] = optional_params.get("json_mode", None) ## RESPONSE OBJECT try: completion_response = ConverseResponseBlock(**response.json()) # type: ignore @@ -1885,37 +1971,13 @@ def _transform_response( # noqa: PLR0915 self._transform_thinking_blocks(reasoningContentBlocks) ) chat_completion_message["content"] = content_str - if ( - json_mode is True - and tools is not None - and len(tools) == 1 - and tools[0]["function"].get("name") == RESPONSE_FORMAT_TOOL_NAME - ): - verbose_logger.debug( - "Processing JSON tool call response for response_format" - ) - json_mode_content_str: Optional[str] = tools[0]["function"].get("arguments") - if json_mode_content_str is not None: - # Bedrock returns the response wrapped in a "properties" object - # We need to extract the actual content from this wrapper - try: - response_data = json.loads(json_mode_content_str) - - # If Bedrock wrapped the response in "properties", extract the content - if ( - isinstance(response_data, dict) - and "properties" in response_data - and len(response_data) == 1 - ): - response_data = response_data["properties"] - json_mode_content_str = json.dumps(response_data) - except json.JSONDecodeError: - # If parsing fails, use the original response - pass - - chat_completion_message["content"] = json_mode_content_str - elif tools: - chat_completion_message["tool_calls"] = tools + filtered_tools = self._filter_json_mode_tools( + json_mode=json_mode, + tools=tools, + chat_completion_message=chat_completion_message, + ) + if filtered_tools: + chat_completion_message["tool_calls"] = filtered_tools ## CALCULATING USAGE - bedrock returns usage in the headers usage = self._transform_usage(completion_response["usage"]) diff --git a/litellm/llms/bedrock/chat/invoke_handler.py b/litellm/llms/bedrock/chat/invoke_handler.py index 1c58a11eebe..88f7341ed08 100644 --- a/litellm/llms/bedrock/chat/invoke_handler.py +++ b/litellm/llms/bedrock/chat/invoke_handler.py @@ -22,6 +22,7 @@ from litellm import verbose_logger from litellm._uuid import uuid from litellm.caching.caching import InMemoryCache +from litellm.constants import RESPONSE_FORMAT_TOOL_NAME from litellm.litellm_core_utils.core_helpers import map_finish_reason from litellm.litellm_core_utils.litellm_logging import Logging from litellm.litellm_core_utils.logging_utils import track_llm_api_timing @@ -252,7 +253,7 @@ async def make_call( response.aiter_bytes(chunk_size=stream_chunk_size) ) else: - decoder = AWSEventStreamDecoder(model=model) + decoder = AWSEventStreamDecoder(model=model, json_mode=json_mode) completion_stream = decoder.aiter_bytes( response.aiter_bytes(chunk_size=stream_chunk_size) ) @@ -346,7 +347,7 @@ def make_sync_call( response.iter_bytes(chunk_size=stream_chunk_size) ) else: - decoder = AWSEventStreamDecoder(model=model) + decoder = AWSEventStreamDecoder(model=model, json_mode=json_mode) completion_stream = decoder.iter_bytes( response.iter_bytes(chunk_size=stream_chunk_size) ) @@ -1282,7 +1283,7 @@ def get_response_stream_shape(): class AWSEventStreamDecoder: - def __init__(self, model: str) -> None: + def __init__(self, model: str, json_mode: Optional[bool] = False) -> None: from botocore.parsers import EventStreamJSONParser self.model = model @@ -1290,6 +1291,8 @@ def __init__(self, model: str) -> None: self.content_blocks: List[ContentBlockDeltaEvent] = [] self.tool_calls_index: Optional[int] = None self.response_id: Optional[str] = None + self.json_mode = json_mode + self._current_tool_name: Optional[str] = None def check_empty_tool_call_args(self) -> bool: """ @@ -1391,6 +1394,16 @@ def _handle_converse_start_event( response_tool_name = get_bedrock_tool_name( response_tool_name=_response_tool_name ) + self._current_tool_name = response_tool_name + + # When json_mode is True, suppress the internal json_tool_call + # and convert its content to text in delta events instead + if ( + self.json_mode is True + and response_tool_name == RESPONSE_FORMAT_TOOL_NAME + ): + return tool_use, provider_specific_fields, thinking_blocks + self.tool_calls_index = ( 0 if self.tool_calls_index is None else self.tool_calls_index + 1 ) @@ -1445,19 +1458,27 @@ def _handle_converse_delta_event( if "text" in delta_obj: text = delta_obj["text"] elif "toolUse" in delta_obj: - tool_use = { - "id": None, - "type": "function", - "function": { - "name": None, - "arguments": delta_obj["toolUse"]["input"], - }, - "index": ( - self.tool_calls_index - if self.tool_calls_index is not None - else index - ), - } + # When json_mode is True and this is the internal json_tool_call, + # convert tool input to text content instead of tool call arguments + if ( + self.json_mode is True + and self._current_tool_name == RESPONSE_FORMAT_TOOL_NAME + ): + text = delta_obj["toolUse"]["input"] + else: + tool_use = { + "id": None, + "type": "function", + "function": { + "name": None, + "arguments": delta_obj["toolUse"]["input"], + }, + "index": ( + self.tool_calls_index + if self.tool_calls_index is not None + else index + ), + } elif "reasoningContent" in delta_obj: provider_specific_fields = { "reasoningContent": delta_obj["reasoningContent"], @@ -1494,6 +1515,17 @@ def _handle_converse_stop_event( ) -> Optional[ChatCompletionToolCallChunk]: """Handle stop/contentBlockIndex event in converse chunk parsing.""" tool_use: Optional[ChatCompletionToolCallChunk] = None + + # If the ending block was the internal json_tool_call, skip emitting + # the empty-args tool chunk and reset tracking state + if ( + self.json_mode is True + and self._current_tool_name == RESPONSE_FORMAT_TOOL_NAME + ): + self._current_tool_name = None + return tool_use + + self._current_tool_name = None is_empty = self.check_empty_tool_call_args() if is_empty: tool_use = { diff --git a/tests/test_litellm/llms/bedrock/chat/test_converse_transformation.py b/tests/test_litellm/llms/bedrock/chat/test_converse_transformation.py index f6d3d3c12f7..345f3ae7c5d 100644 --- a/tests/test_litellm/llms/bedrock/chat/test_converse_transformation.py +++ b/tests/test_litellm/llms/bedrock/chat/test_converse_transformation.py @@ -3493,3 +3493,262 @@ def test_no_thinking_param_does_not_error(self): drop_params=False, ) assert "thinking" not in result or result.get("thinking") is None + +def test_transform_response_with_both_json_tool_call_and_real_tool(): + """ + When Bedrock returns BOTH json_tool_call AND a real tool (get_weather), + only the real tool should remain in tool_calls. The json_tool_call should be filtered out. + Fixes https://github.com/BerriAI/litellm/issues/18381 + """ + from litellm.llms.bedrock.chat.converse_transformation import AmazonConverseConfig + from litellm.types.utils import ModelResponse + + response_json = { + "metrics": {"latencyMs": 200}, + "output": { + "message": { + "role": "assistant", + "content": [ + { + "toolUse": { + "toolUseId": "tooluse_json_001", + "name": "json_tool_call", + "input": { + "Current_Temperature": 62, + "Weather_Explanation": "Mild and cool.", + }, + } + }, + { + "toolUse": { + "toolUseId": "tooluse_weather_001", + "name": "get_weather", + "input": { + "location": "San Francisco, CA", + "unit": "fahrenheit", + }, + } + }, + ], + } + }, + "stopReason": "tool_use", + "usage": { + "inputTokens": 100, + "outputTokens": 50, + "totalTokens": 150, + "cacheReadInputTokenCount": 0, + "cacheReadInputTokens": 0, + "cacheWriteInputTokenCount": 0, + "cacheWriteInputTokens": 0, + }, + } + + class MockResponse: + def json(self): + return response_json + + @property + def text(self): + return json.dumps(response_json) + + config = AmazonConverseConfig() + model_response = ModelResponse() + optional_params = {"json_mode": True} + + result = config._transform_response( + model="bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0", + response=MockResponse(), + model_response=model_response, + stream=False, + logging_obj=None, + optional_params=optional_params, + api_key=None, + data=None, + messages=[], + encoding=None, + ) + + # Only real tool should remain + assert result.choices[0].message.tool_calls is not None + assert len(result.choices[0].message.tool_calls) == 1 + assert result.choices[0].message.tool_calls[0].function.name == "get_weather" + assert ( + result.choices[0].message.tool_calls[0].function.arguments + == '{"location": "San Francisco, CA", "unit": "fahrenheit"}' + ) + + # json_tool_call content should be preserved as message text + content = result.choices[0].message.content + assert content is not None + parsed = json.loads(content) + assert parsed["Current_Temperature"] == 62 + assert parsed["Weather_Explanation"] == "Mild and cool." + + +def test_transform_response_does_not_mutate_optional_params(): + """ + Verify that optional_params still contains json_mode after _transform_response. + Previously, .pop() was used which mutated the caller's dict. + """ + from litellm.llms.bedrock.chat.converse_transformation import AmazonConverseConfig + from litellm.types.utils import ModelResponse + + response_json = { + "metrics": {"latencyMs": 50}, + "output": { + "message": { + "role": "assistant", + "content": [ + { + "toolUse": { + "toolUseId": "tooluse_001", + "name": "json_tool_call", + "input": {"result": "ok"}, + } + } + ], + } + }, + "stopReason": "tool_use", + "usage": { + "inputTokens": 10, + "outputTokens": 5, + "totalTokens": 15, + "cacheReadInputTokenCount": 0, + "cacheReadInputTokens": 0, + "cacheWriteInputTokenCount": 0, + "cacheWriteInputTokens": 0, + }, + } + + class MockResponse: + def json(self): + return response_json + + @property + def text(self): + return json.dumps(response_json) + + config = AmazonConverseConfig() + model_response = ModelResponse() + optional_params = {"json_mode": True, "other_key": "value"} + + config._transform_response( + model="bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0", + response=MockResponse(), + model_response=model_response, + stream=False, + logging_obj=None, + optional_params=optional_params, + api_key=None, + data=None, + messages=[], + encoding=None, + ) + + # json_mode should still be in optional_params (not popped) + assert "json_mode" in optional_params + assert optional_params["json_mode"] is True + assert optional_params["other_key"] == "value" + + +def test_streaming_filters_json_tool_call_with_real_tools(): + """ + Simulate streaming chunks where both json_tool_call and a real tool arrive. + Verify json_tool_call chunks are converted to text content while real tool + chunks pass through normally. + """ + from litellm.llms.bedrock.chat.invoke_handler import AWSEventStreamDecoder + from litellm.types.llms.bedrock import ( + ContentBlockDeltaEvent, + ContentBlockStartEvent, + ) + + decoder = AWSEventStreamDecoder(model="test-model", json_mode=True) + + # Chunk 1: json_tool_call start + json_start = ContentBlockStartEvent( + toolUse={ + "toolUseId": "tooluse_json_001", + "name": "json_tool_call", + } + ) + tool_use_1, _, _ = decoder._handle_converse_start_event(json_start) + # json_tool_call start should be suppressed (return None tool_use) + assert tool_use_1 is None + # tool_calls_index should NOT have been incremented + assert decoder.tool_calls_index is None + + # Chunk 2: json_tool_call delta — should become text, not tool_use + json_delta = ContentBlockDeltaEvent(toolUse={"input": '{"temp": 62}'}) + text_2, tool_use_2, _, _, _ = decoder._handle_converse_delta_event( + json_delta, index=0 + ) + assert text_2 == '{"temp": 62}' + assert tool_use_2 is None + + # Chunk 3: json_tool_call stop + stop_tool = decoder._handle_converse_stop_event(index=0) + assert stop_tool is None + # _current_tool_name should be reset + assert decoder._current_tool_name is None + + # Chunk 4: real tool start + real_start = ContentBlockStartEvent( + toolUse={ + "toolUseId": "tooluse_weather_001", + "name": "get_weather", + } + ) + tool_use_4, _, _ = decoder._handle_converse_start_event(real_start) + assert tool_use_4 is not None + assert tool_use_4["function"]["name"] == "get_weather" + assert decoder.tool_calls_index == 0 + + # Chunk 5: real tool delta + real_delta = ContentBlockDeltaEvent( + toolUse={"input": '{"location": "SF"}'} + ) + text_5, tool_use_5, _, _, _ = decoder._handle_converse_delta_event( + real_delta, index=1 + ) + assert text_5 == "" + assert tool_use_5 is not None + assert tool_use_5["function"]["arguments"] == '{"location": "SF"}' + + +def test_streaming_without_json_mode_passes_all_tools(): + """ + Verify backward compatibility: when json_mode=False, all tools + (including json_tool_call if present) pass through unchanged. + """ + from litellm.llms.bedrock.chat.invoke_handler import AWSEventStreamDecoder + from litellm.types.llms.bedrock import ( + ContentBlockDeltaEvent, + ContentBlockStartEvent, + ) + + decoder = AWSEventStreamDecoder(model="test-model", json_mode=False) + + # json_tool_call start — should pass through when json_mode=False + json_start = ContentBlockStartEvent( + toolUse={ + "toolUseId": "tooluse_json_001", + "name": "json_tool_call", + } + ) + tool_use, _, _ = decoder._handle_converse_start_event(json_start) + assert tool_use is not None + assert tool_use["function"]["name"] == "json_tool_call" + assert decoder.tool_calls_index == 0 + + # json_tool_call delta — should be a tool_use, not text + json_delta = ContentBlockDeltaEvent(toolUse={"input": '{"data": 1}'}) + text, tool_use_delta, _, _, _ = decoder._handle_converse_delta_event( + json_delta, index=0 + ) + assert text == "" + assert tool_use_delta is not None + assert tool_use_delta["function"]["arguments"] == '{"data": 1}' +