diff --git a/litellm/llms/bedrock/chat/converse_transformation.py b/litellm/llms/bedrock/chat/converse_transformation.py index 229457a73b4..fb55a7f2448 100644 --- a/litellm/llms/bedrock/chat/converse_transformation.py +++ b/litellm/llms/bedrock/chat/converse_transformation.py @@ -91,34 +91,6 @@ "compact-2026-01-12", # The compact beta feature is not currently supported on the Converse and ConverseStream APIs ] -# Models that support Bedrock's native structured outputs API (outputConfig.textFormat) -# Uses substring matching against the Bedrock model ID -# Ref: https://docs.aws.amazon.com/bedrock/latest/userguide/structured-output.html -BEDROCK_NATIVE_STRUCTURED_OUTPUT_MODELS = { - # Anthropic Claude 4.5+ - "claude-haiku-4-5", - "claude-sonnet-4-5", - "claude-opus-4-5", - "claude-opus-4-6", - # Qwen3 - "qwen3", - # DeepSeek - "deepseek-v3.1", - # Gemma 3 - "gemma-3", - # MiniMax - "minimax-m2", - # Mistral (magistral-small excluded: broken constrained decoding on Bedrock) - "ministral", - "mistral-large-3", - "voxtral", - # Moonshot - "kimi-k2", - # NVIDIA - "nemotron-nano", - # OpenAI (gpt-oss excluded: broken constrained decoding, works via tool-call fallback) -} - class AmazonConverseConfig(BaseConfig): """ @@ -188,8 +160,7 @@ def _convert_consecutive_user_messages_to_guarded_text( if isinstance(content, list): has_guarded_text = any( - isinstance(item, dict) and item.get("type") == "guarded_text" - for item in content + isinstance(item, dict) and item.get("type") == "guarded_text" for item in content ) if has_guarded_text: continue # Skip this message if it already has guarded_text @@ -350,13 +321,9 @@ def _is_nova_2_model(self, model: str) -> bool: # Check if the model is a Nova 2 model (matches nova-2-lite, nova-2-pro, etc.) # Also check for nova-2/ spec prefix for imported models - return model_without_region.startswith( - "amazon.nova-2-" - ) or model_without_region.startswith("nova-2/") + return model_without_region.startswith("amazon.nova-2-") or model_without_region.startswith("nova-2/") - def _map_web_search_options( - self, web_search_options: dict, model: str - ) -> Optional[BedrockToolBlock]: + def _map_web_search_options(self, web_search_options: dict, model: str) -> Optional[BedrockToolBlock]: """ Map web_search_options to Nova grounding systemTool. @@ -385,9 +352,7 @@ def _map_web_search_options( # (unlike Anthropic), so we just enable grounding with no options return BedrockToolBlock(systemTool={"name": "nova_grounding"}) - def _transform_reasoning_effort_to_reasoning_config( - self, reasoning_effort: str - ) -> dict: + def _transform_reasoning_effort_to_reasoning_config(self, reasoning_effort: str) -> dict: """ Transform reasoning_effort parameter to Nova 2 reasoningConfig structure. @@ -432,9 +397,7 @@ def _transform_reasoning_effort_to_reasoning_config( } } - def _handle_reasoning_effort_parameter( - self, model: str, reasoning_effort: str, optional_params: dict - ) -> None: + def _handle_reasoning_effort_parameter(self, model: str, reasoning_effort: str, optional_params: dict) -> None: """ Handle the reasoning_effort parameter based on the model type. @@ -471,9 +434,7 @@ def _handle_reasoning_effort_parameter( optional_params["reasoning_effort"] = reasoning_effort elif self._is_nova_2_model(model): # Nova 2 models: transform to reasoningConfig - reasoning_config = self._transform_reasoning_effort_to_reasoning_config( - reasoning_effort - ) + reasoning_config = self._transform_reasoning_effort_to_reasoning_config(reasoning_effort) optional_params.update(reasoning_config) else: # Anthropic and other models: convert to thinking parameter @@ -493,8 +454,7 @@ def _clamp_thinking_budget_tokens(optional_params: dict) -> None: budget = thinking.get("budget_tokens") if isinstance(budget, int) and budget < BEDROCK_MIN_THINKING_BUDGET_TOKENS: verbose_logger.debug( - "Bedrock requires thinking.budget_tokens >= %d, got %d. " - "Clamping to minimum.", + "Bedrock requires thinking.budget_tokens >= %d, got %d. Clamping to minimum.", BEDROCK_MIN_THINKING_BUDGET_TOKENS, budget, ) @@ -518,9 +478,7 @@ def get_supported_openai_params(self, model: str) -> List[str]: "parallel_tool_calls", ] - if ( - "arn" in model - ): # we can't infer the model from the arn, so just add all params + if "arn" in model: # we can't infer the model from the arn, so just add all params supported_params.append("tools") supported_params.append("tool_choice") supported_params.append("thinking") @@ -542,9 +500,7 @@ def get_supported_openai_params(self, model: str) -> List[str]: or base_model.startswith("meta.llama3-3") or base_model.startswith("meta.llama4") or base_model.startswith("amazon.nova") - or supports_function_calling( - model=model, custom_llm_provider=self.custom_llm_provider - ) + or supports_function_calling(model=model, custom_llm_provider=self.custom_llm_provider) ): supported_params.append("tools") @@ -554,9 +510,7 @@ def get_supported_openai_params(self, model: str) -> List[str]: if litellm.utils.supports_tool_choice( model=model, custom_llm_provider=self.custom_llm_provider - ) or litellm.utils.supports_tool_choice( - model=base_model, custom_llm_provider=self.custom_llm_provider - ): + ) or litellm.utils.supports_tool_choice(model=base_model, custom_llm_provider=self.custom_llm_provider): # only anthropic and mistral support tool choice config. otherwise (E.g. cohere) will fail the call - https://docs.aws.amazon.com/bedrock/latest/APIReference/API_runtime_ToolChoice.html supported_params.append("tool_choice") @@ -575,9 +529,7 @@ def get_supported_openai_params(self, model: str) -> List[str]: model=model, custom_llm_provider=self.custom_llm_provider, ) - or supports_reasoning( - model=base_model, custom_llm_provider=self.custom_llm_provider - ) + or supports_reasoning(model=base_model, custom_llm_provider=self.custom_llm_provider) ): supported_params.append("thinking") supported_params.append("reasoning_effort") @@ -602,9 +554,7 @@ def map_tool_choice_values( return ToolChoiceValuesBlock(auto={}) elif isinstance(tool_choice, dict): # only supported for anthropic + mistral models - https://docs.aws.amazon.com/bedrock/latest/APIReference/API_runtime_ToolChoice.html - specific_tool = SpecificToolChoiceBlock( - name=tool_choice.get("function", {}).get("name", "") - ) + specific_tool = SpecificToolChoiceBlock(name=tool_choice.get("function", {}).get("name", "")) return ToolChoiceValuesBlock(tool=specific_tool) else: raise litellm.utils.UnsupportedParamsError( @@ -624,15 +574,9 @@ def get_supported_video_types(self) -> List[str]: return ["mp4", "mov", "mkv", "webm", "flv", "mpeg", "mpg", "wmv", "3gp"] def get_all_supported_content_types(self) -> List[str]: - return ( - self.get_supported_image_types() - + self.get_supported_document_types() - + self.get_supported_video_types() - ) + return self.get_supported_image_types() + self.get_supported_document_types() + self.get_supported_video_types() - def is_computer_use_tool_used( - self, tools: Optional[List[OpenAIChatCompletionToolParam]], model: str - ) -> bool: + def is_computer_use_tool_used(self, tools: Optional[List[OpenAIChatCompletionToolParam]], model: str) -> bool: """Check if computer use tools are being used in the request.""" if tools is None: return False @@ -645,9 +589,7 @@ def is_computer_use_tool_used( return True return False - def _transform_computer_use_tools( - self, computer_use_tools: List[OpenAIChatCompletionToolParam] - ) -> List[dict]: + def _transform_computer_use_tools(self, computer_use_tools: List[OpenAIChatCompletionToolParam]) -> List[dict]: """Transform computer use tools to Bedrock format.""" transformed_tools: List[dict] = [] @@ -689,9 +631,7 @@ def _transform_computer_use_tools( def _separate_computer_use_tools( self, tools: List[OpenAIChatCompletionToolParam], model: str - ) -> Tuple[ - List[OpenAIChatCompletionToolParam], List[OpenAIChatCompletionToolParam] - ]: + ) -> Tuple[List[OpenAIChatCompletionToolParam], List[OpenAIChatCompletionToolParam]]: """ Separate computer use tools from regular function tools. @@ -764,10 +704,27 @@ def _create_json_tool_call_for_response_format( @staticmethod def _supports_native_structured_outputs(model: str) -> bool: - """Check if the Bedrock model supports native structured outputs (outputConfig.textFormat).""" - return any( - substring in model for substring in BEDROCK_NATIVE_STRUCTURED_OUTPUT_MODELS - ) + """Check if the Bedrock model supports native structured outputs (outputConfig.textFormat). + + Looks up the ``supports_native_structured_output`` flag in + ``litellm.model_cost`` (set in the cost JSON). + Ref: https://docs.aws.amazon.com/bedrock/latest/userguide/structured-output.html + """ + from litellm.llms.bedrock.common_utils import get_bedrock_base_model + + base_model = get_bedrock_base_model(model) + + # Try direct lookup + info = litellm.model_cost.get(base_model) + + # Try without version suffix (e.g. "model-v1:0" -> "model-v1") + if info is None and ":" in base_model: + info = litellm.model_cost.get(base_model.rsplit(":", 1)[0]) + + if info is not None: + return info.get("supports_native_structured_output", False) is True + + return False @staticmethod def _add_additional_properties_to_schema(schema: dict) -> dict: @@ -790,25 +747,18 @@ def _add_additional_properties_to_schema(schema: dict) -> dict: # Recurse into nested schemas if "properties" in result and isinstance(result["properties"], dict): result["properties"] = { - k: AmazonConverseConfig._add_additional_properties_to_schema(v) - for k, v in result["properties"].items() + k: AmazonConverseConfig._add_additional_properties_to_schema(v) for k, v in result["properties"].items() } if "items" in result and isinstance(result["items"], dict): - result["items"] = AmazonConverseConfig._add_additional_properties_to_schema( - result["items"] - ) + result["items"] = AmazonConverseConfig._add_additional_properties_to_schema(result["items"]) for defs_key in ("$defs", "definitions"): if defs_key in result and isinstance(result[defs_key], dict): result[defs_key] = { - k: AmazonConverseConfig._add_additional_properties_to_schema(v) - for k, v in result[defs_key].items() + k: AmazonConverseConfig._add_additional_properties_to_schema(v) for k, v in result[defs_key].items() } for key in ("anyOf", "allOf", "oneOf"): if key in result and isinstance(result[key], list): - result[key] = [ - AmazonConverseConfig._add_additional_properties_to_schema(item) - for item in result[key] - ] + result[key] = [AmazonConverseConfig._add_additional_properties_to_schema(item) for item in result[key]] return result @@ -838,9 +788,7 @@ def _create_output_config_for_response_format( } """ if json_schema is not None: - json_schema = AmazonConverseConfig._add_additional_properties_to_schema( - json_schema - ) + json_schema = AmazonConverseConfig._add_additional_properties_to_schema(json_schema) schema_str = json.dumps(json_schema) if json_schema is not None else "{}" json_schema_def: JsonSchemaDefinition = {"schema": schema_str} if name is not None: @@ -862,14 +810,9 @@ def _apply_tool_call_transformation( non_default_params: dict, optional_params: dict, ): - optional_params = self._add_tools_to_optional_params( - optional_params=optional_params, tools=tools - ) + optional_params = self._add_tools_to_optional_params(optional_params=optional_params, tools=tools) - if ( - "meta.llama3-3-70b-instruct-v1:0" in model - and non_default_params.get("stream", False) is True - ): + if "meta.llama3-3-70b-instruct-v1:0" in model and non_default_params.get("stream", False) is True: optional_params["fake_stream"] = True def map_openai_params( @@ -913,7 +856,9 @@ def map_openai_params( ) if param == "tool_choice": _tool_choice_value = self.map_tool_choice_values( - model=model, tool_choice=value, drop_params=drop_params # type: ignore + model=model, + tool_choice=value, + drop_params=drop_params, # type: ignore ) if _tool_choice_value is not None: optional_params["tool_choice"] = _tool_choice_value @@ -1024,14 +969,10 @@ def _translate_response_format_param( json_schema=json_schema, description=description, ) - optional_params = self._add_tools_to_optional_params( - optional_params=optional_params, tools=[_tool] - ) + optional_params = self._add_tools_to_optional_params(optional_params=optional_params, tools=[_tool]) if ( - litellm.utils.supports_tool_choice( - model=model, custom_llm_provider=self.custom_llm_provider - ) + litellm.utils.supports_tool_choice(model=model, custom_llm_provider=self.custom_llm_provider) and not is_thinking_enabled ): optional_params["tool_choice"] = ToolChoiceValuesBlock( @@ -1043,9 +984,7 @@ def _translate_response_format_param( optional_params["json_mode"] = True return optional_params - def update_optional_params_with_thinking_tokens( - self, non_default_params: dict, optional_params: dict - ): + def update_optional_params_with_thinking_tokens(self, non_default_params: dict, optional_params: dict): """ Handles scenario where max tokens is not specified. For anthropic models (anthropic api/bedrock/vertex ai), this requires having the max tokens being set and being greater than the thinking token budget. @@ -1063,13 +1002,9 @@ def update_optional_params_with_thinking_tokens( is_thinking_enabled = self.is_thinking_enabled(optional_params) is_max_tokens_in_request = self.is_max_tokens_in_request(non_default_params) if is_thinking_enabled and not is_max_tokens_in_request: - thinking_token_budget = cast(dict, optional_params["thinking"]).get( - "budget_tokens", None - ) + thinking_token_budget = cast(dict, optional_params["thinking"]).get("budget_tokens", None) if thinking_token_budget is not None: - optional_params["maxTokens"] = ( - thinking_token_budget + DEFAULT_MAX_TOKENS - ) + optional_params["maxTokens"] = thinking_token_budget + DEFAULT_MAX_TOKENS @overload def _get_cache_point_block( @@ -1135,23 +1070,15 @@ def _transform_system_message( if message["role"] == "system": system_prompt_indices.append(idx) if isinstance(message["content"], str) and message["content"]: - system_content_blocks.append( - SystemContentBlock(text=message["content"]) - ) - cache_block = self._get_cache_point_block( - message, block_type="system", model=model - ) + system_content_blocks.append(SystemContentBlock(text=message["content"])) + cache_block = self._get_cache_point_block(message, block_type="system", model=model) if cache_block: system_content_blocks.append(cache_block) elif isinstance(message["content"], list): for m in message["content"]: if m.get("type") == "text" and m.get("text"): - system_content_blocks.append( - SystemContentBlock(text=m["text"]) - ) - cache_block = self._get_cache_point_block( - m, block_type="system", model=model - ) + system_content_blocks.append(SystemContentBlock(text=m["text"])) + cache_block = self._get_cache_point_block(m, block_type="system", model=model) if cache_block: system_content_blocks.append(cache_block) if len(system_prompt_indices) > 0: @@ -1189,16 +1116,10 @@ def _prepare_request_params( # Exceptions should not be stored in optional_params (this is a defensive fix) cleaned_params = filter_exceptions_from_params(optional_params) inference_params = safe_deep_copy(cleaned_params) - supported_converse_params = list( - AmazonConverseConfig.__annotations__.keys() - ) + ["top_k"] + supported_converse_params = list(AmazonConverseConfig.__annotations__.keys()) + ["top_k"] supported_tool_call_params = ["tools", "tool_choice"] supported_config_params = list(self.get_config_blocks().keys()) - total_supported_params = ( - supported_converse_params - + supported_tool_call_params - + supported_config_params - ) + total_supported_params = supported_converse_params + supported_tool_call_params + supported_config_params inference_params.pop("json_mode", None) # used for handling json_schema # Anthropic-only key. Bedrock expects `outputConfig` (camelCase) and # will reject `output_config` if it leaks through pass-through routes. @@ -1209,25 +1130,15 @@ def _prepare_request_params( if request_metadata is not None: self._validate_request_metadata(request_metadata) - output_config: Optional[OutputConfigBlock] = inference_params.pop( - "outputConfig", None - ) - inference_params.pop( - "output_config", None - ) # Bedrock Converse doesn't support it + output_config: Optional[OutputConfigBlock] = inference_params.pop("outputConfig", None) + inference_params.pop("output_config", None) # Bedrock Converse doesn't support it # keep supported params in 'inference_params', and set all model-specific params in 'additional_request_params' - additional_request_params = { - k: v for k, v in inference_params.items() if k not in total_supported_params - } - inference_params = { - k: v for k, v in inference_params.items() if k in total_supported_params - } + additional_request_params = {k: v for k, v in inference_params.items() if k not in total_supported_params} + inference_params = {k: v for k, v in inference_params.items() if k in total_supported_params} # Handle parallel_tool_calls configuration - parallel_tool_use_config = additional_request_params.pop( - "_parallel_tool_use_config", None - ) + parallel_tool_use_config = additional_request_params.pop("_parallel_tool_use_config", None) if parallel_tool_use_config is not None and is_claude_4_5_on_bedrock(model): for key, value in parallel_tool_use_config.items(): if ( @@ -1242,9 +1153,7 @@ def _prepare_request_params( additional_request_params.pop("parallel_tool_calls", None) # Only set the topK value in for models that support it - additional_request_params.update( - self._handle_top_k_value(model, inference_params) - ) + additional_request_params.update(self._handle_top_k_value(model, inference_params)) # Filter out internal/MCP-related parameters that shouldn't be sent to the API # These are LiteLLM internal parameters, not API parameters @@ -1253,9 +1162,7 @@ def _prepare_request_params( # Filter out non-serializable objects (exceptions, callables, logging objects, etc.) # from additional_request_params to prevent JSON serialization errors # This filters: Exception objects, callable objects (functions), Logging objects, etc. - additional_request_params = filter_exceptions_from_params( - additional_request_params - ) + additional_request_params = filter_exceptions_from_params(additional_request_params) return ( inference_params, @@ -1302,9 +1209,7 @@ def _process_tools_and_beta( # Only separate tools if computer use tools are actually present if filtered_tools and self.is_computer_use_tool_used(filtered_tools, model): # Separate computer use tools from regular function tools - computer_use_tools, regular_tools = self._separate_computer_use_tools( - filtered_tools, model - ) + computer_use_tools, regular_tools = self._separate_computer_use_tools(filtered_tools, model) # Process regular function tools using existing logic bedrock_tools = _bedrock_tools_pt(regular_tools) @@ -1365,9 +1270,7 @@ def _process_tools_and_beta( anthropic_beta_list.append(computer_use_header) # Transform computer use tools to proper Bedrock format - transformed_computer_tools = self._transform_computer_use_tools( - computer_use_tools - ) + transformed_computer_tools = self._transform_computer_use_tools(computer_use_tools) additional_request_params["tools"] = transformed_computer_tools else: # No computer use tools, process all tools as regular tools @@ -1396,15 +1299,9 @@ def _transform_request_helper( """ Bedrock doesn't support tool calling without `tools=` param specified. """ - if ( - "tools" not in optional_params - and messages is not None - and has_tool_call_blocks(messages) - ): + if "tools" not in optional_params and messages is not None and has_tool_call_blocks(messages): if litellm.modify_params: - optional_params["tools"] = add_dummy_tool( - custom_llm_provider="bedrock_converse" - ) + optional_params["tools"] = add_dummy_tool(custom_llm_provider="bedrock_converse") else: raise litellm.UnsupportedParamsError( message="Bedrock doesn't support tool calling without `tools=` param specified. Pass `tools=` param OR set `litellm.modify_params = True` // `litellm_settings::modify_params: True` to add dummy tool to the request.", @@ -1448,9 +1345,7 @@ def _transform_request_helper( bedrock_tool_config: Optional[ToolConfigBlock] = None if len(bedrock_tools) > 0: - tool_choice_values: ToolChoiceValuesBlock = inference_params.pop( - "tool_choice", None - ) + tool_choice_values: ToolChoiceValuesBlock = inference_params.pop("tool_choice", None) bedrock_tool_config = ToolConfigBlock( tools=bedrock_tools, ) @@ -1460,9 +1355,7 @@ def _transform_request_helper( data: CommonRequestObject = { "additionalModelRequestFields": additional_request_params, "system": system_content_blocks, - "inferenceConfig": self._transform_inference_params( - inference_params=inference_params - ), + "inferenceConfig": self._transform_inference_params(inference_params=inference_params), } # Handle all config blocks @@ -1492,14 +1385,10 @@ async def _async_transform_request( litellm_params: dict, headers: Optional[dict] = None, ) -> RequestObject: - messages, system_content_blocks = self._transform_system_message( - messages, model=model - ) + messages, system_content_blocks = self._transform_system_message(messages, model=model) # Convert last user message to guarded_text if guardrailConfig is present - messages = self._convert_consecutive_user_messages_to_guarded_text( - messages, optional_params - ) + messages = self._convert_consecutive_user_messages_to_guarded_text(messages, optional_params) ## TRANSFORMATION ## _data: CommonRequestObject = self._transform_request_helper( @@ -1510,13 +1399,11 @@ async def _async_transform_request( headers=headers, ) - bedrock_messages = ( - await BedrockConverseMessagesProcessor._bedrock_converse_messages_pt_async( - messages=messages, - model=model, - llm_provider="bedrock_converse", - user_continue_message=litellm_params.pop("user_continue_message", None), - ) + bedrock_messages = await BedrockConverseMessagesProcessor._bedrock_converse_messages_pt_async( + messages=messages, + model=model, + llm_provider="bedrock_converse", + user_continue_message=litellm_params.pop("user_continue_message", None), ) data: RequestObject = {"messages": bedrock_messages, **_data} @@ -1550,14 +1437,10 @@ def _transform_request( litellm_params: dict, headers: Optional[dict] = None, ) -> RequestObject: - messages, system_content_blocks = self._transform_system_message( - messages, model=model - ) + messages, system_content_blocks = self._transform_system_message(messages, model=model) # Convert last user message to guarded_text if guardrailConfig is present - messages = self._convert_consecutive_user_messages_to_guarded_text( - messages, optional_params - ) + messages = self._convert_consecutive_user_messages_to_guarded_text(messages, optional_params) _data: CommonRequestObject = self._transform_request_helper( model=model, @@ -1606,9 +1489,7 @@ def transform_response( encoding=encoding, ) - def _transform_reasoning_content( - self, reasoning_content_blocks: List[BedrockConverseReasoningContentBlock] - ) -> str: + def _transform_reasoning_content(self, reasoning_content_blocks: List[BedrockConverseReasoningContentBlock]) -> str: """ Extract the reasoning text from the reasoning content blocks @@ -1624,9 +1505,7 @@ def _transform_thinking_blocks( self, thinking_blocks: List[BedrockConverseReasoningContentBlock] ) -> List[Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]]: """Return a consistent format for thinking blocks between Anthropic and Bedrock.""" - thinking_blocks_list: List[ - Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock] - ] = [] + thinking_blocks_list: List[Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]] = [] for block in thinking_blocks: if "reasoningText" in block: _thinking_block = ChatCompletionThinkingBlock(type="thinking") @@ -1662,21 +1541,11 @@ def _transform_usage( cache_creation_input_tokens = usage["cacheWriteInputTokens"] input_tokens += cache_creation_input_tokens - prompt_tokens_details = PromptTokensDetailsWrapper( - cached_tokens=cache_read_input_tokens - ) - reasoning_tokens = ( - token_counter(text=reasoning_content, count_response_tokens=True) - if reasoning_content - else 0 - ) + prompt_tokens_details = PromptTokensDetailsWrapper(cached_tokens=cache_read_input_tokens) + reasoning_tokens = token_counter(text=reasoning_content, count_response_tokens=True) if reasoning_content else 0 completion_tokens_details = CompletionTokensDetailsWrapper( reasoning_tokens=reasoning_tokens, - text_tokens=( - output_tokens - reasoning_tokens - if reasoning_tokens > 0 - else output_tokens - ), + text_tokens=(output_tokens - reasoning_tokens if reasoning_tokens > 0 else output_tokens), ) openai_usage = Usage( prompt_tokens=input_tokens, @@ -1691,9 +1560,7 @@ def _transform_usage( def get_tool_call_names( self, - tools: Optional[ - Union[List[ToolBlock], List[OpenAIChatCompletionToolParam]] - ] = None, + tools: Optional[Union[List[ToolBlock], List[OpenAIChatCompletionToolParam]]] = None, ) -> List[str]: if tools is None: return [] @@ -1732,13 +1599,8 @@ def apply_tool_call_transformation_if_needed( try: tool_call_names = self.get_tool_call_names(tools) json_content = json.loads(message.content) - if ( - json_content.get("type") == "function" - and json_content.get("name") in tool_call_names - ): - tool_calls = [ - ChatCompletionMessageToolCall(function=Function(**json_content)) - ] + if json_content.get("type") == "function" and json_content.get("name") in tool_call_names: + tool_calls = [ChatCompletionMessageToolCall(function=Function(**json_content))] message.tool_calls = tool_calls message.content = None @@ -1767,9 +1629,7 @@ def _translate_message_content( """ content_str = "" tools: List[ChatCompletionToolCallChunk] = [] - reasoningContentBlocks: Optional[ - List[BedrockConverseReasoningContentBlock] - ] = None + reasoningContentBlocks: Optional[List[BedrockConverseReasoningContentBlock]] = None citationsContentBlocks: Optional[List[CitationsContentBlock]] = None for idx, content in enumerate(content_blocks): """ @@ -1786,9 +1646,7 @@ def _translate_message_content( if "toolUse" in content: ## check tool name was formatted by litellm _response_tool_name = content["toolUse"]["name"] - response_tool_name = get_bedrock_tool_name( - response_tool_name=_response_tool_name - ) + response_tool_name = get_bedrock_tool_name(response_tool_name=_response_tool_name) _function_chunk = ChatCompletionToolCallFunctionChunk( name=response_tool_name, arguments=json.dumps(content["toolUse"]["input"]), @@ -1839,11 +1697,7 @@ def _unwrap_bedrock_properties(json_str: str) -> str: """ try: response_data = json.loads(json_str) - if ( - isinstance(response_data, dict) - and "properties" in response_data - and len(response_data) == 1 - ): + if isinstance(response_data, dict) and "properties" in response_data and len(response_data) == 1: response_data = response_data["properties"] return json.dumps(response_data) except json.JSONDecodeError: @@ -1867,11 +1721,7 @@ def _filter_json_mode_tools( if not json_mode or not tools: return tools if tools else None - json_tool_indices = [ - i - for i, t in enumerate(tools) - if t["function"].get("name") == RESPONSE_FORMAT_TOOL_NAME - ] + json_tool_indices = [i for i, t in enumerate(tools) if t["function"].get("name") == RESPONSE_FORMAT_TOOL_NAME] if not json_tool_indices: # No json_tool_call found, return tools unchanged @@ -1879,14 +1729,10 @@ def _filter_json_mode_tools( if len(json_tool_indices) == len(tools): # All tools are json_tool_call — convert first one to content - verbose_logger.debug( - "Processing JSON tool call response for response_format" - ) + verbose_logger.debug("Processing JSON tool call response for response_format") json_mode_content_str: Optional[str] = tools[0]["function"].get("arguments") if json_mode_content_str is not None: - json_mode_content_str = AmazonConverseConfig._unwrap_bedrock_properties( - json_mode_content_str - ) + json_mode_content_str = AmazonConverseConfig._unwrap_bedrock_properties(json_mode_content_str) chat_completion_message["content"] = json_mode_content_str return None @@ -1896,13 +1742,9 @@ def _filter_json_mode_tools( first_idx = json_tool_indices[0] json_mode_args = tools[first_idx]["function"].get("arguments") if json_mode_args is not None: - json_mode_args = AmazonConverseConfig._unwrap_bedrock_properties( - json_mode_args - ) + json_mode_args = AmazonConverseConfig._unwrap_bedrock_properties(json_mode_args) existing = chat_completion_message.get("content") or "" - chat_completion_message["content"] = ( - existing + json_mode_args if existing else json_mode_args - ) + chat_completion_message["content"] = existing + json_mode_args if existing else json_mode_args real_tools = [t for i, t in enumerate(tools) if i not in json_tool_indices] return real_tools if real_tools else None @@ -1980,9 +1822,7 @@ def _transform_response( # noqa: PLR0915 chat_completion_message: ChatCompletionResponseMessage = {"role": "assistant"} content_str = "" tools: List[ChatCompletionToolCallChunk] = [] - reasoningContentBlocks: Optional[ - List[BedrockConverseReasoningContentBlock] - ] = None + reasoningContentBlocks: Optional[List[BedrockConverseReasoningContentBlock]] = None citationsContentBlocks: Optional[List[CitationsContentBlock]] = None if message is not None: @@ -2001,17 +1841,11 @@ def _transform_response( # noqa: PLR0915 provider_specific_fields["citationsContent"] = citationsContentBlocks if provider_specific_fields: - chat_completion_message[ - "provider_specific_fields" - ] = provider_specific_fields + chat_completion_message["provider_specific_fields"] = provider_specific_fields if reasoningContentBlocks is not None: - chat_completion_message[ - "reasoning_content" - ] = self._transform_reasoning_content(reasoningContentBlocks) - chat_completion_message[ - "thinking_blocks" - ] = self._transform_thinking_blocks(reasoningContentBlocks) + chat_completion_message["reasoning_content"] = self._transform_reasoning_content(reasoningContentBlocks) + chat_completion_message["thinking_blocks"] = self._transform_thinking_blocks(reasoningContentBlocks) chat_completion_message["content"] = content_str filtered_tools = self._filter_json_mode_tools( json_mode=json_mode, diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json index 879dd42be47..34261ba2391 100644 --- a/litellm/model_prices_and_context_window_backup.json +++ b/litellm/model_prices_and_context_window_backup.json @@ -722,7 +722,8 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 346 + "tool_use_system_prompt_tokens": 346, + "supports_native_structured_output": true }, "anthropic.claude-haiku-4-5@20251001": { "cache_creation_input_token_cost": 1.25e-06, @@ -745,7 +746,8 @@ "supports_tool_choice": true, "supports_vision": true, "tool_use_system_prompt_tokens": 346, - "supports_native_streaming": true + "supports_native_streaming": true, + "supports_native_structured_output": true }, "anthropic.claude-3-5-sonnet-20240620-v1:0": { "input_cost_per_token": 3e-06, @@ -967,7 +969,8 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 159 + "tool_use_system_prompt_tokens": 159, + "supports_native_structured_output": true }, "anthropic.claude-opus-4-6-v1": { "cache_creation_input_token_cost": 6.25e-06, @@ -997,7 +1000,8 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 346 + "tool_use_system_prompt_tokens": 346, + "supports_native_structured_output": true }, "global.anthropic.claude-opus-4-6-v1": { "cache_creation_input_token_cost": 6.25e-06, @@ -1027,7 +1031,8 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 346 + "tool_use_system_prompt_tokens": 346, + "supports_native_structured_output": true }, "us.anthropic.claude-opus-4-6-v1": { "cache_creation_input_token_cost": 6.875e-06, @@ -1057,7 +1062,8 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 346 + "tool_use_system_prompt_tokens": 346, + "supports_native_structured_output": true }, "eu.anthropic.claude-opus-4-6-v1": { "cache_creation_input_token_cost": 6.875e-06, @@ -1087,7 +1093,8 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 346 + "tool_use_system_prompt_tokens": 346, + "supports_native_structured_output": true }, "au.anthropic.claude-opus-4-6-v1": { "cache_creation_input_token_cost": 6.875e-06, @@ -1117,7 +1124,8 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 346 + "tool_use_system_prompt_tokens": 346, + "supports_native_structured_output": true }, "anthropic.claude-sonnet-4-6": { "cache_creation_input_token_cost": 3.75e-06, @@ -1147,7 +1155,8 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 346 + "tool_use_system_prompt_tokens": 346, + "supports_native_structured_output": true }, "global.anthropic.claude-sonnet-4-6": { "cache_creation_input_token_cost": 3.75e-06, @@ -1177,7 +1186,8 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 346 + "tool_use_system_prompt_tokens": 346, + "supports_native_structured_output": true }, "us.anthropic.claude-sonnet-4-6": { "cache_creation_input_token_cost": 4.125e-06, @@ -1207,7 +1217,8 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 346 + "tool_use_system_prompt_tokens": 346, + "supports_native_structured_output": true }, "eu.anthropic.claude-sonnet-4-6": { "cache_creation_input_token_cost": 4.125e-06, @@ -1237,7 +1248,8 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 346 + "tool_use_system_prompt_tokens": 346, + "supports_native_structured_output": true }, "au.anthropic.claude-sonnet-4-6": { "cache_creation_input_token_cost": 4.125e-06, @@ -1267,7 +1279,8 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 346 + "tool_use_system_prompt_tokens": 346, + "supports_native_structured_output": true }, "anthropic.claude-sonnet-4-20250514-v1:0": { "cache_creation_input_token_cost": 3.75e-06, @@ -1327,7 +1340,8 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 159 + "tool_use_system_prompt_tokens": 159, + "supports_native_structured_output": true }, "anthropic.claude-v1": { "input_cost_per_token": 8e-06, @@ -1577,7 +1591,8 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 346 + "tool_use_system_prompt_tokens": 346, + "supports_native_structured_output": true }, "apac.anthropic.claude-3-sonnet-20240229-v1:0": { "input_cost_per_token": 3e-06, @@ -1665,7 +1680,8 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 346 + "tool_use_system_prompt_tokens": 346, + "supports_native_structured_output": true }, "azure/ada": { "input_cost_per_token": 1e-07, @@ -11737,7 +11753,8 @@ "output_cost_per_token": 1.68e-06, "supports_function_calling": true, "supports_reasoning": true, - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_native_structured_output": true }, "deepseek.v3.2": { "input_cost_per_token": 6.2e-07, @@ -12182,7 +12199,8 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 346 + "tool_use_system_prompt_tokens": 346, + "supports_native_structured_output": true }, "eu.anthropic.claude-3-5-sonnet-20240620-v1:0": { "input_cost_per_token": 3e-06, @@ -12396,7 +12414,8 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 346 + "tool_use_system_prompt_tokens": 346, + "supports_native_structured_output": true }, "eu.meta.llama3-2-1b-instruct-v1:0": { "input_cost_per_token": 1.3e-07, @@ -14626,18 +14645,6 @@ "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", "uses_embed_content": true }, - "vertex_ai/gemini-embedding-2-preview": { - "input_cost_per_token": 1.5e-07, - "litellm_provider": "vertex_ai", - "max_input_tokens": 8192, - "max_tokens": 8192, - "mode": "embedding", - "output_cost_per_token": 0, - "output_vector_size": 3072, - "source": "https://ai.google.dev/gemini-api/docs/embeddings#multimodal", - "supports_multimodal": true, - "uses_embed_content": true - }, "gemini/gemini-embedding-001": { "input_cost_per_token": 1.5e-07, "litellm_provider": "gemini", @@ -16765,7 +16772,8 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 346 + "tool_use_system_prompt_tokens": 346, + "supports_native_structured_output": true }, "global.anthropic.claude-sonnet-4-20250514-v1:0": { "cache_creation_input_token_cost": 3.75e-06, @@ -16817,7 +16825,8 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 346 + "tool_use_system_prompt_tokens": 346, + "supports_native_structured_output": true }, "global.amazon.nova-2-lite-v1:0": { "cache_read_input_token_cost": 7.5e-08, @@ -20420,7 +20429,8 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 346 + "tool_use_system_prompt_tokens": 346, + "supports_native_structured_output": true }, "jp.anthropic.claude-haiku-4-5-20251001-v1:0": { "cache_creation_input_token_cost": 1.375e-06, @@ -20442,7 +20452,8 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 346 + "tool_use_system_prompt_tokens": 346, + "supports_native_structured_output": true }, "lambda_ai/deepseek-llama3.3-70b": { "input_cost_per_token": 2e-07, @@ -21137,7 +21148,8 @@ "max_tokens": 8192, "mode": "chat", "output_cost_per_token": 1.2e-06, - "supports_system_messages": true + "supports_system_messages": true, + "supports_native_structured_output": true }, "minimax.minimax-m2.1": { "input_cost_per_token": 3e-07, @@ -21293,7 +21305,8 @@ "mode": "chat", "output_cost_per_token": 2e-07, "supports_function_calling": true, - "supports_system_messages": true + "supports_system_messages": true, + "supports_native_structured_output": true }, "mistral.ministral-3-3b-instruct": { "input_cost_per_token": 1e-07, @@ -21304,7 +21317,8 @@ "mode": "chat", "output_cost_per_token": 1e-07, "supports_function_calling": true, - "supports_system_messages": true + "supports_system_messages": true, + "supports_native_structured_output": true }, "mistral.ministral-3-8b-instruct": { "input_cost_per_token": 1.5e-07, @@ -21315,7 +21329,8 @@ "mode": "chat", "output_cost_per_token": 1.5e-07, "supports_function_calling": true, - "supports_system_messages": true + "supports_system_messages": true, + "supports_native_structured_output": true }, "mistral.mistral-7b-instruct-v0:2": { "input_cost_per_token": 1.5e-07, @@ -21357,7 +21372,8 @@ "mode": "chat", "output_cost_per_token": 1.5e-06, "supports_function_calling": true, - "supports_system_messages": true + "supports_system_messages": true, + "supports_native_structured_output": true }, "mistral.mistral-small-2402-v1:0": { "input_cost_per_token": 1e-06, @@ -21388,7 +21404,8 @@ "mode": "chat", "output_cost_per_token": 4e-08, "supports_audio_input": true, - "supports_system_messages": true + "supports_system_messages": true, + "supports_native_structured_output": true }, "mistral.voxtral-small-24b-2507": { "input_cost_per_token": 1e-07, @@ -21399,7 +21416,8 @@ "mode": "chat", "output_cost_per_token": 3e-07, "supports_audio_input": true, - "supports_system_messages": true + "supports_system_messages": true, + "supports_native_structured_output": true }, "mistral/codestral-2405": { "input_cost_per_token": 1e-06, @@ -22086,7 +22104,8 @@ "mode": "chat", "output_cost_per_token": 2.5e-06, "supports_reasoning": true, - "supports_system_messages": true + "supports_system_messages": true, + "supports_native_structured_output": true }, "moonshotai.kimi-k2.5": { "input_cost_per_token": 6e-07, @@ -22961,7 +22980,8 @@ "supports_function_calling": true, "supports_system_messages": true, "supports_tool_choice": true, - "source": "https://aws.amazon.com/bedrock/pricing/" + "source": "https://aws.amazon.com/bedrock/pricing/", + "supports_native_structured_output": true }, "o1": { "cache_read_input_token_cost": 7.5e-06, @@ -26045,7 +26065,8 @@ "output_cost_per_token": 1.8e-06, "supports_function_calling": true, "supports_reasoning": true, - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_native_structured_output": true }, "qwen.qwen3-235b-a22b-2507-v1:0": { "input_cost_per_token": 2.2e-07, @@ -26057,7 +26078,8 @@ "output_cost_per_token": 8.8e-07, "supports_function_calling": true, "supports_reasoning": true, - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_native_structured_output": true }, "qwen.qwen3-coder-30b-a3b-v1:0": { "input_cost_per_token": 1.5e-07, @@ -26069,7 +26091,8 @@ "output_cost_per_token": 6e-07, "supports_function_calling": true, "supports_reasoning": true, - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_native_structured_output": true }, "qwen.qwen3-32b-v1:0": { "input_cost_per_token": 1.5e-07, @@ -26081,7 +26104,8 @@ "output_cost_per_token": 6e-07, "supports_function_calling": true, "supports_reasoning": true, - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_native_structured_output": true }, "qwen.qwen3-next-80b-a3b": { "input_cost_per_token": 1.5e-07, @@ -26092,7 +26116,8 @@ "mode": "chat", "output_cost_per_token": 1.2e-06, "supports_function_calling": true, - "supports_system_messages": true + "supports_system_messages": true, + "supports_native_structured_output": true }, "qwen.qwen3-vl-235b-a22b": { "input_cost_per_token": 5.3e-07, @@ -26104,7 +26129,8 @@ "output_cost_per_token": 2.66e-06, "supports_function_calling": true, "supports_system_messages": true, - "supports_vision": true + "supports_vision": true, + "supports_native_structured_output": true }, "qwen.qwen3-coder-next": { "input_cost_per_token": 5e-07, @@ -28129,7 +28155,8 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 346 + "tool_use_system_prompt_tokens": 346, + "supports_native_structured_output": true }, "us.anthropic.claude-3-5-sonnet-20240620-v1:0": { "input_cost_per_token": 3e-06, @@ -28287,7 +28314,8 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 346 + "tool_use_system_prompt_tokens": 346, + "supports_native_structured_output": true }, "au.anthropic.claude-haiku-4-5-20251001-v1:0": { "cache_creation_input_token_cost": 1.375e-06, @@ -28308,7 +28336,8 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 346 + "tool_use_system_prompt_tokens": 346, + "supports_native_structured_output": true }, "us.anthropic.claude-opus-4-20250514-v1:0": { "cache_creation_input_token_cost": 1.875e-05, @@ -28360,7 +28389,8 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 159 + "tool_use_system_prompt_tokens": 159, + "supports_native_structured_output": true }, "global.anthropic.claude-opus-4-5-20251101-v1:0": { "cache_creation_input_token_cost": 6.25e-06, @@ -28386,7 +28416,8 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 159 + "tool_use_system_prompt_tokens": 159, + "supports_native_structured_output": true }, "eu.anthropic.claude-opus-4-5-20251101-v1:0": { "cache_creation_input_token_cost": 6.25e-06, @@ -28412,7 +28443,8 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 159 + "tool_use_system_prompt_tokens": 159, + "supports_native_structured_output": true }, "us.anthropic.claude-sonnet-4-20250514-v1:0": { "cache_creation_input_token_cost": 3.75e-06, @@ -31013,7 +31045,9 @@ "mode": "chat", "output_cost_per_token": 3.2e-06, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#glm-models", - "supported_regions": ["global"], + "supported_regions": [ + "global" + ], "supports_function_calling": true, "supports_prompt_caching": true, "supports_reasoning": true, diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index 879dd42be47..34261ba2391 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -722,7 +722,8 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 346 + "tool_use_system_prompt_tokens": 346, + "supports_native_structured_output": true }, "anthropic.claude-haiku-4-5@20251001": { "cache_creation_input_token_cost": 1.25e-06, @@ -745,7 +746,8 @@ "supports_tool_choice": true, "supports_vision": true, "tool_use_system_prompt_tokens": 346, - "supports_native_streaming": true + "supports_native_streaming": true, + "supports_native_structured_output": true }, "anthropic.claude-3-5-sonnet-20240620-v1:0": { "input_cost_per_token": 3e-06, @@ -967,7 +969,8 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 159 + "tool_use_system_prompt_tokens": 159, + "supports_native_structured_output": true }, "anthropic.claude-opus-4-6-v1": { "cache_creation_input_token_cost": 6.25e-06, @@ -997,7 +1000,8 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 346 + "tool_use_system_prompt_tokens": 346, + "supports_native_structured_output": true }, "global.anthropic.claude-opus-4-6-v1": { "cache_creation_input_token_cost": 6.25e-06, @@ -1027,7 +1031,8 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 346 + "tool_use_system_prompt_tokens": 346, + "supports_native_structured_output": true }, "us.anthropic.claude-opus-4-6-v1": { "cache_creation_input_token_cost": 6.875e-06, @@ -1057,7 +1062,8 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 346 + "tool_use_system_prompt_tokens": 346, + "supports_native_structured_output": true }, "eu.anthropic.claude-opus-4-6-v1": { "cache_creation_input_token_cost": 6.875e-06, @@ -1087,7 +1093,8 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 346 + "tool_use_system_prompt_tokens": 346, + "supports_native_structured_output": true }, "au.anthropic.claude-opus-4-6-v1": { "cache_creation_input_token_cost": 6.875e-06, @@ -1117,7 +1124,8 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 346 + "tool_use_system_prompt_tokens": 346, + "supports_native_structured_output": true }, "anthropic.claude-sonnet-4-6": { "cache_creation_input_token_cost": 3.75e-06, @@ -1147,7 +1155,8 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 346 + "tool_use_system_prompt_tokens": 346, + "supports_native_structured_output": true }, "global.anthropic.claude-sonnet-4-6": { "cache_creation_input_token_cost": 3.75e-06, @@ -1177,7 +1186,8 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 346 + "tool_use_system_prompt_tokens": 346, + "supports_native_structured_output": true }, "us.anthropic.claude-sonnet-4-6": { "cache_creation_input_token_cost": 4.125e-06, @@ -1207,7 +1217,8 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 346 + "tool_use_system_prompt_tokens": 346, + "supports_native_structured_output": true }, "eu.anthropic.claude-sonnet-4-6": { "cache_creation_input_token_cost": 4.125e-06, @@ -1237,7 +1248,8 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 346 + "tool_use_system_prompt_tokens": 346, + "supports_native_structured_output": true }, "au.anthropic.claude-sonnet-4-6": { "cache_creation_input_token_cost": 4.125e-06, @@ -1267,7 +1279,8 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 346 + "tool_use_system_prompt_tokens": 346, + "supports_native_structured_output": true }, "anthropic.claude-sonnet-4-20250514-v1:0": { "cache_creation_input_token_cost": 3.75e-06, @@ -1327,7 +1340,8 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 159 + "tool_use_system_prompt_tokens": 159, + "supports_native_structured_output": true }, "anthropic.claude-v1": { "input_cost_per_token": 8e-06, @@ -1577,7 +1591,8 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 346 + "tool_use_system_prompt_tokens": 346, + "supports_native_structured_output": true }, "apac.anthropic.claude-3-sonnet-20240229-v1:0": { "input_cost_per_token": 3e-06, @@ -1665,7 +1680,8 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 346 + "tool_use_system_prompt_tokens": 346, + "supports_native_structured_output": true }, "azure/ada": { "input_cost_per_token": 1e-07, @@ -11737,7 +11753,8 @@ "output_cost_per_token": 1.68e-06, "supports_function_calling": true, "supports_reasoning": true, - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_native_structured_output": true }, "deepseek.v3.2": { "input_cost_per_token": 6.2e-07, @@ -12182,7 +12199,8 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 346 + "tool_use_system_prompt_tokens": 346, + "supports_native_structured_output": true }, "eu.anthropic.claude-3-5-sonnet-20240620-v1:0": { "input_cost_per_token": 3e-06, @@ -12396,7 +12414,8 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 346 + "tool_use_system_prompt_tokens": 346, + "supports_native_structured_output": true }, "eu.meta.llama3-2-1b-instruct-v1:0": { "input_cost_per_token": 1.3e-07, @@ -14626,18 +14645,6 @@ "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", "uses_embed_content": true }, - "vertex_ai/gemini-embedding-2-preview": { - "input_cost_per_token": 1.5e-07, - "litellm_provider": "vertex_ai", - "max_input_tokens": 8192, - "max_tokens": 8192, - "mode": "embedding", - "output_cost_per_token": 0, - "output_vector_size": 3072, - "source": "https://ai.google.dev/gemini-api/docs/embeddings#multimodal", - "supports_multimodal": true, - "uses_embed_content": true - }, "gemini/gemini-embedding-001": { "input_cost_per_token": 1.5e-07, "litellm_provider": "gemini", @@ -16765,7 +16772,8 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 346 + "tool_use_system_prompt_tokens": 346, + "supports_native_structured_output": true }, "global.anthropic.claude-sonnet-4-20250514-v1:0": { "cache_creation_input_token_cost": 3.75e-06, @@ -16817,7 +16825,8 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 346 + "tool_use_system_prompt_tokens": 346, + "supports_native_structured_output": true }, "global.amazon.nova-2-lite-v1:0": { "cache_read_input_token_cost": 7.5e-08, @@ -20420,7 +20429,8 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 346 + "tool_use_system_prompt_tokens": 346, + "supports_native_structured_output": true }, "jp.anthropic.claude-haiku-4-5-20251001-v1:0": { "cache_creation_input_token_cost": 1.375e-06, @@ -20442,7 +20452,8 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 346 + "tool_use_system_prompt_tokens": 346, + "supports_native_structured_output": true }, "lambda_ai/deepseek-llama3.3-70b": { "input_cost_per_token": 2e-07, @@ -21137,7 +21148,8 @@ "max_tokens": 8192, "mode": "chat", "output_cost_per_token": 1.2e-06, - "supports_system_messages": true + "supports_system_messages": true, + "supports_native_structured_output": true }, "minimax.minimax-m2.1": { "input_cost_per_token": 3e-07, @@ -21293,7 +21305,8 @@ "mode": "chat", "output_cost_per_token": 2e-07, "supports_function_calling": true, - "supports_system_messages": true + "supports_system_messages": true, + "supports_native_structured_output": true }, "mistral.ministral-3-3b-instruct": { "input_cost_per_token": 1e-07, @@ -21304,7 +21317,8 @@ "mode": "chat", "output_cost_per_token": 1e-07, "supports_function_calling": true, - "supports_system_messages": true + "supports_system_messages": true, + "supports_native_structured_output": true }, "mistral.ministral-3-8b-instruct": { "input_cost_per_token": 1.5e-07, @@ -21315,7 +21329,8 @@ "mode": "chat", "output_cost_per_token": 1.5e-07, "supports_function_calling": true, - "supports_system_messages": true + "supports_system_messages": true, + "supports_native_structured_output": true }, "mistral.mistral-7b-instruct-v0:2": { "input_cost_per_token": 1.5e-07, @@ -21357,7 +21372,8 @@ "mode": "chat", "output_cost_per_token": 1.5e-06, "supports_function_calling": true, - "supports_system_messages": true + "supports_system_messages": true, + "supports_native_structured_output": true }, "mistral.mistral-small-2402-v1:0": { "input_cost_per_token": 1e-06, @@ -21388,7 +21404,8 @@ "mode": "chat", "output_cost_per_token": 4e-08, "supports_audio_input": true, - "supports_system_messages": true + "supports_system_messages": true, + "supports_native_structured_output": true }, "mistral.voxtral-small-24b-2507": { "input_cost_per_token": 1e-07, @@ -21399,7 +21416,8 @@ "mode": "chat", "output_cost_per_token": 3e-07, "supports_audio_input": true, - "supports_system_messages": true + "supports_system_messages": true, + "supports_native_structured_output": true }, "mistral/codestral-2405": { "input_cost_per_token": 1e-06, @@ -22086,7 +22104,8 @@ "mode": "chat", "output_cost_per_token": 2.5e-06, "supports_reasoning": true, - "supports_system_messages": true + "supports_system_messages": true, + "supports_native_structured_output": true }, "moonshotai.kimi-k2.5": { "input_cost_per_token": 6e-07, @@ -22961,7 +22980,8 @@ "supports_function_calling": true, "supports_system_messages": true, "supports_tool_choice": true, - "source": "https://aws.amazon.com/bedrock/pricing/" + "source": "https://aws.amazon.com/bedrock/pricing/", + "supports_native_structured_output": true }, "o1": { "cache_read_input_token_cost": 7.5e-06, @@ -26045,7 +26065,8 @@ "output_cost_per_token": 1.8e-06, "supports_function_calling": true, "supports_reasoning": true, - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_native_structured_output": true }, "qwen.qwen3-235b-a22b-2507-v1:0": { "input_cost_per_token": 2.2e-07, @@ -26057,7 +26078,8 @@ "output_cost_per_token": 8.8e-07, "supports_function_calling": true, "supports_reasoning": true, - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_native_structured_output": true }, "qwen.qwen3-coder-30b-a3b-v1:0": { "input_cost_per_token": 1.5e-07, @@ -26069,7 +26091,8 @@ "output_cost_per_token": 6e-07, "supports_function_calling": true, "supports_reasoning": true, - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_native_structured_output": true }, "qwen.qwen3-32b-v1:0": { "input_cost_per_token": 1.5e-07, @@ -26081,7 +26104,8 @@ "output_cost_per_token": 6e-07, "supports_function_calling": true, "supports_reasoning": true, - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_native_structured_output": true }, "qwen.qwen3-next-80b-a3b": { "input_cost_per_token": 1.5e-07, @@ -26092,7 +26116,8 @@ "mode": "chat", "output_cost_per_token": 1.2e-06, "supports_function_calling": true, - "supports_system_messages": true + "supports_system_messages": true, + "supports_native_structured_output": true }, "qwen.qwen3-vl-235b-a22b": { "input_cost_per_token": 5.3e-07, @@ -26104,7 +26129,8 @@ "output_cost_per_token": 2.66e-06, "supports_function_calling": true, "supports_system_messages": true, - "supports_vision": true + "supports_vision": true, + "supports_native_structured_output": true }, "qwen.qwen3-coder-next": { "input_cost_per_token": 5e-07, @@ -28129,7 +28155,8 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 346 + "tool_use_system_prompt_tokens": 346, + "supports_native_structured_output": true }, "us.anthropic.claude-3-5-sonnet-20240620-v1:0": { "input_cost_per_token": 3e-06, @@ -28287,7 +28314,8 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 346 + "tool_use_system_prompt_tokens": 346, + "supports_native_structured_output": true }, "au.anthropic.claude-haiku-4-5-20251001-v1:0": { "cache_creation_input_token_cost": 1.375e-06, @@ -28308,7 +28336,8 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 346 + "tool_use_system_prompt_tokens": 346, + "supports_native_structured_output": true }, "us.anthropic.claude-opus-4-20250514-v1:0": { "cache_creation_input_token_cost": 1.875e-05, @@ -28360,7 +28389,8 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 159 + "tool_use_system_prompt_tokens": 159, + "supports_native_structured_output": true }, "global.anthropic.claude-opus-4-5-20251101-v1:0": { "cache_creation_input_token_cost": 6.25e-06, @@ -28386,7 +28416,8 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 159 + "tool_use_system_prompt_tokens": 159, + "supports_native_structured_output": true }, "eu.anthropic.claude-opus-4-5-20251101-v1:0": { "cache_creation_input_token_cost": 6.25e-06, @@ -28412,7 +28443,8 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 159 + "tool_use_system_prompt_tokens": 159, + "supports_native_structured_output": true }, "us.anthropic.claude-sonnet-4-20250514-v1:0": { "cache_creation_input_token_cost": 3.75e-06, @@ -31013,7 +31045,9 @@ "mode": "chat", "output_cost_per_token": 3.2e-06, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#glm-models", - "supported_regions": ["global"], + "supported_regions": [ + "global" + ], "supports_function_calling": true, "supports_prompt_caching": true, "supports_reasoning": true, diff --git a/tests/test_litellm/llms/bedrock/chat/test_converse_transformation.py b/tests/test_litellm/llms/bedrock/chat/test_converse_transformation.py index a305009659c..65825419d43 100644 --- a/tests/test_litellm/llms/bedrock/chat/test_converse_transformation.py +++ b/tests/test_litellm/llms/bedrock/chat/test_converse_transformation.py @@ -6,9 +6,7 @@ import pytest from fastapi.testclient import TestClient -sys.path.insert( - 0, os.path.abspath("../../../../..") -) # Adds the parent directory to the system path +sys.path.insert(0, os.path.abspath("../../../../..")) # Adds the parent directory to the system path from unittest.mock import MagicMock, patch import litellm @@ -37,10 +35,7 @@ def test_transform_usage(): ) assert openai_usage.completion_tokens == usage["outputTokens"] assert openai_usage.total_tokens == usage["totalTokens"] - assert ( - openai_usage.prompt_tokens_details.cached_tokens - == usage["cacheReadInputTokens"] - ) + assert openai_usage.prompt_tokens_details.cached_tokens == usage["cacheReadInputTokens"] assert openai_usage._cache_creation_input_tokens == usage["cacheWriteInputTokens"] assert openai_usage._cache_read_input_tokens == usage["cacheReadInputTokens"] # completion_tokens_details should always be populated @@ -194,14 +189,10 @@ def test_apply_tool_call_transformation_if_needed(): role="user", content=json.dumps(tool_response), ) - transformed_message, _ = config.apply_tool_call_transformation_if_needed( - message, tool_calls - ) + transformed_message, _ = config.apply_tool_call_transformation_if_needed(message, tool_calls) assert len(transformed_message.tool_calls) == 1 assert transformed_message.tool_calls[0].function.name == "test_function" - assert transformed_message.tool_calls[0].function.arguments == json.dumps( - tool_response["parameters"] - ) + assert transformed_message.tool_calls[0].function.arguments == json.dumps(tool_response["parameters"]) def test_transform_tool_call_with_cache_control(): @@ -250,12 +241,7 @@ def test_transform_tool_call_with_cache_control(): print(function_out_msg) assert function_out_msg["toolSpec"]["name"] == "get_location" assert function_out_msg["toolSpec"]["description"] == "Get the user's location" - assert ( - function_out_msg["toolSpec"]["inputSchema"]["json"]["properties"]["location"][ - "type" - ] - == "string" - ) + assert function_out_msg["toolSpec"]["inputSchema"]["json"]["properties"]["location"]["type"] == "string" transformed_cache_msg = result["toolConfig"]["tools"][1] assert "cachePoint" in transformed_cache_msg @@ -285,6 +271,7 @@ def test_reasoning_with_forced_tool_choice_switches_to_auto(): assert optional_params["tool_choice"] == {"auto": {}} + def test_get_supported_openai_params(): config = AmazonConverseConfig() supported_params = config.get_supported_openai_params( @@ -307,15 +294,13 @@ def test_get_supported_openai_params_bedrock_converse(): for model in litellm.BEDROCK_CONVERSE_MODELS: print(f"Testing model: {model}") config = AmazonConverseConfig() - supported_params_without_prefix = config.get_supported_openai_params( - model=model - ) + supported_params_without_prefix = config.get_supported_openai_params(model=model) - supported_params_with_prefix = config.get_supported_openai_params( - model=f"bedrock/converse/{model}" - ) + supported_params_with_prefix = config.get_supported_openai_params(model=f"bedrock/converse/{model}") - assert set(supported_params_without_prefix) == set(supported_params_with_prefix), f"Supported params mismatch for model: {model}. Without prefix: {supported_params_without_prefix}, With prefix: {supported_params_with_prefix}" + assert set(supported_params_without_prefix) == set(supported_params_with_prefix), ( + f"Supported params mismatch for model: {model}. Without prefix: {supported_params_without_prefix}, With prefix: {supported_params_with_prefix}" + ) print(f"✅ Passed for model: {model}") @@ -382,7 +367,7 @@ def test_transform_response_with_computer_use_tool(): }, } } - ] + ], } }, "stopReason": "tool_use", @@ -396,10 +381,12 @@ def test_transform_response_with_computer_use_tool(): "cacheWriteInputTokens": 0, }, } + # Mock httpx.Response class MockResponse: def json(self): return response_json + @property def text(self): return json.dumps(response_json) @@ -468,12 +455,10 @@ def test_transform_response_with_bash_tool(): "toolUse": { "toolUseId": "tooluse_456", "name": "bash", - "input": { - "command": "ls -la *.py" - }, + "input": {"command": "ls -la *.py"}, } } - ] + ], } }, "stopReason": "tool_use", @@ -487,10 +472,12 @@ def test_transform_response_with_bash_tool(): "cacheWriteInputTokens": 0, }, } + # Mock httpx.Response class MockResponse: def json(self): return response_json + @property def text(self): return json.dumps(response_json) @@ -549,10 +536,11 @@ def test_transform_response_with_structured_response_being_called(): "name": "json_tool_call", "input": { "Current_Temperature": 62, - "Weather_Explanation": "San Francisco typically has mild, cool weather year-round due to its coastal location and marine influence. The city is known for its fog, moderate temperatures, and relatively stable climate with little seasonal variation."}, + "Weather_Explanation": "San Francisco typically has mild, cool weather year-round due to its coastal location and marine influence. The city is known for its fog, moderate temperatures, and relatively stable climate with little seasonal variation.", + }, } } - ] + ], } }, "stopReason": "tool_use", @@ -566,10 +554,12 @@ def test_transform_response_with_structured_response_being_called(): "cacheWriteInputTokens": 0, }, } + # Mock httpx.Response class MockResponse: def json(self): return response_json + @property def text(self): return json.dumps(response_json) @@ -580,49 +570,43 @@ def text(self): "json_mode": True, "tools": [ { - 'type': 'function', - 'function': { - 'name': 'get_weather', - 'description': 'Get the current weather in a given location', - 'parameters': { - 'type': 'object', - 'properties': { - 'location': { - 'type': 'string', - 'description': 'The city and state, e.g. San Francisco, CA' - }, - 'unit': { - 'type': 'string', - 'enum': ['celsius', 'fahrenheit'] - } + "type": "function", + "function": { + "name": "get_weather", + "description": "Get the current weather in a given location", + "parameters": { + "type": "object", + "properties": { + "location": {"type": "string", "description": "The city and state, e.g. San Francisco, CA"}, + "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]}, }, - 'required': ['location'] - } - } + "required": ["location"], + }, + }, }, { - 'type': 'function', - 'function': { - 'name': 'json_tool_call', - 'parameters': { - '$schema': 'http://json-schema.org/draft-07/schema#', - 'type': 'object', - 'required': ['Weather_Explanation', 'Current_Temperature'], - 'properties': { - 'Weather_Explanation': { - 'type': ['string', 'null'], - 'description': '1-2 sentences explaining the weather in the location' + "type": "function", + "function": { + "name": "json_tool_call", + "parameters": { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "required": ["Weather_Explanation", "Current_Temperature"], + "properties": { + "Weather_Explanation": { + "type": ["string", "null"], + "description": "1-2 sentences explaining the weather in the location", + }, + "Current_Temperature": { + "type": ["number", "null"], + "description": "Current temperature in the location", }, - 'Current_Temperature': { - 'type': ['number', 'null'], - 'description': 'Current temperature in the location' - } }, - 'additionalProperties': False - } - } - } - ] + "additionalProperties": False, + }, + }, + }, + ], } # Call the transformation logic result = config._transform_response( @@ -641,7 +625,11 @@ def text(self): assert result.choices[0].message.tool_calls is None assert result.choices[0].message.content is not None - assert result.choices[0].message.content == '{"Current_Temperature": 62, "Weather_Explanation": "San Francisco typically has mild, cool weather year-round due to its coastal location and marine influence. The city is known for its fog, moderate temperatures, and relatively stable climate with little seasonal variation."}' + assert ( + result.choices[0].message.content + == '{"Current_Temperature": 62, "Weather_Explanation": "San Francisco typically has mild, cool weather year-round due to its coastal location and marine influence. The city is known for its fog, moderate temperatures, and relatively stable climate with little seasonal variation."}' + ) + def test_transform_response_with_structured_response_calling_tool(): """Test response transformation with structured response.""" @@ -650,28 +638,20 @@ def test_transform_response_with_structured_response_calling_tool(): # Simulate a Bedrock Converse response with a bash tool call response_json = { - "metrics": { - "latencyMs": 1148 - }, + "metrics": {"latencyMs": 1148}, "output": { - "message": - { + "message": { "content": [ - { - "text": "I\'ll check the current weather in San Francisco for you." - }, + {"text": "I'll check the current weather in San Francisco for you."}, { "toolUse": { - "input": { - "location": "San Francisco, CA", - "unit": "celsius" - }, + "input": {"location": "San Francisco, CA", "unit": "celsius"}, "name": "get_weather", - "toolUseId": "tooluse_oKk__QrqSUmufMw3Q7vGaQ" + "toolUseId": "tooluse_oKk__QrqSUmufMw3Q7vGaQ", } - } + }, ], - "role": "assistant" + "role": "assistant", } }, "stopReason": "tool_use", @@ -682,13 +662,15 @@ def test_transform_response_with_structured_response_calling_tool(): "cacheWriteInputTokens": 0, "inputTokens": 534, "outputTokens": 69, - "totalTokens": 603 - } + "totalTokens": 603, + }, } + # Mock httpx.Response class MockResponse: def json(self): return response_json + @property def text(self): return json.dumps(response_json) @@ -699,49 +681,43 @@ def text(self): "json_mode": True, "tools": [ { - 'type': 'function', - 'function': { - 'name': 'get_weather', - 'description': 'Get the current weather in a given location', - 'parameters': { - 'type': 'object', - 'properties': { - 'location': { - 'type': 'string', - 'description': 'The city and state, e.g. San Francisco, CA' - }, - 'unit': { - 'type': 'string', - 'enum': ['celsius', 'fahrenheit'] - } + "type": "function", + "function": { + "name": "get_weather", + "description": "Get the current weather in a given location", + "parameters": { + "type": "object", + "properties": { + "location": {"type": "string", "description": "The city and state, e.g. San Francisco, CA"}, + "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]}, }, - 'required': ['location'] - } - } + "required": ["location"], + }, + }, }, { - 'type': 'function', - 'function': { - 'name': 'json_tool_call', - 'parameters': { - '$schema': 'http://json-schema.org/draft-07/schema#', - 'type': 'object', - 'required': ['Weather_Explanation', 'Current_Temperature'], - 'properties': { - 'Weather_Explanation': { - 'type': ['string', 'null'], - 'description': '1-2 sentences explaining the weather in the location' + "type": "function", + "function": { + "name": "json_tool_call", + "parameters": { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "required": ["Weather_Explanation", "Current_Temperature"], + "properties": { + "Weather_Explanation": { + "type": ["string", "null"], + "description": "1-2 sentences explaining the weather in the location", + }, + "Current_Temperature": { + "type": ["number", "null"], + "description": "Current temperature in the location", }, - 'Current_Temperature': { - 'type': ['number', 'null'], - 'description': 'Current temperature in the location' - } }, - 'additionalProperties': False - } - } - } - ] + "additionalProperties": False, + }, + }, + }, + ], } # Call the transformation logic result = config._transform_response( @@ -760,7 +736,10 @@ def text(self): assert result.choices[0].message.tool_calls is not None assert len(result.choices[0].message.tool_calls) == 1 assert result.choices[0].message.tool_calls[0].function.name == "get_weather" - assert result.choices[0].message.tool_calls[0].function.arguments == '{"location": "San Francisco, CA", "unit": "celsius"}' + assert ( + result.choices[0].message.tool_calls[0].function.arguments + == '{"location": "San Francisco, CA", "unit": "celsius"}' + ) @pytest.mark.asyncio @@ -775,12 +754,7 @@ async def test_bedrock_bash_tool_acompletion(): } ] - messages = [ - { - "role": "user", - "content": "run ls command and find all python files" - } - ] + messages = [{"role": "user", "content": "run ls command and find all python files"}] try: response = await litellm.acompletion( @@ -788,7 +762,7 @@ async def test_bedrock_bash_tool_acompletion(): messages=messages, tools=tools, # Using dummy API key - test should fail with auth error, proving request formatting works - api_key="dummy-key-for-testing" + api_key="dummy-key-for-testing", ) # If we get here, something's wrong - we expect an auth error assert False, "Expected authentication error but got successful response" @@ -797,8 +771,16 @@ async def test_bedrock_bash_tool_acompletion(): # Check if it's an expected authentication/credentials error auth_error_indicators = [ - "credentials", "authentication", "unauthorized", "access denied", - "aws", "region", "profile", "token", "invalid", "signature" + "credentials", + "authentication", + "unauthorized", + "access denied", + "aws", + "region", + "profile", + "token", + "invalid", + "signature", ] if any(auth_error in error_str for auth_error in auth_error_indicators): @@ -828,17 +810,14 @@ async def test_bedrock_computer_use_acompletion(): { "role": "user", "content": [ - { - "type": "text", - "text": "Go to the bedrock console" - }, + {"type": "text", "text": "Go to the bedrock console"}, { "type": "image_url", "image_url": { "url": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8/5+hHgAHggJ/PchI7wAAAABJRU5ErkJggg==" - } - } - ] + }, + }, + ], } ] @@ -848,7 +827,7 @@ async def test_bedrock_computer_use_acompletion(): messages=messages, tools=tools, # Using dummy API key - test should fail with auth error, proving request formatting works - api_key="dummy-key-for-testing" + api_key="dummy-key-for-testing", ) # If we get here, something's wrong - we expect an auth error assert False, "Expected authentication error but got successful response" @@ -857,8 +836,16 @@ async def test_bedrock_computer_use_acompletion(): # Check if it's an expected authentication/credentials error auth_error_indicators = [ - "credentials", "authentication", "unauthorized", "access denied", - "aws", "region", "profile", "token", "invalid", "signature" + "credentials", + "authentication", + "unauthorized", + "access denied", + "aws", + "region", + "profile", + "token", + "invalid", + "signature", ] if any(auth_error in error_str for auth_error in auth_error_indicators): @@ -886,15 +873,10 @@ async def test_transformation_directly(): { "type": "bash_20241022", "name": "bash", - } + }, ] - messages = [ - { - "role": "user", - "content": "run ls command and find all python files" - } - ] + messages = [{"role": "user", "content": "run ls command and find all python files"}] # Transform request request_data = config.transform_request( @@ -902,7 +884,7 @@ async def test_transformation_directly(): messages=messages, optional_params={"tools": tools}, litellm_params={}, - headers={} + headers={}, ) # Verify the structure @@ -994,16 +976,11 @@ def test_transform_request_with_multiple_tools(): }, "required": ["location"], }, - } - } + }, + }, ] - messages = [ - { - "role": "user", - "content": "run ls command and find all python files" - } - ] + messages = [{"role": "user", "content": "run ls command and find all python files"}] # Transform request request_data = config.transform_request( @@ -1011,7 +988,7 @@ def test_transform_request_with_multiple_tools(): messages=messages, optional_params={"tools": tools}, litellm_params={}, - headers={} + headers={}, ) # Verify the structure @@ -1054,17 +1031,14 @@ def test_transform_request_with_computer_tool_only(): { "role": "user", "content": [ - { - "type": "text", - "text": "Go to the bedrock console" - }, + {"type": "text", "text": "Go to the bedrock console"}, { "type": "image_url", "image_url": { "url": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8/5+hHgAHggJ/PchI7wAAAABJRU5ErkJggg==" - } - } - ] + }, + }, + ], } ] @@ -1074,7 +1048,7 @@ def test_transform_request_with_computer_tool_only(): messages=messages, optional_params={"tools": tools}, litellm_params={}, - headers={} + headers={}, ) # Verify the structure @@ -1102,12 +1076,7 @@ def test_transform_request_with_bash_tool_only(): } ] - messages = [ - { - "role": "user", - "content": "run ls command and find all python files" - } - ] + messages = [{"role": "user", "content": "run ls command and find all python files"}] # Transform request request_data = config.transform_request( @@ -1115,7 +1084,7 @@ def test_transform_request_with_bash_tool_only(): messages=messages, optional_params={"tools": tools}, litellm_params={}, - headers={} + headers={}, ) # Verify the structure @@ -1143,12 +1112,7 @@ def test_transform_request_with_text_editor_tool(): } ] - messages = [ - { - "role": "user", - "content": "Edit this text file" - } - ] + messages = [{"role": "user", "content": "Edit this text file"}] # Transform request request_data = config.transform_request( @@ -1156,7 +1120,7 @@ def test_transform_request_with_text_editor_tool(): messages=messages, optional_params={"tools": tools}, litellm_params={}, - headers={} + headers={}, ) # Verify the structure @@ -1194,16 +1158,11 @@ def test_transform_request_with_function_tool(): }, "required": ["location"], }, - } + }, } ] - messages = [ - { - "role": "user", - "content": "What's the weather like in San Francisco?" - } - ] + messages = [{"role": "user", "content": "What's the weather like in San Francisco?"}] # Transform request request_data = config.transform_request( @@ -1211,7 +1170,7 @@ def test_transform_request_with_function_tool(): messages=messages, optional_params={"tools": tools}, litellm_params={}, - headers={} + headers={}, ) # Verify the structure @@ -1247,7 +1206,7 @@ def test_map_openai_params_with_response_format(): }, "required": ["location"], }, - } + }, } ] @@ -1279,7 +1238,7 @@ def test_map_openai_params_with_response_format(): non_default_params={"response_format": json_schema}, optional_params={"tools": tools}, model="eu.anthropic.claude-sonnet-4-20250514-v1:0", - drop_params=False + drop_params=False, ) assert "tools" in optional_params @@ -1299,31 +1258,21 @@ async def test_assistant_message_cache_control(): # Test assistant message with string content and cache_control messages = [ {"role": "user", "content": "Hello"}, - { - "role": "assistant", - "content": "Hi there!", - "cache_control": {"type": "ephemeral"} - } + {"role": "assistant", "content": "Hi there!", "cache_control": {"type": "ephemeral"}}, ] result = _bedrock_converse_messages_pt( - messages=messages, - model="bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0", - llm_provider="bedrock_converse" + messages=messages, model="bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0", llm_provider="bedrock_converse" ) async_result = await BedrockConverseMessagesProcessor._bedrock_converse_messages_pt_async( - messages=messages, - model="bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0", - llm_provider="bedrock_converse" + messages=messages, model="bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0", llm_provider="bedrock_converse" ) assert result == async_result async_result = await BedrockConverseMessagesProcessor._bedrock_converse_messages_pt_async( - messages=messages, - model="bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0", - llm_provider="bedrock_converse" + messages=messages, model="bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0", llm_provider="bedrock_converse" ) assert result == async_result @@ -1353,26 +1302,16 @@ async def test_assistant_message_list_content_cache_control(): {"role": "user", "content": "Hello"}, { "role": "assistant", - "content": [ - { - "type": "text", - "text": "This should be cached", - "cache_control": {"type": "ephemeral"} - } - ] - } + "content": [{"type": "text", "text": "This should be cached", "cache_control": {"type": "ephemeral"}}], + }, ] result = _bedrock_converse_messages_pt( - messages=messages, - model="bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0", - llm_provider="bedrock_converse" + messages=messages, model="bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0", llm_provider="bedrock_converse" ) async_result = await BedrockConverseMessagesProcessor._bedrock_converse_messages_pt_async( - messages=messages, - model="bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0", - llm_provider="bedrock_converse" + messages=messages, model="bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0", llm_provider="bedrock_converse" ) assert result == async_result @@ -1399,36 +1338,22 @@ async def test_tool_message_cache_control(): "role": "assistant", "content": None, "tool_calls": [ - { - "id": "call_123", - "type": "function", - "function": {"name": "get_weather", "arguments": "{}"} - } - ] + {"id": "call_123", "type": "function", "function": {"name": "get_weather", "arguments": "{}"}} + ], }, { "role": "tool", "tool_call_id": "call_123", - "content": [ - { - "type": "text", - "text": "Weather data: sunny, 25°C", - "cache_control": {"type": "ephemeral"} - } - ] - } + "content": [{"type": "text", "text": "Weather data: sunny, 25°C", "cache_control": {"type": "ephemeral"}}], + }, ] result = _bedrock_converse_messages_pt( - messages=messages, - model="bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0", - llm_provider="bedrock_converse" + messages=messages, model="bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0", llm_provider="bedrock_converse" ) async_result = await BedrockConverseMessagesProcessor._bedrock_converse_messages_pt_async( - messages=messages, - model="bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0", - llm_provider="bedrock_converse" + messages=messages, model="bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0", llm_provider="bedrock_converse" ) assert result == async_result @@ -1463,31 +1388,23 @@ async def test_tool_message_string_content_cache_control(): "role": "assistant", "content": None, "tool_calls": [ - { - "id": "call_123", - "type": "function", - "function": {"name": "get_weather", "arguments": "{}"} - } - ] + {"id": "call_123", "type": "function", "function": {"name": "get_weather", "arguments": "{}"}} + ], }, { "role": "tool", "tool_call_id": "call_123", "content": "Weather: sunny, 25°C", - "cache_control": {"type": "ephemeral"} - } + "cache_control": {"type": "ephemeral"}, + }, ] result = _bedrock_converse_messages_pt( - messages=messages, - model="bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0", - llm_provider="bedrock_converse" + messages=messages, model="bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0", llm_provider="bedrock_converse" ) async_result = await BedrockConverseMessagesProcessor._bedrock_converse_messages_pt_async( - messages=messages, - model="bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0", - llm_provider="bedrock_converse" + messages=messages, model="bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0", llm_provider="bedrock_converse" ) assert result == async_result @@ -1523,22 +1440,18 @@ async def test_assistant_tool_calls_cache_control(): "id": "call_proxy_123", "type": "function", "function": {"name": "calc", "arguments": "{}"}, - "cache_control": {"type": "ephemeral"} + "cache_control": {"type": "ephemeral"}, } - ] - } + ], + }, ] result = _bedrock_converse_messages_pt( - messages=messages, - model="bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0", - llm_provider="bedrock_converse" + messages=messages, model="bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0", llm_provider="bedrock_converse" ) async_result = await BedrockConverseMessagesProcessor._bedrock_converse_messages_pt_async( - messages=messages, - model="bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0", - llm_provider="bedrock_converse" + messages=messages, model="bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0", llm_provider="bedrock_converse" ) assert result == async_result @@ -1575,28 +1488,24 @@ async def test_multiple_tool_calls_with_mixed_cache_control(): "id": "call_1", "type": "function", "function": {"name": "calc", "arguments": '{"expr": "2+2"}'}, - "cache_control": {"type": "ephemeral"} + "cache_control": {"type": "ephemeral"}, }, { "id": "call_2", "type": "function", - "function": {"name": "calc", "arguments": '{"expr": "3+3"}'} + "function": {"name": "calc", "arguments": '{"expr": "3+3"}'}, # No cache_control - } - ] - } + }, + ], + }, ] result = _bedrock_converse_messages_pt( - messages=messages, - model="bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0", - llm_provider="bedrock_converse" + messages=messages, model="bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0", llm_provider="bedrock_converse" ) async_result = await BedrockConverseMessagesProcessor._bedrock_converse_messages_pt_async( - messages=messages, - model="bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0", - llm_provider="bedrock_converse" + messages=messages, model="bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0", llm_provider="bedrock_converse" ) assert result == async_result @@ -1632,20 +1541,16 @@ async def test_no_cache_control_no_cache_point(): { "role": "tool", "tool_call_id": "call_123", - "content": "Tool result" # No cache_control - } + "content": "Tool result", # No cache_control + }, ] result = _bedrock_converse_messages_pt( - messages=messages, - model="bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0", - llm_provider="bedrock_converse" + messages=messages, model="bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0", llm_provider="bedrock_converse" ) async_result = await BedrockConverseMessagesProcessor._bedrock_converse_messages_pt_async( - messages=messages, - model="bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0", - llm_provider="bedrock_converse" + messages=messages, model="bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0", llm_provider="bedrock_converse" ) assert result == async_result @@ -1665,6 +1570,7 @@ async def test_no_cache_control_no_cache_point(): # Guarded Text Feature Tests # ============================================================================ + def test_guarded_text_wraps_in_guardrail_converse_content(): """Test that guarded_text content type gets wrapped in guardContent blocks.""" from litellm.litellm_core_utils.prompt_templates.factory import ( @@ -1677,15 +1583,13 @@ def test_guarded_text_wraps_in_guardrail_converse_content(): "content": [ {"type": "text", "text": "Regular text content"}, {"type": "guarded_text", "text": "This should be guarded"}, - {"type": "text", "text": "More regular text"} - ] + {"type": "text", "text": "More regular text"}, + ], } ] result = _bedrock_converse_messages_pt( - messages=messages, - model="us.amazon.nova-pro-v1:0", - llm_provider="bedrock_converse" + messages=messages, model="us.amazon.nova-pro-v1:0", llm_provider="bedrock_converse" ) # Should have 1 message @@ -1705,6 +1609,7 @@ def test_guarded_text_wraps_in_guardrail_converse_content(): assert "guardContent" in content[1] assert content[1]["guardContent"]["text"]["text"] == "This should be guarded" + def test_guarded_text_with_system_messages(): """Test guarded_text with system messages using the full transformation.""" config = AmazonConverseConfig() @@ -1715,24 +1620,22 @@ def test_guarded_text_with_system_messages(): "role": "user", "content": [ {"type": "text", "text": "What is the main topic of this legal document?"}, - {"type": "guarded_text", "text": "This is a set of very long instructions that you will follow. Here is a legal document that you will use to answer the user's question."} - ] - } + { + "type": "guarded_text", + "text": "This is a set of very long instructions that you will follow. Here is a legal document that you will use to answer the user's question.", + }, + ], + }, ] - optional_params = { - "guardrailConfig": { - "guardrailIdentifier": "gr-abc123", - "guardrailVersion": "DRAFT" - } - } + optional_params = {"guardrailConfig": {"guardrailIdentifier": "gr-abc123", "guardrailVersion": "DRAFT"}} result = config._transform_request( model="us.amazon.nova-pro-v1:0", messages=messages, optional_params=optional_params, litellm_params={}, - headers={} + headers={}, ) # Should have system content blocks @@ -1755,7 +1658,10 @@ def test_guarded_text_with_system_messages(): assert content[0]["text"] == "What is the main topic of this legal document?" # Second should be guardContent assert "guardContent" in content[1] - assert content[1]["guardContent"]["text"]["text"] == "This is a set of very long instructions that you will follow. Here is a legal document that you will use to answer the user's question." + assert ( + content[1]["guardContent"]["text"]["text"] + == "This is a set of very long instructions that you will follow. Here is a legal document that you will use to answer the user's question." + ) def test_guarded_text_with_mixed_content_types(): @@ -1770,15 +1676,13 @@ def test_guarded_text_with_mixed_content_types(): "content": [ {"type": "text", "text": "Look at this image"}, {"type": "image_url", "image_url": {"url": "data:image/png;base64,test"}}, - {"type": "guarded_text", "text": "This sensitive content should be guarded"} - ] + {"type": "guarded_text", "text": "This sensitive content should be guarded"}, + ], } ] result = _bedrock_converse_messages_pt( - messages=messages, - model="us.amazon.nova-pro-v1:0", - llm_provider="bedrock_converse" + messages=messages, model="us.amazon.nova-pro-v1:0", llm_provider="bedrock_converse" ) # Should have 1 message @@ -1800,6 +1704,7 @@ def test_guarded_text_with_mixed_content_types(): assert "guardContent" in content[2] assert content[2]["guardContent"]["text"]["text"] == "This sensitive content should be guarded" + @pytest.mark.asyncio async def test_async_guarded_text(): """Test async version of guarded_text processing.""" @@ -1810,17 +1715,12 @@ async def test_async_guarded_text(): messages = [ { "role": "user", - "content": [ - {"type": "text", "text": "Hello"}, - {"type": "guarded_text", "text": "This should be guarded"} - ] + "content": [{"type": "text", "text": "Hello"}, {"type": "guarded_text", "text": "This should be guarded"}], } ] result = await BedrockConverseMessagesProcessor._bedrock_converse_messages_pt_async( - messages=messages, - model="us.amazon.nova-pro-v1:0", - llm_provider="bedrock_converse" + messages=messages, model="us.amazon.nova-pro-v1:0", llm_provider="bedrock_converse" ) # Should have 1 message @@ -1851,31 +1751,21 @@ def test_guarded_text_with_tool_calls(): "role": "user", "content": [ {"type": "text", "text": "What's the weather?"}, - {"type": "guarded_text", "text": "Please be careful with sensitive information"} - ] + {"type": "guarded_text", "text": "Please be careful with sensitive information"}, + ], }, { "role": "assistant", "content": None, "tool_calls": [ - { - "id": "call_123", - "type": "function", - "function": {"name": "get_weather", "arguments": "{}"} - } - ] + {"id": "call_123", "type": "function", "function": {"name": "get_weather", "arguments": "{}"}} + ], }, - { - "role": "tool", - "tool_call_id": "call_123", - "content": "It's sunny and 25°C" - } + {"role": "tool", "tool_call_id": "call_123", "content": "It's sunny and 25°C"}, ] result = _bedrock_converse_messages_pt( - messages=messages, - model="us.amazon.nova-pro-v1:0", - llm_provider="bedrock_converse" + messages=messages, model="us.amazon.nova-pro-v1:0", llm_provider="bedrock_converse" ) # Should have 3 messages @@ -1909,26 +1799,18 @@ def test_guarded_text_guardrail_config_preserved(): messages = [ { "role": "user", - "content": [ - {"type": "text", "text": "Hello"}, - {"type": "guarded_text", "text": "This should be guarded"} - ] + "content": [{"type": "text", "text": "Hello"}, {"type": "guarded_text", "text": "This should be guarded"}], } ] - optional_params = { - "guardrailConfig": { - "guardrailIdentifier": "gr-abc123", - "guardrailVersion": "DRAFT" - } - } + optional_params = {"guardrailConfig": {"guardrailIdentifier": "gr-abc123", "guardrailVersion": "DRAFT"}} result = config._transform_request( model="us.amazon.nova-pro-v1:0", messages=messages, optional_params=optional_params, litellm_params={}, - headers={} + headers={}, ) # GuardrailConfig should be present at top level @@ -1946,23 +1828,10 @@ def test_auto_convert_last_user_message_to_guarded_text(): config = AmazonConverseConfig() messages = [ - { - "role": "user", - "content": [ - { - "type": "text", - "text": "What is the main topic of this legal document?" - } - ] - } + {"role": "user", "content": [{"type": "text", "text": "What is the main topic of this legal document?"}]} ] - optional_params = { - "guardrailConfig": { - "guardrailIdentifier": "gr-abc123", - "guardrailVersion": "1" - } - } + optional_params = {"guardrailConfig": {"guardrailIdentifier": "gr-abc123", "guardrailVersion": "1"}} # Test the helper method directly converted_messages = config._convert_consecutive_user_messages_to_guarded_text(messages, optional_params) @@ -1979,19 +1848,9 @@ def test_auto_convert_last_user_message_string_content(): """Test that last user message with string content is automatically converted to guarded_text when guardrailConfig is present.""" config = AmazonConverseConfig() - messages = [ - { - "role": "user", - "content": "What is the main topic of this legal document?" - } - ] + messages = [{"role": "user", "content": "What is the main topic of this legal document?"}] - optional_params = { - "guardrailConfig": { - "guardrailIdentifier": "gr-abc123", - "guardrailVersion": "1" - } - } + optional_params = {"guardrailConfig": {"guardrailIdentifier": "gr-abc123", "guardrailVersion": "1"}} # Test the helper method directly converted_messages = config._convert_consecutive_user_messages_to_guarded_text(messages, optional_params) @@ -2009,15 +1868,7 @@ def test_no_conversion_when_no_guardrail_config(): config = AmazonConverseConfig() messages = [ - { - "role": "user", - "content": [ - { - "type": "text", - "text": "What is the main topic of this legal document?" - } - ] - } + {"role": "user", "content": [{"type": "text", "text": "What is the main topic of this legal document?"}]} ] optional_params = {} @@ -2033,24 +1884,9 @@ def test_no_conversion_when_guarded_text_already_present(): """Test that no conversion happens when guarded_text is already present in the last user message.""" config = AmazonConverseConfig() - messages = [ - { - "role": "user", - "content": [ - { - "type": "guarded_text", - "text": "This is already guarded" - } - ] - } - ] + messages = [{"role": "user", "content": [{"type": "guarded_text", "text": "This is already guarded"}]}] - optional_params = { - "guardrailConfig": { - "guardrailIdentifier": "gr-abc123", - "guardrailVersion": "1" - } - } + optional_params = {"guardrailConfig": {"guardrailIdentifier": "gr-abc123", "guardrailVersion": "1"}} # Test the helper method directly converted_messages = config._convert_consecutive_user_messages_to_guarded_text(messages, optional_params) @@ -2067,24 +1903,13 @@ def test_auto_convert_with_mixed_content(): { "role": "user", "content": [ - { - "type": "text", - "text": "What is the main topic of this legal document?" - }, - { - "type": "image_url", - "image_url": {"url": "https://example.com/image.jpg"} - } - ] + {"type": "text", "text": "What is the main topic of this legal document?"}, + {"type": "image_url", "image_url": {"url": "https://example.com/image.jpg"}}, + ], } ] - optional_params = { - "guardrailConfig": { - "guardrailIdentifier": "gr-abc123", - "guardrailVersion": "1" - } - } + optional_params = {"guardrailConfig": {"guardrailIdentifier": "gr-abc123", "guardrailVersion": "1"}} # Test the helper method directly converted_messages = config._convert_consecutive_user_messages_to_guarded_text(messages, optional_params) @@ -2108,23 +1933,10 @@ def test_auto_convert_in_full_transformation(): config = AmazonConverseConfig() messages = [ - { - "role": "user", - "content": [ - { - "type": "text", - "text": "What is the main topic of this legal document?" - } - ] - } + {"role": "user", "content": [{"type": "text", "text": "What is the main topic of this legal document?"}]} ] - optional_params = { - "guardrailConfig": { - "guardrailIdentifier": "gr-abc123", - "guardrailVersion": "1" - } - } + optional_params = {"guardrailConfig": {"guardrailIdentifier": "gr-abc123", "guardrailVersion": "1"}} # Test the full transformation result = config._transform_request( @@ -2132,7 +1944,7 @@ def test_auto_convert_in_full_transformation(): messages=messages, optional_params=optional_params, litellm_params={}, - headers={} + headers={}, ) # Verify the transformation worked @@ -2152,45 +1964,13 @@ def test_convert_consecutive_user_messages_to_guarded_text(): config = AmazonConverseConfig() messages = [ - { - "role": "user", - "content": [ - { - "type": "text", - "text": "First user message" - } - ] - }, - { - "role": "assistant", - "content": "Assistant response" - }, - { - "role": "user", - "content": [ - { - "type": "text", - "text": "Second user message" - } - ] - }, - { - "role": "user", - "content": [ - { - "type": "text", - "text": "Third user message" - } - ] - } + {"role": "user", "content": [{"type": "text", "text": "First user message"}]}, + {"role": "assistant", "content": "Assistant response"}, + {"role": "user", "content": [{"type": "text", "text": "Second user message"}]}, + {"role": "user", "content": [{"type": "text", "text": "Third user message"}]}, ] - optional_params = { - "guardrailConfig": { - "guardrailIdentifier": "gr-abc123", - "guardrailVersion": "1" - } - } + optional_params = {"guardrailConfig": {"guardrailIdentifier": "gr-abc123", "guardrailVersion": "1"}} # Test the helper method directly converted_messages = config._convert_consecutive_user_messages_to_guarded_text(messages, optional_params) @@ -2223,41 +2003,12 @@ def test_convert_all_user_messages_when_all_consecutive(): config = AmazonConverseConfig() messages = [ - { - "role": "user", - "content": [ - { - "type": "text", - "text": "First user message" - } - ] - }, - { - "role": "user", - "content": [ - { - "type": "text", - "text": "Second user message" - } - ] - }, - { - "role": "user", - "content": [ - { - "type": "text", - "text": "Third user message" - } - ] - } + {"role": "user", "content": [{"type": "text", "text": "First user message"}]}, + {"role": "user", "content": [{"type": "text", "text": "Second user message"}]}, + {"role": "user", "content": [{"type": "text", "text": "Third user message"}]}, ] - optional_params = { - "guardrailConfig": { - "guardrailIdentifier": "gr-abc123", - "guardrailVersion": "1" - } - } + optional_params = {"guardrailConfig": {"guardrailIdentifier": "gr-abc123", "guardrailVersion": "1"}} # Test the helper method directly converted_messages = config._convert_consecutive_user_messages_to_guarded_text(messages, optional_params) @@ -2279,26 +2030,12 @@ def test_convert_consecutive_user_messages_with_string_content(): config = AmazonConverseConfig() messages = [ - { - "role": "assistant", - "content": "Assistant response" - }, - { - "role": "user", - "content": "First user message" - }, - { - "role": "user", - "content": "Second user message" - } + {"role": "assistant", "content": "Assistant response"}, + {"role": "user", "content": "First user message"}, + {"role": "user", "content": "Second user message"}, ] - optional_params = { - "guardrailConfig": { - "guardrailIdentifier": "gr-abc123", - "guardrailVersion": "1" - } - } + optional_params = {"guardrailConfig": {"guardrailIdentifier": "gr-abc123", "guardrailVersion": "1"}} # Test the helper method directly converted_messages = config._convert_consecutive_user_messages_to_guarded_text(messages, optional_params) @@ -2327,32 +2064,11 @@ def test_skip_consecutive_user_messages_with_existing_guarded_text(): config = AmazonConverseConfig() messages = [ - { - "role": "user", - "content": [ - { - "type": "guarded_text", - "text": "Already guarded" - } - ] - }, - { - "role": "user", - "content": [ - { - "type": "text", - "text": "Should be converted" - } - ] - } + {"role": "user", "content": [{"type": "guarded_text", "text": "Already guarded"}]}, + {"role": "user", "content": [{"type": "text", "text": "Should be converted"}]}, ] - optional_params = { - "guardrailConfig": { - "guardrailIdentifier": "gr-abc123", - "guardrailVersion": "1" - } - } + optional_params = {"guardrailConfig": {"guardrailIdentifier": "gr-abc123", "guardrailVersion": "1"}} # Test the helper method directly converted_messages = config._convert_consecutive_user_messages_to_guarded_text(messages, optional_params) @@ -2384,11 +2100,7 @@ def test_request_metadata_transformation(): """Test that requestMetadata is properly transformed to top-level field.""" config = AmazonConverseConfig() - request_metadata = { - "cost_center": "engineering", - "user_id": "user123", - "session_id": "sess_abc123" - } + request_metadata = {"cost_center": "engineering", "user_id": "user123", "session_id": "sess_abc123"} messages = [ {"role": "user", "content": "Hello!"}, @@ -2400,7 +2112,7 @@ def test_request_metadata_transformation(): messages=messages, optional_params={"requestMetadata": request_metadata}, litellm_params={}, - headers={} + headers={}, ) # Verify that requestMetadata appears as top-level field @@ -2426,7 +2138,7 @@ def test_request_metadata_validation(): messages=messages, optional_params={"requestMetadata": valid_metadata}, litellm_params={}, - headers={} + headers={}, ) # Test too many items (max 16) @@ -2438,7 +2150,7 @@ def test_request_metadata_validation(): messages=messages, optional_params={"requestMetadata": too_many_items}, litellm_params={}, - headers={} + headers={}, ) assert False, "Should have raised validation error for too many items" except Exception as e: @@ -2461,7 +2173,7 @@ def test_request_metadata_key_constraints(): messages=messages, optional_params={"requestMetadata": invalid_metadata}, litellm_params={}, - headers={} + headers={}, ) assert False, "Should have raised validation error for key too long" except Exception as e: @@ -2476,7 +2188,7 @@ def test_request_metadata_key_constraints(): messages=messages, optional_params={"requestMetadata": invalid_metadata}, litellm_params={}, - headers={} + headers={}, ) assert False, "Should have raised validation error for empty key" except Exception as e: @@ -2499,7 +2211,7 @@ def test_request_metadata_value_constraints(): messages=messages, optional_params={"requestMetadata": invalid_metadata}, litellm_params={}, - headers={} + headers={}, ) assert False, "Should have raised validation error for value too long" except Exception as e: @@ -2514,7 +2226,7 @@ def test_request_metadata_value_constraints(): messages=messages, optional_params={"requestMetadata": valid_metadata}, litellm_params={}, - headers={} + headers={}, ) @@ -2537,7 +2249,7 @@ def test_request_metadata_character_pattern(): messages=messages, optional_params={"requestMetadata": valid_metadata}, litellm_params={}, - headers={} + headers={}, ) @@ -2545,10 +2257,7 @@ def test_request_metadata_with_other_params(): """Test that requestMetadata works alongside other parameters.""" config = AmazonConverseConfig() - request_metadata = { - "experiment": "test_A", - "user_type": "premium" - } + request_metadata = {"experiment": "test_A", "user_type": "premium"} messages = [ {"role": "user", "content": "What's the weather?"}, @@ -2562,12 +2271,10 @@ def test_request_metadata_with_other_params(): "description": "Get the current weather", "parameters": { "type": "object", - "properties": { - "location": {"type": "string"} - }, - "required": ["location"] - } - } + "properties": {"location": {"type": "string"}}, + "required": ["location"], + }, + }, } ] @@ -2575,14 +2282,9 @@ def test_request_metadata_with_other_params(): request_data = config.transform_request( model="anthropic.claude-3-5-sonnet-20240620-v1:0", messages=messages, - optional_params={ - "requestMetadata": request_metadata, - "tools": tools, - "max_tokens": 100, - "temperature": 0.7 - }, + optional_params={"requestMetadata": request_metadata, "tools": tools, "max_tokens": 100, "temperature": 0.7}, litellm_params={}, - headers={} + headers={}, ) # Verify requestMetadata is at top level @@ -2607,7 +2309,7 @@ def test_request_metadata_empty(): messages=messages, optional_params={"requestMetadata": {}}, litellm_params={}, - headers={} + headers={}, ) assert "requestMetadata" in request_data @@ -2626,7 +2328,7 @@ def test_request_metadata_not_provided(): messages=messages, optional_params={}, litellm_params={}, - headers={} + headers={}, ) # requestMetadata should not be in the request @@ -2649,16 +2351,14 @@ def test_empty_assistant_message_handling(): messages = [ {"role": "user", "content": "Hello"}, {"role": "assistant", "content": ""}, # Empty content - {"role": "user", "content": "How are you?"} + {"role": "user", "content": "How are you?"}, ] # Use patch to ensure we modify the litellm reference that factory.py actually uses # This avoids issues with module reloading during parallel test execution with patch.object(factory_module.litellm, "modify_params", True): result = _bedrock_converse_messages_pt( - messages=messages, - model="anthropic.claude-3-5-sonnet-20240620-v1:0", - llm_provider="bedrock_converse" + messages=messages, model="anthropic.claude-3-5-sonnet-20240620-v1:0", llm_provider="bedrock_converse" ) # Should have 3 messages: user, assistant (with placeholder), user @@ -2676,13 +2376,11 @@ def test_empty_assistant_message_handling(): messages = [ {"role": "user", "content": "Hello"}, {"role": "assistant", "content": " "}, # Whitespace-only content - {"role": "user", "content": "How are you?"} + {"role": "user", "content": "How are you?"}, ] result = _bedrock_converse_messages_pt( - messages=messages, - model="anthropic.claude-3-5-sonnet-20240620-v1:0", - llm_provider="bedrock_converse" + messages=messages, model="anthropic.claude-3-5-sonnet-20240620-v1:0", llm_provider="bedrock_converse" ) # Assistant message should have placeholder text instead of whitespace @@ -2693,13 +2391,11 @@ def test_empty_assistant_message_handling(): messages = [ {"role": "user", "content": "Hello"}, {"role": "assistant", "content": [{"type": "text", "text": ""}]}, # Empty text in list - {"role": "user", "content": "How are you?"} + {"role": "user", "content": "How are you?"}, ] result = _bedrock_converse_messages_pt( - messages=messages, - model="anthropic.claude-3-5-sonnet-20240620-v1:0", - llm_provider="bedrock_converse" + messages=messages, model="anthropic.claude-3-5-sonnet-20240620-v1:0", llm_provider="bedrock_converse" ) # Assistant message should have placeholder text instead of empty text @@ -2710,13 +2406,11 @@ def test_empty_assistant_message_handling(): messages = [ {"role": "user", "content": "Hello"}, {"role": "assistant", "content": "I'm doing well, thank you!"}, # Normal content - {"role": "user", "content": "How are you?"} + {"role": "user", "content": "How are you?"}, ] result = _bedrock_converse_messages_pt( - messages=messages, - model="anthropic.claude-3-5-sonnet-20240620-v1:0", - llm_provider="bedrock_converse" + messages=messages, model="anthropic.claude-3-5-sonnet-20240620-v1:0", llm_provider="bedrock_converse" ) # Assistant message should keep original content @@ -2828,6 +2522,7 @@ def test_thinking_with_max_completion_tokens(): assert result["thinking"]["type"] == "enabled" assert result["thinking"]["budget_tokens"] == 5000 + def test_drop_thinking_param_when_thinking_blocks_missing(): """ Test that thinking param is dropped when modify_params=True and @@ -2868,24 +2563,21 @@ def test_drop_thinking_param_when_thinking_blocks_missing(): optional_params = {"thinking": {"type": "enabled", "budget_tokens": 1000}} # Verify the condition is detected - assert last_assistant_with_tool_calls_has_no_thinking_blocks( - messages_without_thinking_blocks - ), "Should detect missing thinking_blocks" + assert last_assistant_with_tool_calls_has_no_thinking_blocks(messages_without_thinking_blocks), ( + "Should detect missing thinking_blocks" + ) # Simulate what _transform_request_helper does if ( optional_params.get("thinking") is not None and messages_without_thinking_blocks is not None - and last_assistant_with_tool_calls_has_no_thinking_blocks( - messages_without_thinking_blocks - ) + and last_assistant_with_tool_calls_has_no_thinking_blocks(messages_without_thinking_blocks) ): if litellm.modify_params: optional_params.pop("thinking", None) assert "thinking" not in optional_params, ( - "thinking param should be dropped when modify_params=True " - "and thinking_blocks are missing" + "thinking param should be dropped when modify_params=True and thinking_blocks are missing" ) # Test case 2: thinking should NOT be dropped when thinking_blocks are present @@ -2901,29 +2593,23 @@ def test_drop_thinking_param_when_thinking_blocks_missing(): "function": {"name": "search", "arguments": "{}"}, } ], - "thinking_blocks": [ - {"type": "thinking", "thinking": "Let me search for weather..."} - ], + "thinking_blocks": [{"type": "thinking", "thinking": "Let me search for weather..."}], }, {"role": "tool", "content": "Weather is sunny", "tool_call_id": "call_123"}, ] - optional_params_with_thinking = { - "thinking": {"type": "enabled", "budget_tokens": 1000} - } + optional_params_with_thinking = {"thinking": {"type": "enabled", "budget_tokens": 1000}} # Verify the condition is NOT detected when thinking_blocks are present - assert not last_assistant_with_tool_calls_has_no_thinking_blocks( - messages_with_thinking_blocks - ), "Should NOT detect missing thinking_blocks when they are present" + assert not last_assistant_with_tool_calls_has_no_thinking_blocks(messages_with_thinking_blocks), ( + "Should NOT detect missing thinking_blocks when they are present" + ) # Simulate what _transform_request_helper does if ( optional_params_with_thinking.get("thinking") is not None and messages_with_thinking_blocks is not None - and last_assistant_with_tool_calls_has_no_thinking_blocks( - messages_with_thinking_blocks - ) + and last_assistant_with_tool_calls_has_no_thinking_blocks(messages_with_thinking_blocks) ): if litellm.modify_params: optional_params_with_thinking.pop("thinking", None) @@ -2935,24 +2621,18 @@ def test_drop_thinking_param_when_thinking_blocks_missing(): # Test case 3: thinking should NOT be dropped when modify_params=False litellm.modify_params = False - optional_params_no_modify = { - "thinking": {"type": "enabled", "budget_tokens": 1000} - } + optional_params_no_modify = {"thinking": {"type": "enabled", "budget_tokens": 1000}} # Simulate what _transform_request_helper does if ( optional_params_no_modify.get("thinking") is not None and messages_without_thinking_blocks is not None - and last_assistant_with_tool_calls_has_no_thinking_blocks( - messages_without_thinking_blocks - ) + and last_assistant_with_tool_calls_has_no_thinking_blocks(messages_without_thinking_blocks) ): if litellm.modify_params: optional_params_no_modify.pop("thinking", None) - assert "thinking" in optional_params_no_modify, ( - "thinking param should NOT be dropped when modify_params=False" - ) + assert "thinking" in optional_params_no_modify, "thinking param should NOT be dropped when modify_params=False" finally: # Restore original modify_params setting @@ -2960,46 +2640,55 @@ def test_drop_thinking_param_when_thinking_blocks_missing(): def test_supports_native_structured_outputs(): - """Test model detection for native structured outputs support.""" - config = AmazonConverseConfig() + """Test model detection for native structured outputs support. - # Supported models - assert config._supports_native_structured_outputs( - "anthropic.claude-sonnet-4-5-20250929-v1:0" - ) - assert config._supports_native_structured_outputs( - "anthropic.claude-haiku-4-5-20251001-v1:0" - ) - assert config._supports_native_structured_outputs( - "anthropic.claude-opus-4-6-v1:0" - ) - assert config._supports_native_structured_outputs( - "eu.anthropic.claude-opus-4-5-20260101-v1:0" - ) - assert config._supports_native_structured_outputs("qwen.qwen3-235b-instruct-v1:0") - assert config._supports_native_structured_outputs("mistral.mistral-large-3-v1:0") - assert config._supports_native_structured_outputs("deepseek.deepseek-v3.1-v1:0") + Support is driven by the ``supports_native_structured_output`` flag in the + cost JSON (litellm.model_cost), not a hardcoded model set. + """ + old_env = os.environ.get("LITELLM_LOCAL_MODEL_COST_MAP") + old_cost = litellm.model_cost + os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True" + litellm.model_cost = litellm.get_model_cost_map(url="") + try: + config = AmazonConverseConfig() - # Unsupported models — should fall back to tool-call approach - assert not config._supports_native_structured_outputs( - "anthropic.claude-3-5-sonnet-20241022-v2:0" - ) - assert not config._supports_native_structured_outputs( - "anthropic.claude-sonnet-4-20250514-v1:0" - ) - assert not config._supports_native_structured_outputs( - "meta.llama3-3-70b-instruct-v1:0" - ) - assert not config._supports_native_structured_outputs( - "amazon.nova-pro-v1:0" - ) - # Excluded despite AWS listing them: broken constrained decoding on Bedrock - assert not config._supports_native_structured_outputs( - "openai.gpt-oss-120b-1:0" - ) - assert not config._supports_native_structured_outputs( - "mistral.magistral-small-2509" - ) + # Supported models (have supports_native_structured_output=true in cost JSON) + assert config._supports_native_structured_outputs("anthropic.claude-sonnet-4-5-20250929-v1:0") + assert config._supports_native_structured_outputs("anthropic.claude-haiku-4-5-20251001-v1:0") + assert config._supports_native_structured_outputs("anthropic.claude-opus-4-6-v1") + # Version suffix (:0) is stripped when looking up models without it in cost JSON + assert config._supports_native_structured_outputs("anthropic.claude-opus-4-6-v1:0") + # Regional prefix is stripped by get_bedrock_base_model + assert config._supports_native_structured_outputs("eu.anthropic.claude-opus-4-5-20251101-v1:0") + # Claude 4.6 Sonnet + assert config._supports_native_structured_outputs("anthropic.claude-sonnet-4-6") + assert config._supports_native_structured_outputs("us.anthropic.claude-sonnet-4-6") + # Non-Anthropic models + assert config._supports_native_structured_outputs("qwen.qwen3-235b-a22b-2507-v1:0") + assert config._supports_native_structured_outputs("mistral.mistral-large-3-675b-instruct") + assert config._supports_native_structured_outputs("minimax.minimax-m2") + assert config._supports_native_structured_outputs("moonshot.kimi-k2-thinking") + assert config._supports_native_structured_outputs("nvidia.nemotron-nano-3-30b") + # DeepSeek: old substring "deepseek-v3.1" didn't match real ID + assert config._supports_native_structured_outputs("deepseek.v3-v1:0") + + # Unsupported models -- should fall back to tool-call approach + assert not config._supports_native_structured_outputs("anthropic.claude-3-5-sonnet-20241022-v2:0") + assert not config._supports_native_structured_outputs("anthropic.claude-sonnet-4-20250514-v1:0") + assert not config._supports_native_structured_outputs("meta.llama3-3-70b-instruct-v1:0") + assert not config._supports_native_structured_outputs("amazon.nova-pro-v1:0") + # Excluded: broken constrained decoding on Bedrock + assert not config._supports_native_structured_outputs("openai.gpt-oss-120b-1:0") + assert not config._supports_native_structured_outputs("mistral.magistral-small-2509") + # Excluded: ignores schema or broken on Bedrock + assert not config._supports_native_structured_outputs("google.gemma-3-27b-it") + assert not config._supports_native_structured_outputs("nvidia.nemotron-nano-12b-v2") + finally: + litellm.model_cost = old_cost + if old_env is None: + os.environ.pop("LITELLM_LOCAL_MODEL_COST_MAP", None) + else: + os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = old_env def test_create_output_config_for_response_format(): @@ -3078,10 +2767,7 @@ def test_translate_response_format_native_output_config(): parsed_schema = json.loads(schema_str) expected_schema = {**response_format["json_schema"]["schema"], "additionalProperties": False} assert parsed_schema == expected_schema - assert ( - result["outputConfig"]["textFormat"]["structure"]["jsonSchema"]["name"] - == "WeatherResult" - ) + assert result["outputConfig"]["textFormat"]["structure"]["jsonSchema"]["name"] == "WeatherResult" def test_translate_response_format_fallback_tool_call(): @@ -3118,42 +2804,53 @@ def test_translate_response_format_fallback_tool_call(): def test_native_structured_output_no_fake_stream(): """When using native structured outputs with streaming, fake_stream should NOT be set.""" - config = AmazonConverseConfig() + old_env = os.environ.get("LITELLM_LOCAL_MODEL_COST_MAP") + old_cost = litellm.model_cost + os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True" + litellm.model_cost = litellm.get_model_cost_map(url="") + try: + config = AmazonConverseConfig() - response_format = { - "type": "json_schema", - "json_schema": { - "name": "Result", - "schema": { - "type": "object", - "properties": { - "answer": {"type": "string"}, + response_format = { + "type": "json_schema", + "json_schema": { + "name": "Result", + "schema": { + "type": "object", + "properties": { + "answer": {"type": "string"}, + }, }, }, - }, - } - - optional_params: dict = {} - result = config._translate_response_format_param( - value=response_format, - model="anthropic.claude-sonnet-4-5-20250929-v1:0", - optional_params=optional_params, - non_default_params={"response_format": response_format, "stream": True}, - is_thinking_enabled=False, - ) + } - assert "outputConfig" in result - assert result["json_mode"] is True - # No fake_stream for native approach - assert "fake_stream" not in result + optional_params: dict = {} + result = config._translate_response_format_param( + value=response_format, + model="anthropic.claude-sonnet-4-5-20250929-v1:0", + optional_params=optional_params, + non_default_params={"response_format": response_format, "stream": True}, + is_thinking_enabled=False, + ) - # Verify the schema content - schema_str = result["outputConfig"]["textFormat"]["structure"]["jsonSchema"]["schema"] - assert json.loads(schema_str) == { - "type": "object", - "properties": {"answer": {"type": "string"}}, - "additionalProperties": False, - } + assert "outputConfig" in result + assert result["json_mode"] is True + # No fake_stream for native approach + assert "fake_stream" not in result + + # Verify the schema content + schema_str = result["outputConfig"]["textFormat"]["structure"]["jsonSchema"]["schema"] + assert json.loads(schema_str) == { + "type": "object", + "properties": {"answer": {"type": "string"}}, + "additionalProperties": False, + } + finally: + litellm.model_cost = old_cost + if old_env is None: + os.environ.pop("LITELLM_LOCAL_MODEL_COST_MAP", None) + else: + os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = old_env def test_transform_request_with_output_config(): @@ -3227,11 +2924,7 @@ def test_transform_response_native_structured_output(): "output": { "message": { "role": "assistant", - "content": [ - { - "text": '{"temp": 62, "description": "Mild and foggy"}' - } - ], + "content": [{"text": '{"temp": 62, "description": "Mild and foggy"}'}], } }, "stopReason": "end_turn", @@ -3388,23 +3081,34 @@ def test_add_additional_properties_definitions(): def test_json_object_no_schema_falls_back_to_tool_call(): """response_format: {type: json_object} with no schema should use tool-call fallback, even for models that support native structured outputs.""" - config = AmazonConverseConfig() - optional_params: dict = {} - non_default_params = {"response_format": {"type": "json_object"}} + old_env = os.environ.get("LITELLM_LOCAL_MODEL_COST_MAP") + old_cost = litellm.model_cost + os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True" + litellm.model_cost = litellm.get_model_cost_map(url="") + try: + config = AmazonConverseConfig() + optional_params: dict = {} + non_default_params = {"response_format": {"type": "json_object"}} - result = config._translate_response_format_param( - value=non_default_params["response_format"], - model="anthropic.claude-sonnet-4-5-20250929-v1:0", - optional_params=optional_params, - non_default_params=non_default_params, - is_thinking_enabled=False, - ) + result = config._translate_response_format_param( + value=non_default_params["response_format"], + model="anthropic.claude-sonnet-4-5-20250929-v1:0", + optional_params=optional_params, + non_default_params=non_default_params, + is_thinking_enabled=False, + ) - # Should NOT use native outputConfig (no schema provided) - assert "outputConfig" not in result - # Should use tool-call fallback - assert "tools" in result - assert result["json_mode"] is True + # Should NOT use native outputConfig (no schema provided) + assert "outputConfig" not in result + # Should use tool-call fallback + assert "tools" in result + assert result["json_mode"] is True + finally: + litellm.model_cost = old_cost + if old_env is None: + os.environ.pop("LITELLM_LOCAL_MODEL_COST_MAP", None) + else: + os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = old_env def test_output_config_applies_additional_properties(): @@ -3427,7 +3131,6 @@ def test_output_config_applies_additional_properties(): assert parsed["properties"]["nested"]["additionalProperties"] is False - _TOOL_PARAM = [ { "type": "function", @@ -3505,9 +3208,7 @@ def test_parallel_tool_calls_older_model_drops_disable_flag(): class TestBedrockMinThinkingBudgetTokens: """Test that thinking.budget_tokens is clamped to the Bedrock minimum (1024).""" - def _map_params( - self, thinking_value, model="anthropic.claude-3-7-sonnet-20250219-v1:0" - ): + def _map_params(self, thinking_value, model="anthropic.claude-3-7-sonnet-20250219-v1:0"): """Helper to call map_openai_params with the given thinking value.""" config = AmazonConverseConfig() non_default_params = {"thinking": thinking_value} @@ -3545,6 +3246,7 @@ def test_no_thinking_param_does_not_error(self): ) assert "thinking" not in result or result.get("thinking") is None + def test_transform_response_with_both_json_tool_call_and_real_tool(): """ When Bedrock returns BOTH json_tool_call AND a real tool (get_weather), @@ -3733,9 +3435,7 @@ def test_streaming_filters_json_tool_call_with_real_tools(): # Chunk 2: json_tool_call delta — should become text, not tool_use json_delta = ContentBlockDeltaEvent(toolUse={"input": '{"temp": 62}'}) - text_2, tool_use_2, _, _, _ = decoder._handle_converse_delta_event( - json_delta, index=0 - ) + text_2, tool_use_2, _, _, _ = decoder._handle_converse_delta_event(json_delta, index=0) assert text_2 == '{"temp": 62}' assert tool_use_2 is None @@ -3758,12 +3458,8 @@ def test_streaming_filters_json_tool_call_with_real_tools(): assert decoder.tool_calls_index == 0 # Chunk 5: real tool delta - real_delta = ContentBlockDeltaEvent( - toolUse={"input": '{"location": "SF"}'} - ) - text_5, tool_use_5, _, _, _ = decoder._handle_converse_delta_event( - real_delta, index=1 - ) + real_delta = ContentBlockDeltaEvent(toolUse={"input": '{"location": "SF"}'}) + text_5, tool_use_5, _, _, _ = decoder._handle_converse_delta_event(real_delta, index=1) assert text_5 == "" assert tool_use_5 is not None assert tool_use_5["function"]["arguments"] == '{"location": "SF"}' @@ -3796,10 +3492,7 @@ def test_streaming_without_json_mode_passes_all_tools(): # json_tool_call delta — should be a tool_use, not text json_delta = ContentBlockDeltaEvent(toolUse={"input": '{"data": 1}'}) - text, tool_use_delta, _, _, _ = decoder._handle_converse_delta_event( - json_delta, index=0 - ) + text, tool_use_delta, _, _, _ = decoder._handle_converse_delta_event(json_delta, index=0) assert text == "" assert tool_use_delta is not None assert tool_use_delta["function"]["arguments"] == '{"data": 1}' -