diff --git a/python/packages/anthropic/agent_framework_anthropic/_chat_client.py b/python/packages/anthropic/agent_framework_anthropic/_chat_client.py index a5b169fbbf..884a3a1fbc 100644 --- a/python/packages/anthropic/agent_framework_anthropic/_chat_client.py +++ b/python/packages/anthropic/agent_framework_anthropic/_chat_client.py @@ -1,5 +1,4 @@ # Copyright (c) Microsoft. All rights reserved. - from collections.abc import AsyncIterable, MutableMapping, MutableSequence, Sequence from typing import Any, ClassVar, Final, TypeVar @@ -13,7 +12,10 @@ ChatResponse, ChatResponseUpdate, CitationAnnotation, + CodeInterpreterToolCallContent, + CodeInterpreterToolResultContent, Contents, + ErrorContent, FinishReason, FunctionCallContent, FunctionResultContent, @@ -21,6 +23,8 @@ HostedFileContent, HostedMCPTool, HostedWebSearchTool, + MCPServerToolCallContent, + MCPServerToolResultContent, Role, TextContent, TextReasoningContent, @@ -45,6 +49,8 @@ BetaTextBlock, BetaUsage, ) +from anthropic.types.beta.beta_bash_code_execution_tool_result_error import BetaBashCodeExecutionToolResultError +from anthropic.types.beta.beta_code_execution_tool_result_error import BetaCodeExecutionToolResultError from pydantic import SecretStr, ValidationError logger = get_logger("agent_framework.anthropic") @@ -588,23 +594,49 @@ def _parse_contents_from_anthropic( ) case "tool_use" | "mcp_tool_use" | "server_tool_use": self._last_call_id_name = (content_block.id, content_block.name) - contents.append( - FunctionCallContent( - call_id=content_block.id, - name=content_block.name, - arguments=content_block.input, - raw_representation=content_block, + if content_block.type == "mcp_tool_use": + contents.append( + MCPServerToolCallContent( + call_id=content_block.id, + tool_name=content_block.name, + server_name=None, + arguments=content_block.input, + raw_representation=content_block, + ) + ) + elif "code_execution" in (content_block.name or ""): + contents.append( + CodeInterpreterToolCallContent( + call_id=content_block.id, + inputs=[TextContent(text=str(content_block.input), raw_representation=content_block)], + raw_representation=content_block, + ) + ) + else: + contents.append( + FunctionCallContent( + call_id=content_block.id, + name=content_block.name, + arguments=content_block.input, + raw_representation=content_block, + ) ) - ) case "mcp_tool_result": call_id, name = self._last_call_id_name or (None, None) + parsed_output: list[Contents] | None = None + if content_block.content: + if isinstance(content_block.content, list): + parsed_output = self._parse_contents_from_anthropic(content_block.content) + elif isinstance(content_block.content, (str, bytes)): + parsed_output = [ + TextContent(text=str(content_block.content), raw_representation=content_block) + ] + else: + parsed_output = self._parse_contents_from_anthropic([content_block.content]) contents.append( - FunctionResultContent( + MCPServerToolResultContent( call_id=content_block.tool_use_id, - name=name if name and call_id == content_block.tool_use_id else "mcp_tool", - result=self._parse_contents_from_anthropic(content_block.content) - if isinstance(content_block.content, list) - else content_block.content, + output=parsed_output, raw_representation=content_block, ) ) @@ -618,30 +650,183 @@ def _parse_contents_from_anthropic( raw_representation=content_block, ) ) - case ( - "code_execution_tool_result" - | "bash_code_execution_tool_result" - | "text_editor_code_execution_tool_result" - ): - call_id, name = self._last_call_id_name or (None, None) - if ( - 
content_block.content - and ( - content_block.content.type == "bash_code_execution_result" - or content_block.content.type == "code_execution_result" + case "code_execution_tool_result": + code_outputs: list[Contents] = [] + if content_block.content: + if isinstance(content_block.content, BetaCodeExecutionToolResultError): + code_outputs.append( + ErrorContent( + message=content_block.content.error_code, + raw_representation=content_block.content, + ) + ) + else: + if content_block.content.stdout: + code_outputs.append( + TextContent( + text=content_block.content.stdout, + raw_representation=content_block.content, + ) + ) + if content_block.content.stderr: + code_outputs.append( + ErrorContent( + message=content_block.content.stderr, + raw_representation=content_block.content, + ) + ) + for code_file_content in content_block.content.content: + code_outputs.append( + HostedFileContent( + file_id=code_file_content.file_id, raw_representation=code_file_content + ) + ) + contents.append( + CodeInterpreterToolResultContent( + call_id=content_block.tool_use_id, + raw_representation=content_block, + outputs=code_outputs, ) - and content_block.content.content - ): - for result_content in content_block.content.content: - if hasattr(result_content, "file_id"): + ) + case "bash_code_execution_tool_result": + bash_outputs: list[Contents] = [] + if content_block.content: + if isinstance( + content_block.content, + BetaBashCodeExecutionToolResultError, + ): + bash_outputs.append( + ErrorContent( + message=content_block.content.error_code, + raw_representation=content_block.content, + ) + ) + else: + if content_block.content.stdout: + bash_outputs.append( + TextContent( + text=content_block.content.stdout, + raw_representation=content_block.content, + ) + ) + if content_block.content.stderr: + bash_outputs.append( + ErrorContent( + message=content_block.content.stderr, + raw_representation=content_block.content, + ) + ) + for bash_file_content in content_block.content.content: contents.append( - HostedFileContent(file_id=result_content.file_id, raw_representation=result_content) + HostedFileContent( + file_id=bash_file_content.file_id, raw_representation=bash_file_content + ) ) contents.append( FunctionResultContent( call_id=content_block.tool_use_id, - name=name if name and call_id == content_block.tool_use_id else "code_execution_tool", - result=content_block.content, + name=content_block.type, + result=bash_outputs, + raw_representation=content_block, + ) + ) + case "text_editor_code_execution_tool_result": + text_editor_outputs: list[Contents] = [] + match content_block.content.type: + case "text_editor_code_execution_tool_result_error": + text_editor_outputs.append( + ErrorContent( + message=content_block.content.error_code + and getattr(content_block.content, "error_message", ""), + raw_representation=content_block.content, + ) + ) + case "text_editor_code_execution_view_result": + annotations = ( + [ + CitationAnnotation( + raw_representation=content_block.content, + annotated_regions=[ + TextSpanRegion( + start_index=content_block.content.start_line, + end_index=content_block.content.start_line + + (content_block.content.num_lines or 0), + ) + ], + ) + ] + if content_block.content.num_lines is not None + and content_block.content.start_line is not None + else None + ) + text_editor_outputs.append( + TextContent( + text=content_block.content.content, + annotations=annotations, + raw_representation=content_block.content, + ) + ) + case "text_editor_code_execution_str_replace_result": + 
old_annotation = ( + CitationAnnotation( + raw_representation=content_block.content, + annotated_regions=[ + TextSpanRegion( + start_index=content_block.content.old_start or 0, + end_index=( + (content_block.content.old_start or 0) + + (content_block.content.old_lines or 0) + ), + ) + ], + ) + if content_block.content.old_lines is not None + and content_block.content.old_start is not None + else None + ) + new_annotation = ( + CitationAnnotation( + raw_representation=content_block.content, + snippet="\n".join(content_block.content.lines) + if content_block.content.lines + else None, + annotated_regions=[ + TextSpanRegion( + start_index=content_block.content.new_start or 0, + end_index=( + (content_block.content.new_start or 0) + + (content_block.content.new_lines or 0) + ), + ) + ], + ) + if content_block.content.new_lines is not None + and content_block.content.new_start is not None + else None + ) + annotations = [ann for ann in [old_annotation, new_annotation] if ann is not None] + + text_editor_outputs.append( + TextContent( + text=( + "\n".join(content_block.content.lines) if content_block.content.lines else "" + ), + annotations=annotations or None, + raw_representation=content_block.content, + ) + ) + case "text_editor_code_execution_create_result": + text_editor_outputs.append( + TextContent( + text=f"File update: {content_block.content.is_file_update}", + raw_representation=content_block.content, + ) + ) + contents.append( + FunctionResultContent( + call_id=content_block.tool_use_id, + name=content_block.type, + result=text_editor_outputs, raw_representation=content_block, ) ) diff --git a/python/packages/core/agent_framework/_tools.py b/python/packages/core/agent_framework/_tools.py index 07b11811f3..24481c3b3b 100644 --- a/python/packages/core/agent_framework/_tools.py +++ b/python/packages/core/agent_framework/_tools.py @@ -16,6 +16,7 @@ Generic, Literal, Protocol, + TypedDict, TypeVar, cast, get_args, @@ -73,6 +74,7 @@ "FunctionInvocationConfiguration", "HostedCodeInterpreterTool", "HostedFileSearchTool", + "HostedImageGenerationTool", "HostedMCPSpecificApproval", "HostedMCPTool", "HostedWebSearchTool", @@ -324,6 +326,41 @@ def __init__( super().__init__(**args) +class HostedImageGenerationToolOptions(TypedDict, total=False): + """Options for HostedImageGenerationTool.""" + + count: int + image_size: str + media_type: str + model_id: str + response_format: Literal["uri", "data", "hosted"] + streaming_count: int + + +class HostedImageGenerationTool(BaseTool): + """Represents a hosted tool that can be specified to an AI service to enable it to perform image generation.""" + + def __init__( + self, + *, + options: HostedImageGenerationToolOptions | None = None, + description: str | None = None, + additional_properties: dict[str, Any] | None = None, + **kwargs: Any, + ): + """Initialize a HostedImageGenerationTool.""" + if "name" in kwargs: + raise ValueError("The 'name' argument is reserved for the HostedImageGenerationTool and cannot be set.") + + self.options = options + super().__init__( + name="image_generation", + description=description or "", + additional_properties=additional_properties, + **kwargs, + ) + + class HostedMCPSpecificApproval(TypedDict, total=False): """Represents the specific mode for a hosted tool. @@ -1419,14 +1456,11 @@ async def _auto_invoke_function( Raises: KeyError: If the requested function is not found in the tool map. 
""" - from ._types import ( - FunctionResultContent, - ) - # Note: The scenarios for approval_mode="always_require", declaration_only, and # terminate_on_unknown_calls are all handled in _try_execute_function_calls before # this function is called. This function only handles the actual execution of approved, # non-declaration-only functions. + from ._types import FunctionCallContent, FunctionResultContent tool: AIFunction[BaseModel, Any] | None = None if function_call_content.type == "function_call": @@ -1444,11 +1478,14 @@ async def _auto_invoke_function( else: # Note: Unapproved tools (approved=False) are handled in _replace_approval_contents_with_results # and never reach this function, so we only handle approved=True cases here. - tool = tool_map.get(function_call_content.function_call.name) + inner_call = function_call_content.function_call + if not isinstance(inner_call, FunctionCallContent): + return function_call_content + tool = tool_map.get(inner_call.name) if tool is None: # we assume it is a hosted tool return function_call_content - function_call_content = function_call_content.function_call + function_call_content = inner_call parsed_args: dict[str, Any] = dict(function_call_content.parse_arguments() or {}) diff --git a/python/packages/core/agent_framework/_types.py b/python/packages/core/agent_framework/_types.py index a99440a771..ebe3d23e6f 100644 --- a/python/packages/core/agent_framework/_types.py +++ b/python/packages/core/agent_framework/_types.py @@ -40,6 +40,8 @@ "ChatResponse", "ChatResponseUpdate", "CitationAnnotation", + "CodeInterpreterToolCallContent", + "CodeInterpreterToolResultContent", "Contents", "DataContent", "ErrorContent", @@ -50,6 +52,10 @@ "FunctionResultContent", "HostedFileContent", "HostedVectorStoreContent", + "ImageGenerationToolCallContent", + "ImageGenerationToolResultContent", + "MCPServerToolCallContent", + "MCPServerToolResultContent", "Role", "TextContent", "TextReasoningContent", @@ -121,6 +127,18 @@ def _parse_content(content_data: MutableMapping[str, Any]) -> "Contents": return HostedFileContent.from_dict(content_data) case "hosted_vector_store": return HostedVectorStoreContent.from_dict(content_data) + case "code_interpreter_tool_call": + return CodeInterpreterToolCallContent.from_dict(content_data) + case "code_interpreter_tool_result": + return CodeInterpreterToolResultContent.from_dict(content_data) + case "image_generation_tool_call": + return ImageGenerationToolCallContent.from_dict(content_data) + case "image_generation_tool_result": + return ImageGenerationToolResultContent.from_dict(content_data) + case "mcp_server_tool_call": + return MCPServerToolCallContent.from_dict(content_data) + case "mcp_server_tool_result": + return MCPServerToolResultContent.from_dict(content_data) case "function_approval_request": return FunctionApprovalRequestContent.from_dict(content_data) case "function_approval_response": @@ -1607,6 +1625,8 @@ def __init__( self, file_id: str, *, + media_type: str | None = None, + name: str | None = None, additional_properties: dict[str, Any] | None = None, raw_representation: Any | None = None, **kwargs: Any, @@ -1615,6 +1635,8 @@ def __init__( Args: file_id: The identifier of the hosted file. + media_type: Optional media type of the hosted file. + name: Optional display name of the hosted file. Keyword Args: additional_properties: Optional additional properties associated with the content. 
@@ -1627,8 +1649,14 @@ def __init__( **kwargs, ) self.file_id = file_id + self.media_type = media_type + self.name = name self.type: Literal["hosted_file"] = "hosted_file" + def has_top_level_media_type(self, top_level_media_type: Literal["application", "audio", "image", "text"]) -> bool: + """Returns a boolean indicating if the media type has the specified top-level media type.""" + return _has_top_level_media_type(self.media_type, top_level_media_type) + class HostedVectorStoreContent(BaseContent): """Represents a hosted vector store content. @@ -1676,6 +1704,234 @@ def __init__( self.type: Literal["hosted_vector_store"] = "hosted_vector_store" +class CodeInterpreterToolCallContent(BaseContent): + """Represents a code interpreter tool call invocation by a hosted service.""" + + def __init__( + self, + *, + call_id: str | None = None, + inputs: Sequence["Contents | MutableMapping[str, Any]"] | None = None, + annotations: Sequence[Annotations | MutableMapping[str, Any]] | None = None, + additional_properties: dict[str, Any] | None = None, + raw_representation: Any | None = None, + **kwargs: Any, + ) -> None: + super().__init__( + annotations=annotations, + additional_properties=additional_properties, + raw_representation=raw_representation, + **kwargs, + ) + self.call_id = call_id + self.inputs: list["Contents"] | None = None + if inputs: + normalized_inputs: Sequence["Contents | MutableMapping[str, Any]"] = ( + inputs + if isinstance(inputs, Sequence) and not isinstance(inputs, (str, bytes, MutableMapping)) + else [inputs] + ) + self.inputs = _parse_content_list(list(normalized_inputs)) + self.type: Literal["code_interpreter_tool_call"] = "code_interpreter_tool_call" + + +class CodeInterpreterToolResultContent(BaseContent): + """Represents the result of a code interpreter tool invocation by a hosted service.""" + + def __init__( + self, + *, + call_id: str | None = None, + outputs: Sequence["Contents | MutableMapping[str, Any]"] | None = None, + annotations: Sequence[Annotations | MutableMapping[str, Any]] | None = None, + additional_properties: dict[str, Any] | None = None, + raw_representation: Any | None = None, + **kwargs: Any, + ) -> None: + super().__init__( + annotations=annotations, + additional_properties=additional_properties, + raw_representation=raw_representation, + **kwargs, + ) + self.call_id = call_id + self.outputs: list["Contents"] | None = None + if outputs: + normalized_outputs: Sequence["Contents | MutableMapping[str, Any]"] = ( + outputs + if isinstance(outputs, Sequence) and not isinstance(outputs, (str, bytes, MutableMapping)) + else [outputs] + ) + self.outputs = _parse_content_list(list(normalized_outputs)) + self.type: Literal["code_interpreter_tool_result"] = "code_interpreter_tool_result" + + +class ImageGenerationToolCallContent(BaseContent): + """Represents the invocation of an image generation tool call by a hosted service.""" + + def __init__( + self, + *, + image_id: str | None = None, + annotations: Sequence[Annotations | MutableMapping[str, Any]] | None = None, + additional_properties: dict[str, Any] | None = None, + raw_representation: Any | None = None, + **kwargs: Any, + ) -> None: + """Initializes an ImageGenerationToolCallContent instance. + + Keyword Args: + image_id: The identifier of the image to be generated. + annotations: Optional annotations associated with the content. + additional_properties: Optional additional properties associated with the content. + raw_representation: Optional raw representation of the content. 
+ **kwargs: Any additional keyword arguments. + + """ + super().__init__( + annotations=annotations, + additional_properties=additional_properties, + raw_representation=raw_representation, + **kwargs, + ) + self.image_id = image_id + self.type: Literal["image_generation_tool_call"] = "image_generation_tool_call" + + +class ImageGenerationToolResultContent(BaseContent): + """Represents the result of an image generation tool call invocation by a hosted service.""" + + def __init__( + self, + *, + image_id: str | None = None, + outputs: DataContent | UriContent | None = None, + annotations: Sequence[Annotations | MutableMapping[str, Any]] | None = None, + additional_properties: dict[str, Any] | None = None, + raw_representation: Any | None = None, + **kwargs: Any, + ) -> None: + """Initializes an ImageGenerationToolResultContent instance. + + Keyword Args: + image_id: The identifier of the generated image. + outputs: The outputs of the image generation tool call. + annotations: Optional annotations associated with the content. + additional_properties: Optional additional properties associated with the content. + raw_representation: Optional raw representation of the content. + **kwargs: Any additional keyword arguments. + + """ + super().__init__( + annotations=annotations, + additional_properties=additional_properties, + raw_representation=raw_representation, + **kwargs, + ) + self.image_id = image_id + self.outputs: DataContent | UriContent | None = outputs + self.type: Literal["image_generation_tool_result"] = "image_generation_tool_result" + + +class MCPServerToolCallContent(BaseContent): + """Represents a tool call request to a MCP server.""" + + def __init__( + self, + call_id: str, + tool_name: str, + server_name: str | None = None, + *, + arguments: str | Mapping[str, Any] | None = None, + annotations: Sequence[Annotations | MutableMapping[str, Any]] | None = None, + additional_properties: dict[str, Any] | None = None, + raw_representation: Any | None = None, + **kwargs: Any, + ) -> None: + """Initializes a MCPServerToolCallContent instance. + + Args: + call_id: The tool call identifier. + tool_name: The name of the tool requested. + server_name: The name of the MCP server where the tool is hosted. + + Keyword Args: + arguments: The arguments requested to be provided to the tool, + can be a string to allow gradual completion of the args. + annotations: Optional annotations associated with the content. + additional_properties: Optional additional properties associated with the content. + raw_representation: Optional raw representation of the content. + **kwargs: Any additional keyword arguments. 
+ """ + if not call_id: + raise ValueError("call_id must be a non-empty string.") + if not tool_name: + raise ValueError("tool_name must be a non-empty string.") + super().__init__( + annotations=annotations, + additional_properties=additional_properties, + raw_representation=raw_representation, + **kwargs, + ) + self.call_id = call_id + self.tool_name = tool_name + self.name = tool_name + self.server_name = server_name + self.arguments = arguments + self.type: Literal["mcp_server_tool_call"] = "mcp_server_tool_call" + + def parse_arguments(self) -> dict[str, Any] | None: + """Returns the parsed arguments for the MCP server tool call, if any.""" + if isinstance(self.arguments, str): + # If arguments are a string, try to parse it as JSON + try: + loaded = json.loads(self.arguments) + if isinstance(loaded, dict): + return loaded # type:ignore + return {"raw": loaded} + except (json.JSONDecodeError, TypeError): + return {"raw": self.arguments} + return cast(dict[str, Any] | None, self.arguments) + + +class MCPServerToolResultContent(BaseContent): + """Represents the result of a MCP server tool call.""" + + def __init__( + self, + call_id: str, + *, + output: Any | None = None, + annotations: Sequence[Annotations | MutableMapping[str, Any]] | None = None, + additional_properties: dict[str, Any] | None = None, + raw_representation: Any | None = None, + **kwargs: Any, + ) -> None: + """Initializes a MCPServerToolResultContent instance. + + Args: + call_id: The identifier of the tool call for which this is the result. + + Keyword Args: + output: The output of the MCP server tool call. + annotations: Optional annotations associated with the content. + additional_properties: Optional additional properties associated with the content. + raw_representation: Optional raw representation of the content. + **kwargs: Any additional keyword arguments. 
+ """ + if not call_id: + raise ValueError("call_id must be a non-empty string.") + super().__init__( + annotations=annotations, + additional_properties=additional_properties, + raw_representation=raw_representation, + **kwargs, + ) + self.call_id = call_id + self.output: Any | None = output + self.type: Literal["mcp_server_tool_result"] = "mcp_server_tool_result" + + class BaseUserInputRequest(BaseContent): """Base class for all user requests.""" @@ -1736,7 +1992,7 @@ def __init__( approved: bool, *, id: str, - function_call: FunctionCallContent | MutableMapping[str, Any], + function_call: FunctionCallContent | MCPServerToolCallContent | MutableMapping[str, Any], annotations: Sequence[Annotations | MutableMapping[str, Any]] | None = None, additional_properties: dict[str, Any] | None = None, raw_representation: Any | None = None, @@ -1764,8 +2020,12 @@ def __init__( self.id = id self.approved = approved # Convert dict to FunctionCallContent if needed (for SerializationMixin support) + self.function_call: FunctionCallContent | MCPServerToolCallContent if isinstance(function_call, MutableMapping): - self.function_call = FunctionCallContent.from_dict(function_call) + if function_call.get("type") == "mcp_server_tool_call": + self.function_call = MCPServerToolCallContent.from_dict(function_call) + else: + self.function_call = FunctionCallContent.from_dict(function_call) else: self.function_call = function_call # Override the type for this specific subclass @@ -1823,6 +2083,7 @@ def __init__( **kwargs, ) self.id = id + self.function_call: FunctionCallContent # Convert dict to FunctionCallContent if needed (for SerializationMixin support) if isinstance(function_call, MutableMapping): self.function_call = FunctionCallContent.from_dict(function_call) @@ -1854,6 +2115,12 @@ def create_response(self, approved: bool) -> "FunctionApprovalResponseContent": | UsageContent | HostedFileContent | HostedVectorStoreContent + | CodeInterpreterToolCallContent + | CodeInterpreterToolResultContent + | ImageGenerationToolCallContent + | ImageGenerationToolResultContent + | MCPServerToolCallContent + | MCPServerToolResultContent | FunctionApprovalRequestContent | FunctionApprovalResponseContent ) diff --git a/python/packages/core/agent_framework/openai/_assistants_client.py b/python/packages/core/agent_framework/openai/_assistants_client.py index e790a44940..b6f97371b7 100644 --- a/python/packages/core/agent_framework/openai/_assistants_client.py +++ b/python/packages/core/agent_framework/openai/_assistants_client.py @@ -3,7 +3,7 @@ import json import sys from collections.abc import AsyncIterable, Awaitable, Callable, Mapping, MutableMapping, MutableSequence -from typing import Any +from typing import Any, cast from openai import AsyncOpenAI from openai.types.beta.threads import ( @@ -28,9 +28,11 @@ ChatOptions, ChatResponse, ChatResponseUpdate, + CodeInterpreterToolCallContent, Contents, FunctionCallContent, FunctionResultContent, + MCPServerToolCallContent, Role, TextContent, ToolMode, @@ -377,10 +379,37 @@ def _parse_function_calls_from_assistants(self, event_data: Run, response_id: st if event_data.required_action is not None: for tool_call in event_data.required_action.submit_tool_outputs.tool_calls: + tool_call_any = cast(Any, tool_call) call_id = json.dumps([response_id, tool_call.id]) - function_name = tool_call.function.name - function_arguments = json.loads(tool_call.function.arguments) - contents.append(FunctionCallContent(call_id=call_id, name=function_name, arguments=function_arguments)) + tool_type = 
getattr(tool_call, "type", None) + if tool_type == "code_interpreter" and getattr(tool_call_any, "code_interpreter", None): + code_input = getattr(tool_call_any.code_interpreter, "input", None) + inputs = ( + [TextContent(text=code_input, raw_representation=tool_call)] if code_input is not None else None + ) + contents.append( + CodeInterpreterToolCallContent( + call_id=call_id, + inputs=inputs, + raw_representation=tool_call, + ) + ) + elif tool_type == "mcp": + contents.append( + MCPServerToolCallContent( + call_id=call_id, + tool_name=getattr(tool_call, "name", "") or "", + server_name=getattr(tool_call, "server_label", None), + arguments=getattr(tool_call, "args", None), + raw_representation=tool_call, + ) + ) + else: + function_name = tool_call.function.name + function_arguments = json.loads(tool_call.function.arguments) + contents.append( + FunctionCallContent(call_id=call_id, name=function_name, arguments=function_arguments) + ) return contents diff --git a/python/packages/core/agent_framework/openai/_responses_client.py b/python/packages/core/agent_framework/openai/_responses_client.py index 579452ef62..ec797b75be 100644 --- a/python/packages/core/agent_framework/openai/_responses_client.py +++ b/python/packages/core/agent_framework/openai/_responses_client.py @@ -31,6 +31,7 @@ AIFunction, HostedCodeInterpreterTool, HostedFileSearchTool, + HostedImageGenerationTool, HostedMCPTool, HostedWebSearchTool, ToolProtocol, @@ -42,6 +43,8 @@ ChatResponse, ChatResponseUpdate, CitationAnnotation, + CodeInterpreterToolCallContent, + CodeInterpreterToolResultContent, Contents, DataContent, FunctionApprovalRequestContent, @@ -50,6 +53,10 @@ FunctionResultContent, HostedFileContent, HostedVectorStoreContent, + ImageGenerationToolCallContent, + ImageGenerationToolResultContent, + MCPServerToolCallContent, + MCPServerToolResultContent, Role, TextContent, TextReasoningContent, @@ -57,6 +64,7 @@ UriContent, UsageContent, UsageDetails, + _parse_content, prepare_function_call_results, ) from ..exceptions import ( @@ -314,39 +322,28 @@ def _prepare_tools_for_openai( else None, ) ) + case HostedImageGenerationTool(): + mapped_tool: dict[str, Any] = {"type": "image_generation"} + if tool.options: + option_mapping = { + "image_size": "size", + "media_type": "output_format", + "model_id": "model", + "streaming_count": "partial_images", + } + # count and response_format are not supported by Responses API + for key, value in tool.options.items(): + mapped_key = option_mapping.get(key, key) + mapped_tool[mapped_key] = value + if tool.additional_properties: + mapped_tool.update(tool.additional_properties) + response_tools.append(mapped_tool) case _: logger.debug("Unsupported tool passed (type: %s)", type(tool)) else: # Handle raw dictionary tools tool_dict = tool if isinstance(tool, dict) else dict(tool) - - # Special handling for image_generation tools - if tool_dict.get("type") == "image_generation": - # Create a copy to avoid modifying the original - mapped_tool = tool_dict.copy() - - # Map user-friendly parameter names to OpenAI API parameter names - parameter_mapping = { - "format": "output_format", - "compression": "output_compression", - } - - for user_param, api_param in parameter_mapping.items(): - if user_param in mapped_tool: - # Map the parameter name and remove the old one - mapped_tool[api_param] = mapped_tool.pop(user_param) - - # Validate partial_images parameter for streaming image generation - # OpenAI API requires partial_images to be between 0-3 (inclusive) for image_generation tool - # 
Reference: https://platform.openai.com/docs/api-reference/responses/create#responses_create-tools-image_generation_tool-partial_images - if "partial_images" in mapped_tool: - partial_images = mapped_tool["partial_images"] - if not isinstance(partial_images, int) or partial_images < 0 or partial_images > 3: - raise ValueError("partial_images must be an integer between 0 and 3 (inclusive).") - - response_tools.append(mapped_tool) - else: - response_tools.append(tool_dict) + response_tools.append(tool_dict) return response_tools @staticmethod @@ -767,22 +764,35 @@ def _parse_response_from_openai( TextReasoningContent(text=summary.text, raw_representation=summary) # type: ignore[arg-type] ) case "code_interpreter_call": # ResponseOutputCodeInterpreterCall - if hasattr(item, "outputs") and item.outputs: - for code_output in item.outputs: - if code_output.type == "logs": - contents.append(TextContent(text=code_output.logs, raw_representation=item)) - if code_output.type == "image": - contents.append( + call_id = getattr(item, "call_id", None) or getattr(item, "id", None) + outputs: list["Contents"] = [] + if item_outputs := getattr(item, "outputs", None): + for code_output in item_outputs: + if getattr(code_output, "type", None) == "logs": + outputs.append(TextContent(text=code_output.logs, raw_representation=code_output)) + elif getattr(code_output, "type", None) == "image": + outputs.append( UriContent( uri=code_output.url, - raw_representation=item, - # no more specific media type then this can be inferred + raw_representation=code_output, media_type="image", ) ) - elif hasattr(item, "code") and item.code: - # fallback if no output was returned is the code: - contents.append(TextContent(text=item.code, raw_representation=item)) + if code := getattr(item, "code", None): + contents.append( + CodeInterpreterToolCallContent( + call_id=call_id, + inputs=[TextContent(text=code, raw_representation=item)], + raw_representation=item, + ) + ) + contents.append( + CodeInterpreterToolResultContent( + call_id=call_id, + outputs=outputs, + raw_representation=item, + ) + ) case "function_call": # ResponseOutputFunctionCall contents.append( FunctionCallContent( @@ -806,31 +816,49 @@ def _parse_response_from_openai( ), ) ) - case "image_generation_call": # ResponseOutputImageGenerationCall - if item.result: - # Handle the result as either a proper data URI or raw base64 string - uri = item.result - media_type = None - if not uri.startswith("data:"): - # Raw base64 string - convert to proper data URI format using helper - uri, media_type = DataContent.create_data_uri_from_base64(uri) - else: - # Parse media type from existing data URI - try: - # Extract media type from data URI (e.g., "data:image/png;base64,...") - if ";" in uri and uri.startswith("data:"): - media_type = uri.split(";")[0].split(":", 1)[1] - except Exception: - # Fallback if parsing fails - media_type = "image" + case "mcp_call": + call_id = item.id + contents.append( + MCPServerToolCallContent( + call_id=call_id, + tool_name=item.name, + server_name=item.server_label, + arguments=item.arguments, + raw_representation=item, + ) + ) + if item.output is not None: contents.append( - DataContent( - uri=uri, - media_type=media_type, + MCPServerToolResultContent( + call_id=call_id, + output=[TextContent(text=item.output)], raw_representation=item, ) ) - # TODO(peterychang): Add support for other content types + case "image_generation_call": # ResponseOutputImageGenerationCall + image_output: DataContent | None = None + if item.result: + base64_data = 
item.result + image_format = DataContent.detect_image_format_from_base64(base64_data) + image_output = DataContent( + data=base64_data, + media_type=f"image/{image_format}" if image_format else "image/png", + raw_representation=item.result, + ) + image_id = item.id + contents.append( + ImageGenerationToolCallContent( + image_id=image_id, + raw_representation=item, + ) + ) + contents.append( + ImageGenerationToolResultContent( + image_id=image_id, + outputs=image_output, + raw_representation=item, + ) + ) case _: logger.debug("Unparsed output of type: %s: %s", item.type, item) response_message = ChatMessage(role="assistant", contents=contents) @@ -994,23 +1022,70 @@ def _parse_chunk_from_openai( ), ) ) + case "mcp_call": + call_id = getattr(event_item, "id", None) or getattr(event_item, "call_id", None) or "" + contents.append( + MCPServerToolCallContent( + call_id=call_id, + tool_name=getattr(event_item, "name", "") or "", + server_name=getattr(event_item, "server_label", None), + arguments=getattr(event_item, "arguments", None), + raw_representation=event_item, + ) + ) + result_output = ( + getattr(event_item, "result", None) + or getattr(event_item, "output", None) + or getattr(event_item, "outputs", None) + ) + parsed_output: list[Contents] | None = None + if result_output: + normalized = ( + result_output + if isinstance(result_output, Sequence) + and not isinstance(result_output, (str, bytes, MutableMapping)) + else [result_output] + ) + parsed_output = [_parse_content(output_item) for output_item in normalized] + contents.append( + MCPServerToolResultContent( + call_id=call_id, + output=parsed_output, + raw_representation=event_item, + ) + ) case "code_interpreter_call": # ResponseOutputCodeInterpreterCall + call_id = getattr(event_item, "call_id", None) or getattr(event_item, "id", None) + outputs: list[Contents] = [] if hasattr(event_item, "outputs") and event_item.outputs: for code_output in event_item.outputs: - if code_output.type == "logs": - contents.append(TextContent(text=code_output.logs, raw_representation=event_item)) - if code_output.type == "image": - contents.append( + if getattr(code_output, "type", None) == "logs": + outputs.append( + TextContent(text=cast(Any, code_output).logs, raw_representation=code_output) + ) + elif getattr(code_output, "type", None) == "image": + outputs.append( UriContent( - uri=code_output.url, - raw_representation=event_item, - # no more specific media type then this can be inferred + uri=cast(Any, code_output).url, + raw_representation=code_output, media_type="image", ) ) - elif hasattr(event_item, "code") and event_item.code: - # fallback if no output was returned is the code: - contents.append(TextContent(text=event_item.code, raw_representation=event_item)) + if hasattr(event_item, "code") and event_item.code: + contents.append( + CodeInterpreterToolCallContent( + call_id=call_id, + inputs=[TextContent(text=event_item.code, raw_representation=event_item)], + raw_representation=event_item, + ) + ) + contents.append( + CodeInterpreterToolResultContent( + call_id=call_id, + outputs=outputs, + raw_representation=event_item, + ) + ) case "reasoning": # ResponseOutputReasoning if hasattr(event_item, "content") and event_item.content: for index, reasoning_content in enumerate(event_item.content): @@ -1050,14 +1125,27 @@ def _parse_chunk_from_openai( # Use helper function to create data URI from base64 uri, media_type = DataContent.create_data_uri_from_base64(image_base64) + image_output = DataContent( + uri=uri, + media_type=media_type, + 
additional_properties={ + "partial_image_index": partial_index, + "is_partial_image": True, + }, + raw_representation=event, + ) + + image_id = getattr(event, "item_id", None) contents.append( - DataContent( - uri=uri, - media_type=media_type, - additional_properties={ - "partial_image_index": partial_index, - "is_partial_image": True, - }, + ImageGenerationToolCallContent( + image_id=image_id, + raw_representation=event, + ) + ) + contents.append( + ImageGenerationToolResultContent( + image_id=image_id, + outputs=image_output, raw_representation=event, ) ) diff --git a/python/packages/core/tests/azure/test_azure_responses_client.py b/python/packages/core/tests/azure/test_azure_responses_client.py index 9b59a3d41a..ec19eaf833 100644 --- a/python/packages/core/tests/azure/test_azure_responses_client.py +++ b/python/packages/core/tests/azure/test_azure_responses_client.py @@ -552,26 +552,24 @@ async def test_azure_responses_client_agent_chat_options_agent_level() -> None: async def test_azure_responses_client_agent_hosted_mcp_tool() -> None: """Integration test for HostedMCPTool with Azure Response Agent using Microsoft Learn MCP.""" - mcp_tool = HostedMCPTool( - name="Microsoft Learn MCP", - url="https://learn.microsoft.com/api/mcp", - description="A Microsoft Learn MCP server for documentation questions", - approval_mode="never_require", - ) - async with ChatAgent( chat_client=AzureOpenAIResponsesClient(credential=AzureCliCredential()), instructions="You are a helpful assistant that can help with microsoft documentation questions.", - tools=[mcp_tool], + tools=HostedMCPTool( + name="Microsoft Learn MCP", + url="https://learn.microsoft.com/api/mcp", + description="A Microsoft Learn MCP server for documentation questions", + approval_mode="never_require", + ), ) as agent: response = await agent.run( "How to create an Azure storage account using az cli?", - max_tokens=200, + # this needs to be high enough to handle the full MCP tool response. 
+ max_tokens=5000, ) assert isinstance(response, AgentRunResponse) - assert response.text is not None - assert len(response.text) > 0 + assert response.text # Should contain Azure-related content since it's asking about Azure CLI assert any(term in response.text.lower() for term in ["azure", "storage", "account", "cli"]) diff --git a/python/packages/core/tests/core/test_tools.py b/python/packages/core/tests/core/test_tools.py index 88c34dc3e8..f70e6ddb56 100644 --- a/python/packages/core/tests/core/test_tools.py +++ b/python/packages/core/tests/core/test_tools.py @@ -10,6 +10,7 @@ from agent_framework import ( AIFunction, HostedCodeInterpreterTool, + HostedImageGenerationTool, HostedMCPTool, ToolProtocol, ai_function, @@ -818,6 +819,30 @@ def test_hosted_code_interpreter_tool_with_unknown_input(): HostedCodeInterpreterTool(inputs={"hosted_file": "file-single"}) +def test_hosted_image_generation_tool_defaults(): + """HostedImageGenerationTool should default name and empty description.""" + tool = HostedImageGenerationTool() + + assert tool.name == "image_generation" + assert tool.description == "" + assert tool.options is None + assert str(tool) == "HostedImageGenerationTool(name=image_generation)" + + +def test_hosted_image_generation_tool_with_options(): + """HostedImageGenerationTool should store options.""" + tool = HostedImageGenerationTool( + description="Generate images", + options={"format": "png", "size": "1024x1024"}, + additional_properties={"quality": "high"}, + ) + + assert tool.name == "image_generation" + assert tool.description == "Generate images" + assert tool.options == {"format": "png", "size": "1024x1024"} + assert tool.additional_properties == {"quality": "high"} + + # region HostedMCPTool tests diff --git a/python/packages/core/tests/core/test_types.py b/python/packages/core/tests/core/test_types.py index 85f00a12ac..6e6e5bfee7 100644 --- a/python/packages/core/tests/core/test_types.py +++ b/python/packages/core/tests/core/test_types.py @@ -18,6 +18,8 @@ ChatResponse, ChatResponseUpdate, CitationAnnotation, + CodeInterpreterToolCallContent, + CodeInterpreterToolResultContent, DataContent, ErrorContent, FinishReason, @@ -27,6 +29,10 @@ FunctionResultContent, HostedFileContent, HostedVectorStoreContent, + ImageGenerationToolCallContent, + ImageGenerationToolResultContent, + MCPServerToolCallContent, + MCPServerToolResultContent, Role, TextContent, TextReasoningContent, @@ -269,6 +275,78 @@ def test_hosted_file_content_minimal(): assert isinstance(content, BaseContent) +def test_hosted_file_content_optional_fields(): + """HostedFileContent should capture optional media type and name.""" + content = HostedFileContent(file_id="file-789", media_type="image/png", name="plot.png") + + assert content.media_type == "image/png" + assert content.name == "plot.png" + assert content.has_top_level_media_type("image") + assert content.has_top_level_media_type("application") is False + + +# region: CodeInterpreter content + + +def test_code_interpreter_tool_call_content_parses_inputs(): + call = CodeInterpreterToolCallContent( + call_id="call-1", + inputs=[{"type": "text", "text": "print('hi')"}], + ) + + assert call.type == "code_interpreter_tool_call" + assert call.call_id == "call-1" + assert call.inputs and isinstance(call.inputs[0], TextContent) + assert call.inputs[0].text == "print('hi')" + + +def test_code_interpreter_tool_result_content_outputs(): + result = CodeInterpreterToolResultContent( + call_id="call-2", + outputs=[ + {"type": "text", "text": "log output"}, + {"type": 
"uri", "uri": "https://example.com/file.png", "media_type": "image/png"}, + ], + ) + + assert result.type == "code_interpreter_tool_result" + assert result.call_id == "call-2" + assert result.outputs is not None + assert isinstance(result.outputs[0], TextContent) + assert isinstance(result.outputs[1], UriContent) + + +# region: Image generation content + + +def test_image_generation_tool_contents(): + call = ImageGenerationToolCallContent(image_id="img-1") + outputs = [DataContent(data=b"1234", media_type="image/png")] + result = ImageGenerationToolResultContent(image_id="img-1", outputs=outputs) + + assert call.type == "image_generation_tool_call" + assert call.image_id == "img-1" + assert result.type == "image_generation_tool_result" + assert result.image_id == "img-1" + assert result.outputs and isinstance(result.outputs[0], DataContent) + + +# region: MCP server tool content + + +def test_mcp_server_tool_call_and_result(): + call = MCPServerToolCallContent(call_id="c-1", tool_name="tool", server_name="server", arguments={"x": 1}) + assert call.type == "mcp_server_tool_call" + assert call.arguments == {"x": 1} + + result = MCPServerToolResultContent(call_id="c-1", output=[{"type": "text", "text": "done"}]) + assert result.type == "mcp_server_tool_result" + assert result.output + + with raises(ValueError): + MCPServerToolCallContent(call_id="", tool_name="tool") + + # region: HostedVectorStoreContent @@ -469,6 +547,15 @@ def test_function_approval_serialization_roundtrip(): # The Contents union will need to be handled differently when we fully migrate +def test_function_approval_accepts_mcp_call(): + """Ensure FunctionApprovalRequestContent supports MCP server tool calls.""" + mcp_call = MCPServerToolCallContent(call_id="c-mcp", tool_name="tool", server_name="srv", arguments={"x": 1}) + req = FunctionApprovalRequestContent(id="req-mcp", function_call=mcp_call) + + assert isinstance(req.function_call, MCPServerToolCallContent) + assert req.function_call.call_id == "c-mcp" + + # region BaseContent Serialization diff --git a/python/packages/core/tests/openai/test_openai_responses_client.py b/python/packages/core/tests/openai/test_openai_responses_client.py index 03510a2345..778ce843ee 100644 --- a/python/packages/core/tests/openai/test_openai_responses_client.py +++ b/python/packages/core/tests/openai/test_openai_responses_client.py @@ -26,6 +26,8 @@ ChatMessage, ChatResponse, ChatResponseUpdate, + CodeInterpreterToolCallContent, + CodeInterpreterToolResultContent, DataContent, FunctionApprovalRequestContent, FunctionApprovalResponseContent, @@ -34,9 +36,12 @@ HostedCodeInterpreterTool, HostedFileContent, HostedFileSearchTool, + HostedImageGenerationTool, HostedMCPTool, HostedVectorStoreContent, HostedWebSearchTool, + ImageGenerationToolCallContent, + ImageGenerationToolResultContent, MCPStreamableHTTPTool, Role, TextContent, @@ -612,11 +617,14 @@ def test_response_content_creation_with_code_interpreter() -> None: response = client._parse_response_from_openai(mock_response, chat_options=ChatOptions()) # type: ignore assert len(response.messages[0].contents) == 2 - assert isinstance(response.messages[0].contents[0], TextContent) - assert response.messages[0].contents[0].text == "Code execution log" - assert isinstance(response.messages[0].contents[1], UriContent) - assert response.messages[0].contents[1].uri == "https://example.com/image.png" - assert response.messages[0].contents[1].media_type == "image" + call_content, result_content = response.messages[0].contents + assert 
isinstance(call_content, CodeInterpreterToolCallContent) + assert call_content.inputs is not None + assert isinstance(call_content.inputs[0], TextContent) + assert isinstance(result_content, CodeInterpreterToolResultContent) + assert result_content.outputs is not None + assert any(isinstance(out, TextContent) for out in result_content.outputs) + assert any(isinstance(out, UriContent) for out in result_content.outputs) def test_response_content_creation_with_function_call() -> None: @@ -761,14 +769,13 @@ def test_prepare_tools_for_openai_with_raw_image_generation() -> None: """Test that raw image_generation tool dict is handled correctly with parameter mapping.""" client = OpenAIResponsesClient(model_id="test-model", api_key="test-key") - # Test with raw tool dict using user-friendly parameter names + # Test with raw tool dict using OpenAI parameters directly tool = { "type": "image_generation", "size": "1536x1024", "quality": "high", - "format": "webp", # Will be mapped to output_format - "compression": 75, # Will be mapped to output_compression - "background": "transparent", + "output_format": "webp", + "output_quality": 75, } resp_tools = client._prepare_tools_for_openai([tool]) @@ -780,10 +787,8 @@ def test_prepare_tools_for_openai_with_raw_image_generation() -> None: assert image_tool["type"] == "image_generation" assert image_tool["size"] == "1536x1024" assert image_tool["quality"] == "high" - assert image_tool["background"] == "transparent" - # Check parameter name mapping assert image_tool["output_format"] == "webp" - assert image_tool["output_compression"] == 75 + assert image_tool["output_quality"] == 75 def test_prepare_tools_for_openai_with_raw_image_generation_openai_responses_params() -> None: @@ -797,7 +802,7 @@ def test_prepare_tools_for_openai_with_raw_image_generation_openai_responses_par "model": "gpt-image-1", "input_fidelity": "high", "moderation": "strict", - "partial_images": 2, # Should be integer 0-3 + "output_format": "png", } resp_tools = client._prepare_tools_for_openai([tool]) @@ -815,7 +820,7 @@ def test_prepare_tools_for_openai_with_raw_image_generation_openai_responses_par assert tool_dict["model"] == "gpt-image-1" assert tool_dict["input_fidelity"] == "high" assert tool_dict["moderation"] == "strict" - assert tool_dict["partial_images"] == 2 + assert tool_dict["output_format"] == "png" def test_prepare_tools_for_openai_with_raw_image_generation_minimal() -> None: @@ -836,6 +841,24 @@ def test_prepare_tools_for_openai_with_raw_image_generation_minimal() -> None: assert len(image_tool) == 1 +def test_prepare_tools_for_openai_with_hosted_image_generation() -> None: + """Test HostedImageGenerationTool conversion.""" + client = OpenAIResponsesClient(model_id="test-model", api_key="test-key") + tool = HostedImageGenerationTool( + description="Generate images", + options={"output_format": "png", "size": "512x512"}, + additional_properties={"quality": "high"}, + ) + + resp_tools = client._prepare_tools_for_openai([tool]) + assert len(resp_tools) == 1 + image_tool = resp_tools[0] + assert image_tool["type"] == "image_generation" + assert image_tool["output_format"] == "png" + assert image_tool["size"] == "512x512" + assert image_tool["quality"] == "high" + + def test_parse_chunk_from_openai_with_mcp_approval_request() -> None: """Test that a streaming mcp_approval_request event is parsed into FunctionApprovalRequestContent.""" client = OpenAIResponsesClient(model_id="test-model", api_key="test-key") @@ -1278,9 +1301,11 @@ def 
test_parse_chunk_from_openai_code_interpreter() -> None: result = client._parse_chunk_from_openai(mock_event_image, chat_options, function_call_ids) # type: ignore assert len(result.contents) == 1 - assert isinstance(result.contents[0], UriContent) - assert result.contents[0].uri == "https://example.com/plot.png" - assert result.contents[0].media_type == "image" + assert isinstance(result.contents[0], CodeInterpreterToolResultContent) + assert result.contents[0].outputs + assert any( + isinstance(out, UriContent) and out.uri == "https://example.com/plot.png" for out in result.contents[0].outputs + ) def test_parse_chunk_from_openai_reasoning() -> None: @@ -1495,12 +1520,16 @@ def test_parse_response_from_openai_image_generation_raw_base64(): with patch.object(client, "_get_metadata_from_response", return_value={}): response = client._parse_response_from_openai(mock_response, chat_options=ChatOptions()) # type: ignore - # Verify the response contains DataContent with proper URI and media_type - assert len(response.messages[0].contents) == 1 - content = response.messages[0].contents[0] - assert isinstance(content, DataContent) - assert content.uri.startswith("data:image/png;base64,") - assert content.media_type == "image/png" + # Verify the response contains call + result with DataContent output + assert len(response.messages[0].contents) == 2 + call_content, result_content = response.messages[0].contents + assert isinstance(call_content, ImageGenerationToolCallContent) + assert isinstance(result_content, ImageGenerationToolResultContent) + assert result_content.outputs + data_out = result_content.outputs + assert isinstance(data_out, DataContent) + assert data_out.uri.startswith("data:image/png;base64,") + assert data_out.media_type == "image/png" def test_parse_response_from_openai_image_generation_existing_data_uri(): @@ -1521,19 +1550,23 @@ def test_parse_response_from_openai_image_generation_existing_data_uri(): valid_webp_base64 = base64.b64encode(webp_signature + b"VP8 fake_data").decode() mock_item = MagicMock() mock_item.type = "image_generation_call" - mock_item.result = f"data:image/webp;base64,{valid_webp_base64}" + mock_item.result = valid_webp_base64 mock_response.output = [mock_item] with patch.object(client, "_get_metadata_from_response", return_value={}): response = client._parse_response_from_openai(mock_response, chat_options=ChatOptions()) # type: ignore - # Verify the response contains DataContent with proper media_type parsed from URI - assert len(response.messages[0].contents) == 1 - content = response.messages[0].contents[0] - assert isinstance(content, DataContent) - assert content.uri == f"data:image/webp;base64,{valid_webp_base64}" - assert content.media_type == "image/webp" + # Verify the response contains call + result with DataContent output + assert len(response.messages[0].contents) == 2 + call_content, result_content = response.messages[0].contents + assert isinstance(call_content, ImageGenerationToolCallContent) + assert isinstance(result_content, ImageGenerationToolResultContent) + assert result_content.outputs + data_out = result_content.outputs + assert isinstance(data_out, DataContent) + assert data_out.uri == f"data:image/webp;base64,{valid_webp_base64}" + assert data_out.media_type == "image/webp" def test_parse_response_from_openai_image_generation_format_detection(): @@ -1559,10 +1592,12 @@ def test_parse_response_from_openai_image_generation_format_detection(): with patch.object(client, "_get_metadata_from_response", return_value={}): response_jpeg 
= client._parse_response_from_openai(mock_response_jpeg, chat_options=ChatOptions()) # type: ignore - content_jpeg = response_jpeg.messages[0].contents[0] - assert isinstance(content_jpeg, DataContent) - assert content_jpeg.media_type == "image/jpeg" - assert "data:image/jpeg;base64," in content_jpeg.uri + result_contents = response_jpeg.messages[0].contents + assert isinstance(result_contents[1], ImageGenerationToolResultContent) + outputs = result_contents[1].outputs + assert outputs and isinstance(outputs, DataContent) + assert outputs.media_type == "image/jpeg" + assert "data:image/jpeg;base64," in outputs.uri # Test WEBP detection webp_signature = b"RIFF" + b"\x00\x00\x00\x00" + b"WEBP" @@ -1583,10 +1618,10 @@ def test_parse_response_from_openai_image_generation_format_detection(): with patch.object(client, "_get_metadata_from_response", return_value={}): response_webp = client._parse_response_from_openai(mock_response_webp, chat_options=ChatOptions()) # type: ignore - content_webp = response_webp.messages[0].contents[0] - assert isinstance(content_webp, DataContent) - assert content_webp.media_type == "image/webp" - assert "data:image/webp;base64," in content_webp.uri + outputs_webp = response_webp.messages[0].contents[1].outputs + assert outputs_webp and isinstance(outputs_webp, DataContent) + assert outputs_webp.media_type == "image/webp" + assert "data:image/webp;base64," in outputs_webp.uri def test_parse_response_from_openai_image_generation_fallback(): @@ -1615,9 +1650,11 @@ def test_parse_response_from_openai_image_generation_fallback(): response = client._parse_response_from_openai(mock_response, chat_options=ChatOptions()) # type: ignore # Verify it falls back to PNG format for unrecognized binary data - assert len(response.messages[0].contents) == 1 - content = response.messages[0].contents[0] - assert isinstance(content, DataContent) + assert len(response.messages[0].contents) == 2 + result_content = response.messages[0].contents[1] + assert isinstance(result_content, ImageGenerationToolResultContent) + assert result_content.outputs + content = result_content.outputs assert content.media_type == "image/png" assert f"data:image/png;base64,{unrecognized_base64}" == content.uri @@ -2153,38 +2190,30 @@ async def test_openai_responses_client_agent_hosted_code_interpreter_tool(): @pytest.mark.flaky @skip_if_openai_integration_tests_disabled -async def test_openai_responses_client_agent_raw_image_generation_tool(): +async def test_openai_responses_client_agent_image_generation_tool(): """Test OpenAI Responses Client agent with raw image_generation tool through OpenAIResponsesClient.""" async with ChatAgent( chat_client=OpenAIResponsesClient(), instructions="You are a helpful assistant that can generate images.", - tools=[{"type": "image_generation", "size": "1024x1024", "quality": "low", "format": "png"}], + tools=HostedImageGenerationTool(options={"image_size": "1024x1024", "media_type": "png"}), ) as agent: # Test image generation functionality response = await agent.run("Generate an image of a cute red panda sitting on a tree branch in a forest.") assert isinstance(response, AgentRunResponse) + assert response.messages - # For image generation, we expect to get some response content - # This could be DataContent with image data, UriContent - assert response.messages is not None and len(response.messages) > 0 - - # Check that we have some kind of content in the response - total_contents = sum(len(message.contents) for message in response.messages) - assert total_contents > 0, 
f"Expected some content in response messages, got {total_contents} contents" - - # Verify we got image content - look for DataContent with URI starting with "data:image" + # Verify we got image content - look for ImageGenerationToolResultContent image_content_found = False for message in response.messages: for content in message.contents: - uri = getattr(content, "uri", None) - if uri and uri.startswith("data:image"): + if content.type == "image_generation_tool_result" and content.outputs: image_content_found = True break if image_content_found: break - # The test passes if we got image content (which we did based on the visible base64 output) + # The test passes if we got image content assert image_content_found, "Expected to find image content in response" @@ -2306,26 +2335,24 @@ async def test_openai_responses_client_agent_chat_options_agent_level() -> None: async def test_openai_responses_client_agent_hosted_mcp_tool() -> None: """Integration test for HostedMCPTool with OpenAI Response Agent using Microsoft Learn MCP.""" - mcp_tool = HostedMCPTool( - name="Microsoft Learn MCP", - url="https://learn.microsoft.com/api/mcp", - description="A Microsoft Learn MCP server for documentation questions", - approval_mode="never_require", - ) - async with ChatAgent( chat_client=OpenAIResponsesClient(), instructions="You are a helpful assistant that can help with microsoft documentation questions.", - tools=[mcp_tool], + tools=HostedMCPTool( + name="Microsoft Learn MCP", + url="https://learn.microsoft.com/api/mcp", + description="A Microsoft Learn MCP server for documentation questions", + approval_mode="never_require", + ), ) as agent: response = await agent.run( "How to create an Azure storage account using az cli?", - max_tokens=200, + # this needs to be high enough to handle the full MCP tool response. 
+ max_tokens=5000, ) assert isinstance(response, AgentRunResponse) - assert response.text is not None - assert len(response.text) > 0 + assert response.text # Should contain Azure-related content since it's asking about Azure CLI assert any(term in response.text.lower() for term in ["azure", "storage", "account", "cli"]) diff --git a/python/packages/lab/lightning/tests/test_lightning.py b/python/packages/lab/lightning/tests/test_lightning.py index 9b56b59d9c..5f85532de1 100644 --- a/python/packages/lab/lightning/tests/test_lightning.py +++ b/python/packages/lab/lightning/tests/test_lightning.py @@ -2,15 +2,14 @@ """Tests for lightning module.""" +# ruff: noqa from unittest.mock import AsyncMock, patch import pytest -from agent_framework import ( - AgentExecutor, - AgentRunEvent, - ChatAgent, - WorkflowBuilder, -) + +agentlightning = pytest.importorskip("agentlightning") + +from agent_framework import AgentExecutor, AgentRunEvent, ChatAgent, WorkflowBuilder from agent_framework.lab.lightning import AgentFrameworkTracer from agent_framework.openai import OpenAIChatClient from agentlightning import TracerTraceToTriplet diff --git a/python/packages/ollama/pyproject.toml b/python/packages/ollama/pyproject.toml index 85e42a60c8..76cd25db74 100644 --- a/python/packages/ollama/pyproject.toml +++ b/python/packages/ollama/pyproject.toml @@ -57,7 +57,7 @@ omit = [ ] [tool.pyright] -extend = "../../pyproject.toml" +extends = "../../pyproject.toml" exclude = ['tests'] [tool.mypy] diff --git a/python/samples/getting_started/agents/azure_ai/azure_ai_with_image_generation.py b/python/samples/getting_started/agents/azure_ai/azure_ai_with_image_generation.py index 2fcc0d09c1..8274c43ab0 100644 --- a/python/samples/getting_started/agents/azure_ai/azure_ai_with_image_generation.py +++ b/python/samples/getting_started/agents/azure_ai/azure_ai_with_image_generation.py @@ -3,7 +3,7 @@ from pathlib import Path import aiofiles -from agent_framework import DataContent +from agent_framework import DataContent, HostedImageGenerationTool from agent_framework.azure import AzureAIClient from azure.identity.aio import AzureCliCredential @@ -29,12 +29,13 @@ async def main() -> None: name="ImageGenAgent", instructions="Generate images based on user requirements.", tools=[ - { - "type": "image_generation", - "model": "gpt-image-1-mini", - "quality": "low", - "size": "1024x1024", - } + HostedImageGenerationTool( + options={ + "model": "gpt-image-1-mini", + "quality": "low", + "size": "1024x1024", + } + ) ], ) as agent, ): diff --git a/python/samples/getting_started/agents/openai/openai_responses_client_image_generation.py b/python/samples/getting_started/agents/openai/openai_responses_client_image_generation.py index a437547d93..65f8ac9fd2 100644 --- a/python/samples/getting_started/agents/openai/openai_responses_client_image_generation.py +++ b/python/samples/getting_started/agents/openai/openai_responses_client_image_generation.py @@ -3,7 +3,7 @@ import asyncio import base64 -from agent_framework import DataContent, UriContent +from agent_framework import DataContent, HostedImageGenerationTool, ImageGenerationToolResultContent, UriContent from agent_framework.openai import OpenAIResponsesClient """ @@ -51,14 +51,12 @@ async def main() -> None: agent = OpenAIResponsesClient().create_agent( instructions="You are a helpful AI that can generate images.", tools=[ - { - "type": "image_generation", - # Core parameters - "size": "1024x1024", - "background": "transparent", - "quality": "low", - "format": "webp", - } + 
HostedImageGenerationTool( + options={ + "size": "1024x1024", + "output_format": "webp", + } + ) ], ) @@ -72,9 +70,11 @@ async def main() -> None: # Show information about the generated image for message in result.messages: for content in message.contents: - if isinstance(content, (DataContent, UriContent)) and content.uri: - show_image_info(content.uri) - break + if isinstance(content, ImageGenerationToolResultContent) and content.outputs: + for output in content.outputs: + if isinstance(output, (DataContent, UriContent)) and output.uri: + show_image_info(output.uri) + break if __name__ == "__main__": diff --git a/python/samples/getting_started/agents/openai/openai_responses_client_streaming_image_generation.py b/python/samples/getting_started/agents/openai/openai_responses_client_streaming_image_generation.py index 2d74429917..9a81cec590 100644 --- a/python/samples/getting_started/agents/openai/openai_responses_client_streaming_image_generation.py +++ b/python/samples/getting_started/agents/openai/openai_responses_client_streaming_image_generation.py @@ -4,7 +4,7 @@ import base64 import anyio -from agent_framework import DataContent +from agent_framework import DataContent, HostedImageGenerationTool from agent_framework.openai import OpenAIResponsesClient """OpenAI Responses Client Streaming Image Generation Example @@ -45,12 +45,13 @@ async def main(): agent = OpenAIResponsesClient().create_agent( instructions="You are a helpful agent that can generate images.", tools=[ - { - "type": "image_generation", - "size": "1024x1024", - "quality": "high", - "partial_images": 3, - } + HostedImageGenerationTool( + options={ + "size": "1024x1024", + "quality": "high", + "partial_images": 3, + } + ) ], ) diff --git a/python/samples/getting_started/agents/openai/openai_responses_client_with_code_interpreter.py b/python/samples/getting_started/agents/openai/openai_responses_client_with_code_interpreter.py index c8f393b780..8f55bdfbf9 100644 --- a/python/samples/getting_started/agents/openai/openai_responses_client_with_code_interpreter.py +++ b/python/samples/getting_started/agents/openai/openai_responses_client_with_code_interpreter.py @@ -2,10 +2,14 @@ import asyncio -from agent_framework import ChatAgent, ChatResponse, HostedCodeInterpreterTool +from agent_framework import ( + ChatAgent, + CodeInterpreterToolCallContent, + CodeInterpreterToolResultContent, + HostedCodeInterpreterTool, + TextContent, +) from agent_framework.openai import OpenAIResponsesClient -from openai.types.responses.response import Response as OpenAIResponse -from openai.types.responses.response_code_interpreter_tool_call import ResponseCodeInterpreterToolCall """ OpenAI Responses Client with Code Interpreter Example @@ -30,15 +34,20 @@ async def main() -> None: result = await agent.run(query) print(f"Result: {result}\n") - if ( - isinstance(result.raw_representation, ChatResponse) - and isinstance(result.raw_representation.raw_representation, OpenAIResponse) - and len(result.raw_representation.raw_representation.output) > 0 - and isinstance(result.raw_representation.raw_representation.output[0], ResponseCodeInterpreterToolCall) - ): - generated_code = result.raw_representation.raw_representation.output[0].code - - print(f"Generated code:\n{generated_code}") + for message in result.messages: + code_blocks = [c for c in message.contents if isinstance(c, CodeInterpreterToolCallContent)] + outputs = [c for c in message.contents if isinstance(c, CodeInterpreterToolResultContent)] + if code_blocks: + code_inputs = 
code_blocks[0].inputs or [] + for content in code_inputs: + if isinstance(content, TextContent): + print(f"Generated code:\n{content.text}") + break + if outputs: + print("Execution outputs:") + for out in outputs[0].outputs or []: + if isinstance(out, TextContent): + print(out.text) if __name__ == "__main__":
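
For reference, the updated samples above converge on one consumption pattern: iterate message.contents, match on the typed content class (ImageGenerationToolResultContent, CodeInterpreterToolResultContent), and read its outputs instead of digging through raw_representation. The following is a minimal, illustrative sketch of that pattern only; it assumes the imports, the HostedImageGenerationTool option keys, and the list-shaped outputs used in the samples above, plus a no-argument HostedCodeInterpreterTool and a made-up prompt.

# Illustrative sketch (not part of this change): consume typed tool result contents
# from a single agent run, following the pattern used in the updated samples.
import asyncio

from agent_framework import (
    CodeInterpreterToolResultContent,
    DataContent,
    HostedCodeInterpreterTool,
    HostedImageGenerationTool,
    ImageGenerationToolResultContent,
    TextContent,
    UriContent,
)
from agent_framework.openai import OpenAIResponsesClient


async def main() -> None:
    # Option keys mirror the samples above; HostedCodeInterpreterTool() construction is assumed.
    agent = OpenAIResponsesClient().create_agent(
        instructions="You are a helpful assistant.",
        tools=[
            HostedImageGenerationTool(options={"size": "1024x1024", "output_format": "webp"}),
            HostedCodeInterpreterTool(),
        ],
    )

    result = await agent.run("Plot y = x**2 and return the chart as an image.")

    for message in result.messages:
        for content in message.contents:
            if isinstance(content, ImageGenerationToolResultContent):
                # Generated image data is carried in the result's outputs.
                for output in content.outputs or []:
                    if isinstance(output, (DataContent, UriContent)) and output.uri:
                        print(f"Image URI prefix: {output.uri[:40]}...")
            elif isinstance(content, CodeInterpreterToolResultContent):
                # Execution output (stdout and similar text) arrives as TextContent.
                for output in content.outputs or []:
                    if isinstance(output, TextContent):
                        print(output.text)


if __name__ == "__main__":
    asyncio.run(main())

The point of the pattern is that callers no longer need to unwrap provider-specific objects from raw_representation, as the previous code interpreter sample did; the typed contents expose the same information uniformly across tools.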