-
Notifications
You must be signed in to change notification settings - Fork 2k
Support ImageContent and AudioContent in sampling handlers #3550
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
d746172
db9c1e2
aa76c47
2298e76
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -3,10 +3,11 @@ | |
| from collections.abc import Iterator, Sequence | ||
| from typing import Any | ||
|
|
||
| from mcp.types import CreateMessageRequestParams as SamplingParams | ||
| from mcp.types import ( | ||
| AudioContent, | ||
| CreateMessageResult, | ||
| CreateMessageResultWithTools, | ||
| ImageContent, | ||
| ModelPreferences, | ||
| SamplingMessage, | ||
| SamplingMessageContentBlock, | ||
|
|
@@ -17,10 +18,13 @@ | |
| ToolResultContent, | ||
| ToolUseContent, | ||
| ) | ||
| from mcp.types import CreateMessageRequestParams as SamplingParams | ||
|
|
||
| try: | ||
| from anthropic import AsyncAnthropic | ||
| from anthropic.types import ( | ||
| Base64ImageSourceParam, | ||
| ImageBlockParam, | ||
| Message, | ||
| MessageParam, | ||
| TextBlock, | ||
|
|
@@ -42,6 +46,28 @@ | |
|
|
||
| __all__ = ["AnthropicSamplingHandler"] | ||
|
|
||
| # Anthropic supports these image MIME types | ||
| _ANTHROPIC_IMAGE_MEDIA_TYPES = frozenset( | ||
| {"image/jpeg", "image/png", "image/gif", "image/webp"} | ||
| ) | ||
|
|
||
|
|
||
| def _image_content_to_anthropic_block(content: ImageContent) -> ImageBlockParam: | ||
| """Convert MCP ImageContent to Anthropic ImageBlockParam.""" | ||
| if content.mimeType not in _ANTHROPIC_IMAGE_MEDIA_TYPES: | ||
| raise ValueError( | ||
| f"Unsupported image MIME type for Anthropic: {content.mimeType!r}. " | ||
| f"Supported types: {', '.join(sorted(_ANTHROPIC_IMAGE_MEDIA_TYPES))}" | ||
| ) | ||
| return ImageBlockParam( | ||
| type="image", | ||
| source=Base64ImageSourceParam( | ||
| type="base64", | ||
| media_type=content.mimeType, # type: ignore[arg-type] | ||
| data=content.data, | ||
| ), | ||
| ) | ||
|
|
||
|
|
||
| class AnthropicSamplingHandler: | ||
| """Sampling handler that uses the Anthropic API. | ||
|
|
@@ -155,7 +181,10 @@ def _convert_to_anthropic_messages( | |
| # Handle list content (from CreateMessageResultWithTools) | ||
| if isinstance(content, list): | ||
| content_blocks: list[ | ||
| TextBlockParam | ToolUseBlockParam | ToolResultBlockParam | ||
| TextBlockParam | ||
| | ImageBlockParam | ||
| | ToolUseBlockParam | ||
| | ToolResultBlockParam | ||
| ] = [] | ||
|
|
||
| for item in content: | ||
|
|
@@ -172,6 +201,17 @@ def _convert_to_anthropic_messages( | |
| content_blocks.append( | ||
| TextBlockParam(type="text", text=item.text) | ||
| ) | ||
| elif isinstance(item, ImageContent): | ||
| if message.role != "user": | ||
| raise ValueError( | ||
| "ImageContent is only supported in user messages " | ||
| "for Anthropic" | ||
| ) | ||
| content_blocks.append(_image_content_to_anthropic_block(item)) | ||
| elif isinstance(item, AudioContent): | ||
| raise ValueError( | ||
| "AudioContent is not supported by the Anthropic API" | ||
| ) | ||
| elif isinstance(item, ToolResultContent): | ||
| # Extract text content from the result | ||
| result_content: str | list[TextBlockParam] = "" | ||
|
|
@@ -262,6 +302,24 @@ def _convert_to_anthropic_messages( | |
| ) | ||
| continue | ||
|
|
||
| # Handle ImageContent | ||
| if isinstance(content, ImageContent): | ||
| if message.role != "user": | ||
| raise ValueError( | ||
| "ImageContent is only supported in user messages for Anthropic" | ||
| ) | ||
| anthropic_messages.append( | ||
| MessageParam( | ||
| role="user", | ||
| content=[_image_content_to_anthropic_block(content)], | ||
|
Comment on lines
+306
to
+314
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Useful? React with 👍 / 👎.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good catch — fixed in 2298e76. Both single-content and list-content paths now validate that ImageContent is only in user messages. |
||
| ) | ||
| ) | ||
| continue | ||
|
|
||
| # Handle AudioContent - not supported by Anthropic | ||
| if isinstance(content, AudioContent): | ||
| raise ValueError("AudioContent is not supported by the Anthropic API") | ||
|
|
||
| raise ValueError(f"Unsupported content type: {type(content)}") | ||
|
|
||
| return anthropic_messages | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,11 +1,13 @@ | ||
| """Google GenAI sampling handler with tool support for FastMCP 3.0.""" | ||
|
|
||
| import base64 | ||
| from collections.abc import Sequence | ||
| from uuid import uuid4 | ||
|
|
||
| try: | ||
| from google.genai import Client as GoogleGenaiClient | ||
| from google.genai.types import ( | ||
| Blob, | ||
| Candidate, | ||
| Content, | ||
| FunctionCall, | ||
|
|
@@ -197,6 +199,22 @@ def _sampling_content_to_google_genai_part( | |
| if isinstance(content, TextContent): | ||
| return Part(text=content.text) | ||
|
|
||
| if isinstance(content, ImageContent): | ||
| return Part( | ||
| inline_data=Blob( | ||
| data=base64.b64decode(content.data), | ||
| mime_type=content.mimeType, | ||
|
Comment on lines
+202
to
+206
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Gemini only accepts a fixed allowlist of image and audio MIME types, but this new conversion path passes `content.mimeType` through without validation. Useful? React with 👍 / 👎.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Gemini supports a much wider range of media types than OpenAI/Anthropic — a restrictive allowlist would reject valid inputs. The API validates and returns a clear error for unsupported types.
Comment on lines
+202
to
+206
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
The new `ImageContent`/`AudioContent` conversion path is not applied to tool results. Useful? React with 👍 / 👎.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same — pre-existing, unchanged by this PR. Multimodal tool results are a valid follow-up. |
||
| ) | ||
| ) | ||
|
|
||
| if isinstance(content, AudioContent): | ||
| return Part( | ||
| inline_data=Blob( | ||
| data=base64.b64decode(content.data), | ||
| mime_type=content.mimeType, | ||
| ) | ||
| ) | ||
|
|
||
| if isinstance(content, ToolUseContent): | ||
| # Note: thought_signature bypass is required for manually constructed tool calls. | ||
| # Google's Gemini 3+ models enforce thought signature validation for function calls. | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -6,10 +6,11 @@ | |
|
|
||
| from mcp import ClientSession, ServerSession | ||
| from mcp.shared.context import LifespanContextT, RequestContext | ||
| from mcp.types import CreateMessageRequestParams as SamplingParams | ||
| from mcp.types import ( | ||
| AudioContent, | ||
| CreateMessageResult, | ||
| CreateMessageResultWithTools, | ||
| ImageContent, | ||
| ModelPreferences, | ||
| SamplingMessage, | ||
| StopReason, | ||
|
|
@@ -19,12 +20,17 @@ | |
| ToolResultContent, | ||
| ToolUseContent, | ||
| ) | ||
| from mcp.types import CreateMessageRequestParams as SamplingParams | ||
|
|
||
| try: | ||
| from openai import AsyncOpenAI | ||
| from openai.types.chat import ( | ||
| ChatCompletion, | ||
| ChatCompletionAssistantMessageParam, | ||
| ChatCompletionContentPartImageParam, | ||
| ChatCompletionContentPartInputAudioParam, | ||
| ChatCompletionContentPartParam, | ||
| ChatCompletionContentPartTextParam, | ||
| ChatCompletionMessageParam, | ||
| ChatCompletionMessageToolCallParam, | ||
| ChatCompletionSystemMessageParam, | ||
|
|
@@ -41,6 +47,50 @@ | |
| "Please install `fastmcp[openai]` or add `openai` to your dependencies manually." | ||
| ) from e | ||
|
|
||
| # OpenAI only supports wav and mp3 for input audio | ||
| _OPENAI_AUDIO_FORMATS: dict[str, str] = { | ||
| "audio/wav": "wav", | ||
| "audio/x-wav": "wav", | ||
| "audio/mp3": "mp3", | ||
| "audio/mpeg": "mp3", | ||
| } | ||
|
|
||
| _OPENAI_IMAGE_MEDIA_TYPES: frozenset[str] = frozenset( | ||
| {"image/jpeg", "image/png", "image/gif", "image/webp"} | ||
| ) | ||
|
|
||
|
|
||
| def _image_content_to_openai_part( | ||
| content: ImageContent, | ||
| ) -> ChatCompletionContentPartImageParam: | ||
| """Convert MCP ImageContent to OpenAI image_url content part.""" | ||
| if content.mimeType not in _OPENAI_IMAGE_MEDIA_TYPES: | ||
| raise ValueError( | ||
| f"Unsupported image MIME type for OpenAI: {content.mimeType!r}. " | ||
| f"Supported types: {', '.join(sorted(_OPENAI_IMAGE_MEDIA_TYPES))}" | ||
| ) | ||
| data_url = f"data:{content.mimeType};base64,{content.data}" | ||
| return ChatCompletionContentPartImageParam( | ||
| type="image_url", | ||
| image_url={"url": data_url}, | ||
| ) | ||
|
|
||
|
|
||
| def _audio_content_to_openai_part( | ||
| content: AudioContent, | ||
| ) -> ChatCompletionContentPartInputAudioParam: | ||
| """Convert MCP AudioContent to OpenAI input_audio content part.""" | ||
| audio_format = _OPENAI_AUDIO_FORMATS.get(content.mimeType) | ||
| if audio_format is None: | ||
| raise ValueError( | ||
| f"Unsupported audio MIME type for OpenAI: {content.mimeType!r}. " | ||
| f"Supported types: {', '.join(sorted(_OPENAI_AUDIO_FORMATS))}" | ||
| ) | ||
| return ChatCompletionContentPartInputAudioParam( | ||
| type="input_audio", | ||
| input_audio={"data": content.data, "format": audio_format}, | ||
| ) | ||
|
|
||
|
|
||
| class OpenAISamplingHandler: | ||
| """Sampling handler that uses the OpenAI API.""" | ||
|
|
@@ -147,8 +197,9 @@ def _convert_to_openai_messages( | |
|
|
||
| # Handle list content (from CreateMessageResultWithTools) | ||
| if isinstance(content, list): | ||
| # Collect tool calls and text from the list | ||
| # Collect tool calls, content parts, and text from the list | ||
| tool_calls: list[ChatCompletionMessageToolCallParam] = [] | ||
| content_parts: list[ChatCompletionContentPartParam] = [] | ||
| text_parts: list[str] = [] | ||
| # Collect tool results separately to maintain correct ordering | ||
| tool_messages: list[ChatCompletionToolMessageParam] = [] | ||
|
|
@@ -167,6 +218,15 @@ def _convert_to_openai_messages( | |
| ) | ||
| elif isinstance(item, TextContent): | ||
| text_parts.append(item.text) | ||
| content_parts.append( | ||
| ChatCompletionContentPartTextParam( | ||
| type="text", text=item.text | ||
| ) | ||
| ) | ||
| elif isinstance(item, ImageContent): | ||
| content_parts.append(_image_content_to_openai_part(item)) | ||
| elif isinstance(item, AudioContent): | ||
| content_parts.append(_audio_content_to_openai_part(item)) | ||
|
Comment on lines
+226
to
+229
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
This change makes image and audio content available in regular messages, but `ToolResultContent` still extracts only text content. Useful? React with 👍 / 👎.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Pre-existing limitation — ToolResultContent has always only extracted TextContent. This PR adds multimodal support for messages, not tool results. Valid follow-up but out of scope here. |
||
| elif isinstance(item, ToolResultContent): | ||
| # Collect tool results (added after assistant message) | ||
| content_text = "" | ||
|
|
@@ -186,33 +246,47 @@ def _convert_to_openai_messages( | |
|
|
||
| # Add assistant message with tool calls if present | ||
| # OpenAI requires: assistant (with tool_calls) -> tool messages | ||
| if tool_calls or text_parts: | ||
| msg_content = "\n".join(text_parts) if text_parts else None | ||
| if tool_calls or content_parts: | ||
| if tool_calls: | ||
| has_multimodal = len(content_parts) > len(text_parts) | ||
| if has_multimodal: | ||
| raise ValueError( | ||
| "ImageContent/AudioContent is only supported " | ||
| "in user messages for OpenAI" | ||
| ) | ||
| text_str = "\n".join(text_parts) or None | ||
| openai_messages.append( | ||
| ChatCompletionAssistantMessageParam( | ||
| role="assistant", | ||
| content=msg_content, | ||
| content=text_str, | ||
| tool_calls=tool_calls, | ||
| ) | ||
| ) | ||
| # Add tool messages AFTER assistant message | ||
| openai_messages.extend(tool_messages) | ||
| elif msg_content: | ||
| elif content_parts: | ||
| if message.role == "user": | ||
| openai_messages.append( | ||
| ChatCompletionUserMessageParam( | ||
| role="user", | ||
| content=msg_content, | ||
| content=content_parts, | ||
| ) | ||
| ) | ||
| else: | ||
| openai_messages.append( | ||
| ChatCompletionAssistantMessageParam( | ||
| role="assistant", | ||
| content=msg_content, | ||
| has_multimodal = len(content_parts) > len(text_parts) | ||
| if has_multimodal: | ||
| raise ValueError( | ||
| "ImageContent/AudioContent is only supported " | ||
| "in user messages for OpenAI" | ||
| ) | ||
| assistant_text = "\n".join(text_parts) | ||
| if assistant_text: | ||
| openai_messages.append( | ||
| ChatCompletionAssistantMessageParam( | ||
| role="assistant", | ||
| content=assistant_text, | ||
| ) | ||
| ) | ||
| ) | ||
| elif tool_messages: | ||
| # Tool results only (assistant message was in previous message) | ||
| openai_messages.extend(tool_messages) | ||
|
|
@@ -272,6 +346,34 @@ def _convert_to_openai_messages( | |
| ) | ||
| continue | ||
|
|
||
| # Handle ImageContent | ||
| if isinstance(content, ImageContent): | ||
| if message.role != "user": | ||
| raise ValueError( | ||
| "ImageContent is only supported in user messages for OpenAI" | ||
| ) | ||
| openai_messages.append( | ||
| ChatCompletionUserMessageParam( | ||
| role="user", | ||
| content=[_image_content_to_openai_part(content)], | ||
| ) | ||
| ) | ||
| continue | ||
|
|
||
| # Handle AudioContent | ||
| if isinstance(content, AudioContent): | ||
| if message.role != "user": | ||
| raise ValueError( | ||
| "AudioContent is only supported in user messages for OpenAI" | ||
| ) | ||
| openai_messages.append( | ||
| ChatCompletionUserMessageParam( | ||
| role="user", | ||
| content=[_audio_content_to_openai_part(content)], | ||
| ) | ||
| ) | ||
| continue | ||
|
|
||
| raise ValueError(f"Unsupported content type: {type(content)}") | ||
|
|
||
| return openai_messages | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Anthropic's tool-use API allows `tool_result.content` to contain nested `text` or `image` blocks, but `_convert_to_anthropic_messages()` still serializes `ToolResultContent.content` by collecting only `TextContent`. If a FastMCP tool returns `content=[ImageContent(...)]`, Claude will receive an empty tool result instead of the image it requested. Since this commit adds `ImageContent` support for user messages, the same media needs to be forwarded (or explicitly rejected) in `ToolResultContent` as well. Useful? React with 👍 / 👎.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Same — pre-existing, unchanged by this PR. Anthropic does support images in tool_result blocks so it's a valid follow-up.