From 76c11a6b6442f6be526f59a0744ce5752f0501fc Mon Sep 17 00:00:00 2001 From: siddseethepalli Date: Sat, 14 Feb 2026 08:01:11 +0000 Subject: [PATCH] Pass tool result images to OpenAI and Gemini providers Tool results with image contentBlocks (e.g. browser_screenshot) were silently dropped by the OpenAI and Gemini providers because their APIs don't support images in tool/function-response messages. Fix by injecting images as user message content (OpenAI) or sibling inline data parts (Gemini), which both APIs support. Co-Authored-By: Claude --- assistant/src/providers/gemini/client.ts | 14 +++++++++++++- assistant/src/providers/openai/client.ts | 16 ++++++++++++---- 2 files changed, 25 insertions(+), 5 deletions(-) diff --git a/assistant/src/providers/gemini/client.ts b/assistant/src/providers/gemini/client.ts index 5bd79299d0b..8e5986f2730 100644 --- a/assistant/src/providers/gemini/client.ts +++ b/assistant/src/providers/gemini/client.ts @@ -213,7 +213,6 @@ export class GeminiProvider implements Provider { break; case 'tool_result': { let outputText = block.content; - // Extract additional text from contentBlocks (Gemini function responses only support text) if (block.contentBlocks && block.contentBlocks.length > 0) { const extraText = block.contentBlocks .filter((cb): cb is Extract => cb.type === 'text') @@ -221,6 +220,19 @@ export class GeminiProvider implements Provider { if (extraText.length > 0) { outputText = outputText + '\n' + extraText.join('\n'); } + // Include images as inline data parts alongside the function response + // (Gemini function responses only support text, but images can be + // added as sibling parts in the same user message). + for (const cb of block.contentBlocks) { + if (cb.type === 'image') { + parts.push({ + inlineData: { + mimeType: cb.source.media_type, + data: cb.source.data, + }, + }); + } + } } parts.push({ functionResponse: { diff --git a/assistant/src/providers/openai/client.ts b/assistant/src/providers/openai/client.ts index e8300577467..fcbc4e3bc1f 100644 --- a/assistant/src/providers/openai/client.ts +++ b/assistant/src/providers/openai/client.ts @@ -176,9 +176,11 @@ export class OpenAIProvider implements Provider { ); // Emit tool results as separate tool-role messages + // OpenAI's API only supports string content in tool messages, so images + // from contentBlocks are collected and injected as a user message below. + const toolResultImages: ContentBlock[] = []; for (const tr of toolResults) { let textContent = tr.content; - // Extract additional text from contentBlocks (images can't be represented in OpenAI tool results) if (tr.contentBlocks && tr.contentBlocks.length > 0) { const extraText = tr.contentBlocks .filter((cb): cb is Extract => cb.type === 'text') @@ -186,6 +188,9 @@ export class OpenAIProvider implements Provider { if (extraText.length > 0) { textContent = textContent + '\n' + extraText.join('\n'); } + for (const cb of tr.contentBlocks) { + if (cb.type === 'image') toolResultImages.push(cb); + } } result.push({ role: 'tool', @@ -194,9 +199,12 @@ export class OpenAIProvider implements Provider { }); } - // Emit remaining content as a user message (if any) - if (otherBlocks.length > 0) { - result.push(this.toOpenAIUserMessage(otherBlocks)); + // Emit remaining content + any tool result images as a user message. + // Images from tool results (e.g. browser_screenshot) must go in a user + // message because OpenAI-compatible APIs don't support images in tool messages. + const userContent = [...otherBlocks, ...toolResultImages]; + if (userContent.length > 0) { + result.push(this.toOpenAIUserMessage(userContent)); } } }