From b917b6202205b1fb789dc84e1c9887a3ef07345a Mon Sep 17 00:00:00 2001 From: tom Date: Mon, 13 Apr 2026 13:00:20 -0700 Subject: [PATCH] [fix]: Gemini provider - handle content block tool outputs in Responses API path When function_call_output messages arrive via the Anthropic Responses API format, their output is an array of content blocks (ResponsesFunctionToolCallOutputBlocks), not a plain string (ResponsesToolCallOutputStr). The Gemini provider's convertResponsesMessagesToGeminiContents only checked the string case, silently dropping all tool result content and sending empty {} responses to Gemini. This caused the model to loop endlessly retrying tool calls it never saw results for. Other providers (Bedrock, OpenAI, Cohere) already handle both output formats. This aligns the Gemini provider with them. Affected packages: - core/providers/gemini/responses.go - Add ResponsesFunctionToolCallOutputBlocks handling - core/providers/gemini/gemini_test.go - Add test for content block outputs Co-Authored-By: Claude Opus 4.6 (1M context) --- core/changelog.md | 1 + core/providers/gemini/gemini_test.go | 64 ++++++++++++++++++++++++++++ core/providers/gemini/responses.go | 24 +++++++++++ 3 files changed, 89 insertions(+) diff --git a/core/changelog.md b/core/changelog.md index e69de29bb2..f9b42fe653 100644 --- a/core/changelog.md +++ b/core/changelog.md @@ -0,0 +1 @@ +[fix]: Gemini provider - handle content block tool outputs in Responses API path diff --git a/core/providers/gemini/gemini_test.go b/core/providers/gemini/gemini_test.go index 558df9bda7..ecaec0cf70 100644 --- a/core/providers/gemini/gemini_test.go +++ b/core/providers/gemini/gemini_test.go @@ -1936,6 +1936,70 @@ func TestResponsesAPIParallelFunctionCalling(t *testing.T) { } }, }, + { + name: "ResponsesAPI_FunctionCallOutput_ContentBlocks", + input: &schemas.BifrostResponsesRequest{ + Provider: schemas.Gemini, + Model: "gemini-2.0-flash", + Input: []schemas.ResponsesMessage{ + { + Role: schemas.Ptr(schemas.ResponsesInputMessageRoleUser), + Type: schemas.Ptr(schemas.ResponsesMessageTypeMessage), + Content: &schemas.ResponsesMessageContent{ + ContentStr: schemas.Ptr("List browser tabs"), + }, + }, + { + Type: schemas.Ptr(schemas.ResponsesMessageTypeFunctionCall), + ResponsesToolMessage: &schemas.ResponsesToolMessage{ + CallID: schemas.Ptr("call_tabs"), + Name: schemas.Ptr("browser_tabs"), + Arguments: schemas.Ptr(`{"action":"list"}`), + }, + }, + { + Type: schemas.Ptr(schemas.ResponsesMessageTypeFunctionCallOutput), + ResponsesToolMessage: &schemas.ResponsesToolMessage{ + CallID: schemas.Ptr("call_tabs"), + Output: &schemas.ResponsesToolMessageOutputStruct{ + // Output as content blocks (Anthropic Responses API format) + ResponsesFunctionToolCallOutputBlocks: []schemas.ResponsesMessageContentBlock{ + { + Type: schemas.ResponsesInputMessageContentBlockTypeText, + Text: schemas.Ptr("### Open tabs\n- 0: (current) [Google] (https://google.com)\n- 1: [GitHub] (https://github.com)\n"), + }, + }, + }, + }, + }, + }, + }, + validate: func(t *testing.T, result *gemini.GeminiGenerationRequest) { + // Find the Content with function response + var toolResponseContent *gemini.Content + for i := range result.Contents { + content := &result.Contents[i] + if len(content.Parts) > 0 && content.Parts[0].FunctionResponse != nil { + toolResponseContent = content + break + } + } + + require.NotNil(t, toolResponseContent, "Should have a content with functionResponse") + require.Len(t, toolResponseContent.Parts, 1) + + part := toolResponseContent.Parts[0] + require.NotNil(t, part.FunctionResponse, "Part must have functionResponse") + assert.Equal(t, "call_tabs", part.FunctionResponse.ID) + assert.Equal(t, "browser_tabs", part.FunctionResponse.Name) + + // Verify the response data contains the tool output (not empty) + require.NotNil(t, part.FunctionResponse.Response, "FunctionResponse.Response must not be nil") + responseStr := string(part.FunctionResponse.Response) + assert.Contains(t, responseStr, "Open tabs", "Response should contain the tool output text") + assert.Contains(t, responseStr, "Google", "Response should contain tab content") + }, + }, } for _, tt := range tests { diff --git a/core/providers/gemini/responses.go b/core/providers/gemini/responses.go index c9a8af93ba..8f2377cc78 100644 --- a/core/providers/gemini/responses.go +++ b/core/providers/gemini/responses.go @@ -3016,6 +3016,30 @@ func convertResponsesMessagesToGeminiContents(messages []schemas.ResponsesMessag } else { responseMap["output"] = output } + } else if msg.ResponsesToolMessage.Output != nil && msg.ResponsesToolMessage.Output.ResponsesFunctionToolCallOutputBlocks != nil { + // Handle structured output blocks (e.g. from Anthropic Responses API format + // where output is an array of content blocks like [{"type":"input_text","text":"..."}]) + var textParts []string + for _, block := range msg.ResponsesToolMessage.Output.ResponsesFunctionToolCallOutputBlocks { + if block.Text != nil && *block.Text != "" { + textParts = append(textParts, *block.Text) + } + } + if len(textParts) > 0 { + combined := strings.Join(textParts, "\n") + if json.Valid([]byte(combined)) { + responseMap["output"] = json.RawMessage(combined) + } else { + responseMap["output"] = combined + } + } else { + // Fallback for non-text blocks (e.g. images, files): marshal the raw blocks + // so responseMap["output"] is never left empty when blocks are present + rawBlocks, err := providerUtils.MarshalSorted(msg.ResponsesToolMessage.Output.ResponsesFunctionToolCallOutputBlocks) + if err == nil && len(rawBlocks) > 0 { + responseMap["output"] = json.RawMessage(rawBlocks) + } + } } else if msg.Content != nil && msg.Content.ContentStr != nil { // Fallback to Content.ContentStr for backward compatibility output := *msg.Content.ContentStr