diff --git a/core/changelog.md b/core/changelog.md index 619c913aa2..38f89fedac 100644 --- a/core/changelog.md +++ b/core/changelog.md @@ -1,2 +1,3 @@ +- fix: Gemini provider - handle content block tool outputs in Responses API path - fix: case-insensitive `anthropic-beta` merge in `MergeBetaHeaders` - fix: Bedrock provider - emit message_stop event for Anthropic invoke stream [@tefimov](https://github.com/tefimov) diff --git a/core/providers/gemini/gemini_test.go b/core/providers/gemini/gemini_test.go index 558df9bda7..ecaec0cf70 100644 --- a/core/providers/gemini/gemini_test.go +++ b/core/providers/gemini/gemini_test.go @@ -1936,6 +1936,70 @@ func TestResponsesAPIParallelFunctionCalling(t *testing.T) { } }, }, + { + name: "ResponsesAPI_FunctionCallOutput_ContentBlocks", + input: &schemas.BifrostResponsesRequest{ + Provider: schemas.Gemini, + Model: "gemini-2.0-flash", + Input: []schemas.ResponsesMessage{ + { + Role: schemas.Ptr(schemas.ResponsesInputMessageRoleUser), + Type: schemas.Ptr(schemas.ResponsesMessageTypeMessage), + Content: &schemas.ResponsesMessageContent{ + ContentStr: schemas.Ptr("List browser tabs"), + }, + }, + { + Type: schemas.Ptr(schemas.ResponsesMessageTypeFunctionCall), + ResponsesToolMessage: &schemas.ResponsesToolMessage{ + CallID: schemas.Ptr("call_tabs"), + Name: schemas.Ptr("browser_tabs"), + Arguments: schemas.Ptr(`{"action":"list"}`), + }, + }, + { + Type: schemas.Ptr(schemas.ResponsesMessageTypeFunctionCallOutput), + ResponsesToolMessage: &schemas.ResponsesToolMessage{ + CallID: schemas.Ptr("call_tabs"), + Output: &schemas.ResponsesToolMessageOutputStruct{ + // Output as content blocks (Anthropic Responses API format) + ResponsesFunctionToolCallOutputBlocks: []schemas.ResponsesMessageContentBlock{ + { + Type: schemas.ResponsesInputMessageContentBlockTypeText, + Text: schemas.Ptr("### Open tabs\n- 0: (current) [Google] (https://google.com)\n- 1: [GitHub] (https://github.com)\n"), + }, + }, + }, + }, + }, + }, + }, + validate: func(t *testing.T, result *gemini.GeminiGenerationRequest) { + // Find the Content with function response + var toolResponseContent *gemini.Content + for i := range result.Contents { + content := &result.Contents[i] + if len(content.Parts) > 0 && content.Parts[0].FunctionResponse != nil { + toolResponseContent = content + break + } + } + + require.NotNil(t, toolResponseContent, "Should have a content with functionResponse") + require.Len(t, toolResponseContent.Parts, 1) + + part := toolResponseContent.Parts[0] + require.NotNil(t, part.FunctionResponse, "Part must have functionResponse") + assert.Equal(t, "call_tabs", part.FunctionResponse.ID) + assert.Equal(t, "browser_tabs", part.FunctionResponse.Name) + + // Verify the response data contains the tool output (not empty) + require.NotNil(t, part.FunctionResponse.Response, "FunctionResponse.Response must not be nil") + responseStr := string(part.FunctionResponse.Response) + assert.Contains(t, responseStr, "Open tabs", "Response should contain the tool output text") + assert.Contains(t, responseStr, "Google", "Response should contain tab content") + }, + }, } for _, tt := range tests { diff --git a/core/providers/gemini/responses.go b/core/providers/gemini/responses.go index c9a8af93ba..8f2377cc78 100644 --- a/core/providers/gemini/responses.go +++ b/core/providers/gemini/responses.go @@ -3016,6 +3016,30 @@ func convertResponsesMessagesToGeminiContents(messages []schemas.ResponsesMessag } else { responseMap["output"] = output } + } else if msg.ResponsesToolMessage.Output != nil && msg.ResponsesToolMessage.Output.ResponsesFunctionToolCallOutputBlocks != nil { + // Handle structured output blocks (e.g. from Anthropic Responses API format + // where output is an array of content blocks like [{"type":"input_text","text":"..."}]) + var textParts []string + for _, block := range msg.ResponsesToolMessage.Output.ResponsesFunctionToolCallOutputBlocks { + if block.Text != nil && *block.Text != "" { + textParts = append(textParts, *block.Text) + } + } + if len(textParts) > 0 { + combined := strings.Join(textParts, "\n") + if json.Valid([]byte(combined)) { + responseMap["output"] = json.RawMessage(combined) + } else { + responseMap["output"] = combined + } + } else { + // Fallback for non-text blocks (e.g. images, files): marshal the raw blocks + // so responseMap["output"] is never left empty when blocks are present + rawBlocks, err := providerUtils.MarshalSorted(msg.ResponsesToolMessage.Output.ResponsesFunctionToolCallOutputBlocks) + if err == nil && len(rawBlocks) > 0 { + responseMap["output"] = json.RawMessage(rawBlocks) + } + } } else if msg.Content != nil && msg.Content.ContentStr != nil { // Fallback to Content.ContentStr for backward compatibility output := *msg.Content.ContentStr