diff --git a/core/changelog.md b/core/changelog.md index 375c51adbc..18d559a226 100644 --- a/core/changelog.md +++ b/core/changelog.md @@ -2,3 +2,4 @@ - feat: add BifrostContextKeyMCPAddedTools to context to track MCP tools added to the request - refactor: standardize empty array conventions in bifrost. Empty array means deny all, ["*"] means allow all for models/tools/keys. - feat: add support for request-level extra headers in MCP tool execution using BifrostContextKeyMCPExtraHeaders key in context. +- fix: send back accumulated usage in MCP agent mode. \ No newline at end of file diff --git a/core/mcp/agent.go b/core/mcp/agent.go index 88ec11e5fb..cfb1290f5f 100644 --- a/core/mcp/agent.go +++ b/core/mcp/agent.go @@ -155,6 +155,9 @@ func (a *AgentModeExecutor) executeAgent( allExecutedToolResults := make([]*schemas.ChatMessage, 0) allExecutedToolCalls := make([]schemas.ChatAssistantMessageToolCall, 0) + // Accumulate token usage across all LLM calls in the agent loop + accumulatedUsage := adapter.extractUsage(currentResponse) + originalRequestID, ok := ctx.Value(schemas.BifrostContextKeyRequestID).(string) if ok { ctx.SetValue(schemas.BifrostMCPAgentOriginalRequestID, originalRequestID) @@ -335,6 +338,8 @@ func (a *AgentModeExecutor) executeAgent( if depth == 1 && len(allExecutedToolResults) == 0 { return currentResponse, nil } + // Apply accumulated usage before building the final response + adapter.applyUsage(currentResponse, accumulatedUsage) // Create response with all executed tool results from all iterations, and non-auto-executable tool calls return adapter.createResponseWithExecutedTools(currentResponse, allExecutedToolResults, allExecutedToolCalls, nonAutoExecutableTools), nil } @@ -357,11 +362,127 @@ func (a *AgentModeExecutor) executeAgent( } currentResponse = response + accumulatedUsage = mergeUsage(accumulatedUsage, adapter.extractUsage(currentResponse)) } + adapter.applyUsage(currentResponse, accumulatedUsage) return currentResponse, nil } +// 
mergeUsage sums token counts and costs from two BifrostLLMUsage values. +// If either argument is nil the other is returned unchanged (the same pointer, not a copy); otherwise a new value is allocated. Each detail or cost sub-struct is summed field by field with a nil side counted as zeros, and is left nil only when it is nil on both sides. +func mergeUsage(base, add *schemas.BifrostLLMUsage) *schemas.BifrostLLMUsage { + if add == nil { + return base + } + if base == nil { + return add + } + + merged := &schemas.BifrostLLMUsage{ + PromptTokens: base.PromptTokens + add.PromptTokens, + CompletionTokens: base.CompletionTokens + add.CompletionTokens, + TotalTokens: base.TotalTokens + add.TotalTokens, + } + + // Merge prompt token details; a nil side is swapped for a zero-valued struct so every field can be added without nil checks + if base.PromptTokensDetails != nil || add.PromptTokensDetails != nil { + bd := base.PromptTokensDetails + ad := add.PromptTokensDetails + if bd == nil { + bd = &schemas.ChatPromptTokensDetails{} + } + if ad == nil { + ad = &schemas.ChatPromptTokensDetails{} + } + merged.PromptTokensDetails = &schemas.ChatPromptTokensDetails{ + TextTokens: bd.TextTokens + ad.TextTokens, + AudioTokens: bd.AudioTokens + ad.AudioTokens, + ImageTokens: bd.ImageTokens + ad.ImageTokens, + CachedReadTokens: bd.CachedReadTokens + ad.CachedReadTokens, + CachedWriteTokens: bd.CachedWriteTokens + ad.CachedWriteTokens, + } + } + + // Merge completion token details the same way; the pointer-typed counters (CitationTokens, NumSearchQueries, ImageTokens) are set only when at least one side has them, with a nil pointer counted as 0 + if base.CompletionTokensDetails != nil || add.CompletionTokensDetails != nil { + bd := base.CompletionTokensDetails + ad := add.CompletionTokensDetails + if bd == nil { + bd = &schemas.ChatCompletionTokensDetails{} + } + if ad == nil { + ad = &schemas.ChatCompletionTokensDetails{} + } + merged.CompletionTokensDetails = &schemas.ChatCompletionTokensDetails{ + TextTokens: bd.TextTokens + ad.TextTokens, + AcceptedPredictionTokens: bd.AcceptedPredictionTokens + ad.AcceptedPredictionTokens, + AudioTokens: bd.AudioTokens + ad.AudioTokens, + ReasoningTokens: bd.ReasoningTokens + ad.ReasoningTokens, + RejectedPredictionTokens: bd.RejectedPredictionTokens + ad.RejectedPredictionTokens, + } + if bd.CitationTokens != nil || ad.CitationTokens != nil { + bct := 0 + act := 0 + if 
bd.CitationTokens != nil { + bct = *bd.CitationTokens + } + if ad.CitationTokens != nil { + act = *ad.CitationTokens + } + sum := bct + act + merged.CompletionTokensDetails.CitationTokens = &sum + } + if bd.NumSearchQueries != nil || ad.NumSearchQueries != nil { + bnsq := 0 + ansq := 0 + if bd.NumSearchQueries != nil { + bnsq = *bd.NumSearchQueries + } + if ad.NumSearchQueries != nil { + ansq = *ad.NumSearchQueries + } + sum := bnsq + ansq + merged.CompletionTokensDetails.NumSearchQueries = &sum + } + if bd.ImageTokens != nil || ad.ImageTokens != nil { + bit := 0 + ait := 0 + if bd.ImageTokens != nil { + bit = *bd.ImageTokens + } + if ad.ImageTokens != nil { + ait = *ad.ImageTokens + } + sum := bit + ait + merged.CompletionTokensDetails.ImageTokens = &sum + } + } + + // Merge cost; a nil side contributes zero to every cost component + if base.Cost != nil || add.Cost != nil { + bc := base.Cost + ac := add.Cost + if bc == nil { + bc = &schemas.BifrostCost{} + } + if ac == nil { + ac = &schemas.BifrostCost{} + } + merged.Cost = &schemas.BifrostCost{ + InputTokensCost: bc.InputTokensCost + ac.InputTokensCost, + OutputTokensCost: bc.OutputTokensCost + ac.OutputTokensCost, + ReasoningTokensCost: bc.ReasoningTokensCost + ac.ReasoningTokensCost, + CitationTokensCost: bc.CitationTokensCost + ac.CitationTokensCost, + SearchQueriesCost: bc.SearchQueriesCost + ac.SearchQueriesCost, + RequestCost: bc.RequestCost + ac.RequestCost, + TotalCost: bc.TotalCost + ac.TotalCost, + } + } + + return merged +} + // extractToolCalls extracts all tool calls from a chat response. // It iterates through all choices in the response and collects tool calls // from assistant messages. 
diff --git a/core/mcp/agentadaptors.go b/core/mcp/agentadaptors.go index 3a32694d3e..68745b4805 100644 --- a/core/mcp/agentadaptors.go +++ b/core/mcp/agentadaptors.go @@ -60,6 +60,12 @@ type agentAPIAdapter interface { executedToolCalls []schemas.ChatAssistantMessageToolCall, nonAutoExecutableToolCalls []schemas.ChatAssistantMessageToolCall, ) interface{} + + // extractUsage returns the token usage from a response as BifrostLLMUsage. chatAPIAdapter returns the response's Usage pointer directly, while responsesAPIAdapter converts it with ToBifrostLLMUsage. Both type-assert response to their own response type without a comma-ok check, so any other type panics. + extractUsage(response interface{}) *schemas.BifrostLLMUsage + + // applyUsage sets accumulated usage on the response in place, overwriting its Usage field. response is type-asserted without a comma-ok check, and responsesAPIAdapter stores usage.ToResponsesResponseUsage() rather than usage itself. NOTE(review): usage can presumably be nil when no call reported usage; confirm ToResponsesResponseUsage and ToBifrostLLMUsage are safe on a nil receiver. + applyUsage(response interface{}, usage *schemas.BifrostLLMUsage) } // chatAPIAdapter implements agentAPIAdapter for Chat API @@ -176,6 +182,14 @@ func (c *chatAPIAdapter) createResponseWithExecutedTools( ) } +func (c *chatAPIAdapter) extractUsage(response interface{}) *schemas.BifrostLLMUsage { + return response.(*schemas.BifrostChatResponse).Usage +} + +func (c *chatAPIAdapter) applyUsage(response interface{}, usage *schemas.BifrostLLMUsage) { + response.(*schemas.BifrostChatResponse).Usage = usage +} + // createChatResponseWithExecutedToolsAndNonAutoExecutableCalls creates a chat response // that includes executed tool results and non-auto-executable tool calls. The response // contains a formatted text summary of executed tool results and includes the non-auto-executable @@ -391,6 +405,14 @@ func (r *responsesAPIAdapter) createResponseWithExecutedTools( ) } +func (r *responsesAPIAdapter) extractUsage(response interface{}) *schemas.BifrostLLMUsage { + return response.(*schemas.BifrostResponsesResponse).Usage.ToBifrostLLMUsage() +} + +func (r *responsesAPIAdapter) applyUsage(response interface{}, usage *schemas.BifrostLLMUsage) { + response.(*schemas.BifrostResponsesResponse).Usage = usage.ToResponsesResponseUsage() +} + // createResponsesResponseWithExecutedToolsAndNonAutoExecutableCalls creates a responses response // that includes executed tool results and non-auto-executable tool calls. 
The response // contains a formatted text summary of executed tool results and includes the non-auto-executable diff --git a/transports/changelog.md b/transports/changelog.md index bd84c189fa..194e178e18 100644 --- a/transports/changelog.md +++ b/transports/changelog.md @@ -5,3 +5,4 @@ - feat: add support for request level extra headers in MCP tool execution. - fix: add support for `x-bf-mcp-include-clients` and `x-bf-mcp-include-tools` request headers to filter MCP tools/list response when using bifrost as an MCP gateway. - refactor: parallelize model listing for providers to speed up startup time. +- fix: send back accumulated usage in MCP agent mode. \ No newline at end of file