maximhq · Pratham-Mishra04 · Mar 24, 2026 · Mar 23, 2026
diff --git a/core/changelog.md b/core/changelog.md
@@ -2,3 +2,4 @@
 - feat: add BifrostContextKeyMCPAddedTools to context to track MCP tools added to the request
 - refactor: standardize empty array conventions in bifrost. Empty array means deny all, ["*"] means allow all for models/tools/keys.
 - feat: add support for request-level extra headers in MCP tool execution using BifrostContextKeyMCPExtraHeaders key in context.
+- fix: send back accumulated usage in MCP agent mode.
diff --git a/core/mcp/agent.go b/core/mcp/agent.go
@@ -155,6 +155,9 @@ func (a *AgentModeExecutor) executeAgent(
 	allExecutedToolResults := make([]*schemas.ChatMessage, 0)
 	allExecutedToolCalls := make([]schemas.ChatAssistantMessageToolCall, 0)
 
+	// Accumulate token usage across all LLM calls in the agent loop
+	accumulatedUsage := adapter.extractUsage(currentResponse)
+
 	originalRequestID, ok := ctx.Value(schemas.BifrostContextKeyRequestID).(string)
 	if ok {
 		ctx.SetValue(schemas.BifrostMCPAgentOriginalRequestID, originalRequestID)
@@ -335,6 +338,8 @@ func (a *AgentModeExecutor) executeAgent(
 			if depth == 1 && len(allExecutedToolResults) == 0 {
 				return currentResponse, nil
 			}
+			// Apply accumulated usage before building the final response
+			adapter.applyUsage(currentResponse, accumulatedUsage)
 			// Create response with all executed tool results from all iterations, and non-auto-executable tool calls
 			return adapter.createResponseWithExecutedTools(currentResponse, allExecutedToolResults, allExecutedToolCalls, nonAutoExecutableTools), nil
 		}
@@ -357,11 +362,127 @@ func (a *AgentModeExecutor) executeAgent(
 		}
 
 		currentResponse = response
+		accumulatedUsage = mergeUsage(accumulatedUsage, adapter.extractUsage(currentResponse))
 	}
 
+	adapter.applyUsage(currentResponse, accumulatedUsage)
 	return currentResponse, nil
 }
 
+// mergeUsage sums token counts and costs from two BifrostLLMUsage values.
+// Detail sub-fields are summed when both are present; if only one is non-nil it is kept as-is.
+func mergeUsage(base, add *schemas.BifrostLLMUsage) *schemas.BifrostLLMUsage {
+	if add == nil {
+		return base
+	}
+	if base == nil {
+		return add
+	}
+
+	merged := &schemas.BifrostLLMUsage{
+		PromptTokens:     base.PromptTokens + add.PromptTokens,
+		CompletionTokens: base.CompletionTokens + add.CompletionTokens,
+		TotalTokens:      base.TotalTokens + add.TotalTokens,
+	}
+
+	// Merge prompt token details
+	if base.PromptTokensDetails != nil || add.PromptTokensDetails != nil {
+		bd := base.PromptTokensDetails
+		ad := add.PromptTokensDetails
+		if bd == nil {
+			bd = &schemas.ChatPromptTokensDetails{}
+		}
+		if ad == nil {
+			ad = &schemas.ChatPromptTokensDetails{}
+		}
+		merged.PromptTokensDetails = &schemas.ChatPromptTokensDetails{
+			TextTokens:        bd.TextTokens + ad.TextTokens,
+			AudioTokens:       bd.AudioTokens + ad.AudioTokens,
+			ImageTokens:       bd.ImageTokens + ad.ImageTokens,
+			CachedReadTokens:  bd.CachedReadTokens + ad.CachedReadTokens,
+			CachedWriteTokens: bd.CachedWriteTokens + ad.CachedWriteTokens,
+		}
+	}
+
+	// Merge completion token details
+	if base.CompletionTokensDetails != nil || add.CompletionTokensDetails != nil {
+		bd := base.CompletionTokensDetails
+		ad := add.CompletionTokensDetails
+		if bd == nil {
+			bd = &schemas.ChatCompletionTokensDetails{}
+		}
+		if ad == nil {
+			ad = &schemas.ChatCompletionTokensDetails{}
+		}
+		merged.CompletionTokensDetails = &schemas.ChatCompletionTokensDetails{
+			TextTokens:               bd.TextTokens + ad.TextTokens,
+			AcceptedPredictionTokens: bd.AcceptedPredictionTokens + ad.AcceptedPredictionTokens,
+			AudioTokens:              bd.AudioTokens + ad.AudioTokens,
+			ReasoningTokens:          bd.ReasoningTokens + ad.ReasoningTokens,
+			RejectedPredictionTokens: bd.RejectedPredictionTokens + ad.RejectedPredictionTokens,
+		}
+		if bd.CitationTokens != nil || ad.CitationTokens != nil {
+			bct := 0
+			act := 0
+			if bd.CitationTokens != nil {
+				bct = *bd.CitationTokens
+			}
+			if ad.CitationTokens != nil {
+				act = *ad.CitationTokens
+			}
+			sum := bct + act
+			merged.CompletionTokensDetails.CitationTokens = &sum
+		}
+		if bd.NumSearchQueries != nil || ad.NumSearchQueries != nil {
+			bnsq := 0
+			ansq := 0
+			if bd.NumSearchQueries != nil {
+				bnsq = *bd.NumSearchQueries
+			}
+			if ad.NumSearchQueries != nil {
+				ansq = *ad.NumSearchQueries
+			}
+			sum := bnsq + ansq
+			merged.CompletionTokensDetails.NumSearchQueries = &sum
+		}
+		if bd.ImageTokens != nil || ad.ImageTokens != nil {
+			bit := 0
+			ait := 0
+			if bd.ImageTokens != nil {
+				bit = *bd.ImageTokens
+			}
+			if ad.ImageTokens != nil {
+				ait = *ad.ImageTokens
+			}
+			sum := bit + ait
+			merged.CompletionTokensDetails.ImageTokens = &sum
+		}
+	}
+
+	// Merge cost
+	if base.Cost != nil || add.Cost != nil {
+		bc := base.Cost
+		ac := add.Cost
+		if bc == nil {
+			bc = &schemas.BifrostCost{}
+		}
+		if ac == nil {
+			ac = &schemas.BifrostCost{}
+		}
+		merged.Cost = &schemas.BifrostCost{
+			InputTokensCost:     bc.InputTokensCost + ac.InputTokensCost,
+			OutputTokensCost:    bc.OutputTokensCost + ac.OutputTokensCost,
+			ReasoningTokensCost: bc.ReasoningTokensCost + ac.ReasoningTokensCost,
+			CitationTokensCost:  bc.CitationTokensCost + ac.CitationTokensCost,
+			SearchQueriesCost:   bc.SearchQueriesCost + ac.SearchQueriesCost,
+			RequestCost:         bc.RequestCost + ac.RequestCost,
+			TotalCost:           bc.TotalCost + ac.TotalCost,
+		}
+	}
+
+	return merged
+}
+
 // extractToolCalls extracts all tool calls from a chat response.
 // It iterates through all choices in the response and collects tool calls
 // from assistant messages.

diff --git a/core/mcp/agentadaptors.go b/core/mcp/agentadaptors.go
@@ -60,6 +60,12 @@ type agentAPIAdapter interface {
 		executedToolCalls []schemas.ChatAssistantMessageToolCall,
 		nonAutoExecutableToolCalls []schemas.ChatAssistantMessageToolCall,
 	) interface{}
+
+	// extractUsage returns the token usage from a response as BifrostLLMUsage.
+	extractUsage(response interface{}) *schemas.BifrostLLMUsage
+
+	// applyUsage sets accumulated usage on the response in place.
+	applyUsage(response interface{}, usage *schemas.BifrostLLMUsage)
 }
 
 // chatAPIAdapter implements agentAPIAdapter for Chat API
@@ -176,6 +182,14 @@ func (c *chatAPIAdapter) createResponseWithExecutedTools(
 	)
 }
 
+func (c *chatAPIAdapter) extractUsage(response interface{}) *schemas.BifrostLLMUsage {
+	return response.(*schemas.BifrostChatResponse).Usage
+}
+
+func (c *chatAPIAdapter) applyUsage(response interface{}, usage *schemas.BifrostLLMUsage) {
+	response.(*schemas.BifrostChatResponse).Usage = usage
+}
+
 // createChatResponseWithExecutedToolsAndNonAutoExecutableCalls creates a chat response
 // that includes executed tool results and non-auto-executable tool calls. The response
 // contains a formatted text summary of executed tool results and includes the non-auto-executable
@@ -391,6 +405,14 @@ func (r *responsesAPIAdapter) createResponseWithExecutedTools(
 	)
 }
 
+func (r *responsesAPIAdapter) extractUsage(response interface{}) *schemas.BifrostLLMUsage {
+	return response.(*schemas.BifrostResponsesResponse).Usage.ToBifrostLLMUsage()
+}
+
+func (r *responsesAPIAdapter) applyUsage(response interface{}, usage *schemas.BifrostLLMUsage) {
+	response.(*schemas.BifrostResponsesResponse).Usage = usage.ToResponsesResponseUsage()
+}
+
 // createResponsesResponseWithExecutedToolsAndNonAutoExecutableCalls creates a responses response
 // that includes executed tool results and non-auto-executable tool calls. The response
 // contains a formatted text summary of executed tool results and includes the non-auto-executable

diff --git a/transports/changelog.md b/transports/changelog.md
@@ -5,3 +5,4 @@
 - feat: add support for request level extra headers in MCP tool execution.
 - fix: add support for `x-bf-mcp-include-clients` and `x-bf-mcp-include-tools` request headers to filter MCP tools/list response when using bifrost as an MCP gateway.
 - refactor: parallelize model listing for providers to speed up startup time.
+- fix: send back accumulated usage in MCP agent mode.