Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions core/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@
- feat: add BifrostContextKeyMCPAddedTools to context to track MCP tools added to the request
- refactor: standardize empty array conventions in bifrost. Empty array means deny all, ["*"] means allow all for models/tools/keys.
- feat: add support for request-level extra headers in MCP tool execution using BifrostContextKeyMCPExtraHeaders key in context.
- fix: send back accumulated usage in MCP agent mode.
121 changes: 121 additions & 0 deletions core/mcp/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,9 @@ func (a *AgentModeExecutor) executeAgent(
allExecutedToolResults := make([]*schemas.ChatMessage, 0)
allExecutedToolCalls := make([]schemas.ChatAssistantMessageToolCall, 0)

// Accumulate token usage across all LLM calls in the agent loop
accumulatedUsage := adapter.extractUsage(currentResponse)

originalRequestID, ok := ctx.Value(schemas.BifrostContextKeyRequestID).(string)
if ok {
ctx.SetValue(schemas.BifrostMCPAgentOriginalRequestID, originalRequestID)
Expand Down Expand Up @@ -335,6 +338,8 @@ func (a *AgentModeExecutor) executeAgent(
if depth == 1 && len(allExecutedToolResults) == 0 {
return currentResponse, nil
}
// Apply accumulated usage before building the final response
adapter.applyUsage(currentResponse, accumulatedUsage)
// Create response with all executed tool results from all iterations, and non-auto-executable tool calls
return adapter.createResponseWithExecutedTools(currentResponse, allExecutedToolResults, allExecutedToolCalls, nonAutoExecutableTools), nil
}
Expand All @@ -357,11 +362,127 @@ func (a *AgentModeExecutor) executeAgent(
}

currentResponse = response
accumulatedUsage = mergeUsage(accumulatedUsage, adapter.extractUsage(currentResponse))
}

adapter.applyUsage(currentResponse, accumulatedUsage)
return currentResponse, nil
}

// mergeUsage combines two BifrostLLMUsage values by adding their token counts
// and costs field by field.
//
// A nil argument contributes nothing: when add is nil, base is returned
// unchanged, and when base is nil, add is returned unchanged (the very same
// pointer, not a copy). Otherwise a newly allocated value is returned.
//
// Each optional section (prompt token details, completion token details, cost)
// stays nil when neither input carries it. When at least one input carries it,
// the missing side is treated as all zeros and a fresh, summed section is
// built. The same rule applies to the optional counters inside the completion
// token details.
func mergeUsage(base, add *schemas.BifrostLLMUsage) *schemas.BifrostLLMUsage {
	switch {
	case add == nil:
		return base
	case base == nil:
		return add
	}

	// sumOptional adds two optional counters, counting nil as zero. It yields
	// nil only when both counters are absent, so "not reported" is preserved.
	sumOptional := func(x, y *int) *int {
		if x == nil && y == nil {
			return nil
		}
		total := 0
		if x != nil {
			total += *x
		}
		if y != nil {
			total += *y
		}
		return &total
	}

	result := &schemas.BifrostLLMUsage{}
	result.PromptTokens = base.PromptTokens + add.PromptTokens
	result.CompletionTokens = base.CompletionTokens + add.CompletionTokens
	result.TotalTokens = base.TotalTokens + add.TotalTokens

	// Prompt token details.
	if lhs, rhs := base.PromptTokensDetails, add.PromptTokensDetails; lhs != nil || rhs != nil {
		if lhs == nil {
			lhs = &schemas.ChatPromptTokensDetails{}
		}
		if rhs == nil {
			rhs = &schemas.ChatPromptTokensDetails{}
		}
		result.PromptTokensDetails = &schemas.ChatPromptTokensDetails{
			TextTokens:        lhs.TextTokens + rhs.TextTokens,
			AudioTokens:       lhs.AudioTokens + rhs.AudioTokens,
			ImageTokens:       lhs.ImageTokens + rhs.ImageTokens,
			CachedReadTokens:  lhs.CachedReadTokens + rhs.CachedReadTokens,
			CachedWriteTokens: lhs.CachedWriteTokens + rhs.CachedWriteTokens,
		}
	}

	// Completion token details, including the optional pointer-valued counters.
	if lhs, rhs := base.CompletionTokensDetails, add.CompletionTokensDetails; lhs != nil || rhs != nil {
		if lhs == nil {
			lhs = &schemas.ChatCompletionTokensDetails{}
		}
		if rhs == nil {
			rhs = &schemas.ChatCompletionTokensDetails{}
		}
		result.CompletionTokensDetails = &schemas.ChatCompletionTokensDetails{
			TextTokens:               lhs.TextTokens + rhs.TextTokens,
			AcceptedPredictionTokens: lhs.AcceptedPredictionTokens + rhs.AcceptedPredictionTokens,
			AudioTokens:              lhs.AudioTokens + rhs.AudioTokens,
			ReasoningTokens:          lhs.ReasoningTokens + rhs.ReasoningTokens,
			RejectedPredictionTokens: lhs.RejectedPredictionTokens + rhs.RejectedPredictionTokens,
			CitationTokens:           sumOptional(lhs.CitationTokens, rhs.CitationTokens),
			NumSearchQueries:         sumOptional(lhs.NumSearchQueries, rhs.NumSearchQueries),
			ImageTokens:              sumOptional(lhs.ImageTokens, rhs.ImageTokens),
		}
	}

	// Cost breakdown.
	if lhs, rhs := base.Cost, add.Cost; lhs != nil || rhs != nil {
		if lhs == nil {
			lhs = &schemas.BifrostCost{}
		}
		if rhs == nil {
			rhs = &schemas.BifrostCost{}
		}
		result.Cost = &schemas.BifrostCost{
			InputTokensCost:     lhs.InputTokensCost + rhs.InputTokensCost,
			OutputTokensCost:    lhs.OutputTokensCost + rhs.OutputTokensCost,
			ReasoningTokensCost: lhs.ReasoningTokensCost + rhs.ReasoningTokensCost,
			CitationTokensCost:  lhs.CitationTokensCost + rhs.CitationTokensCost,
			SearchQueriesCost:   lhs.SearchQueriesCost + rhs.SearchQueriesCost,
			RequestCost:         lhs.RequestCost + rhs.RequestCost,
			TotalCost:           lhs.TotalCost + rhs.TotalCost,
		}
	}

	return result
}

// extractToolCalls extracts all tool calls from a chat response.
// It iterates through all choices in the response and collects tool calls
// from assistant messages.
Expand Down
22 changes: 22 additions & 0 deletions core/mcp/agentadaptors.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,12 @@ type agentAPIAdapter interface {
executedToolCalls []schemas.ChatAssistantMessageToolCall,
nonAutoExecutableToolCalls []schemas.ChatAssistantMessageToolCall,
) interface{}

// extractUsage returns the token usage from a response as BifrostLLMUsage.
extractUsage(response interface{}) *schemas.BifrostLLMUsage

// applyUsage sets accumulated usage on the response in place.
applyUsage(response interface{}, usage *schemas.BifrostLLMUsage)
}

// chatAPIAdapter implements agentAPIAdapter for Chat API
Expand Down Expand Up @@ -176,6 +182,14 @@ func (c *chatAPIAdapter) createResponseWithExecutedTools(
)
}

// extractUsage returns the Usage field of a Chat API response.
// response must hold a *schemas.BifrostChatResponse; the type assertion is
// unchecked, so any other dynamic type panics.
func (c *chatAPIAdapter) extractUsage(response interface{}) *schemas.BifrostLLMUsage {
	chatResponse := response.(*schemas.BifrostChatResponse)
	return chatResponse.Usage
}

// applyUsage overwrites the Usage field of a Chat API response in place with
// the given usage pointer (no copy is made).
// response must hold a *schemas.BifrostChatResponse; the type assertion is
// unchecked, so any other dynamic type panics.
func (c *chatAPIAdapter) applyUsage(response interface{}, usage *schemas.BifrostLLMUsage) {
	chatResponse := response.(*schemas.BifrostChatResponse)
	chatResponse.Usage = usage
}

// createChatResponseWithExecutedToolsAndNonAutoExecutableCalls creates a chat response
// that includes executed tool results and non-auto-executable tool calls. The response
// contains a formatted text summary of executed tool results and includes the non-auto-executable
Expand Down Expand Up @@ -391,6 +405,14 @@ func (r *responsesAPIAdapter) createResponseWithExecutedTools(
)
}

// extractUsage converts the Usage field of a Responses API response to a
// BifrostLLMUsage via ToBifrostLLMUsage and returns the result.
// response must hold a *schemas.BifrostResponsesResponse; the type assertion
// is unchecked, so any other dynamic type panics.
// NOTE(review): behaviour for a nil Usage depends on ToBifrostLLMUsage
// tolerating a nil receiver — confirm against its definition.
func (r *responsesAPIAdapter) extractUsage(response interface{}) *schemas.BifrostLLMUsage {
	responsesResponse := response.(*schemas.BifrostResponsesResponse)
	return responsesResponse.Usage.ToBifrostLLMUsage()
}

// applyUsage converts usage with ToResponsesResponseUsage and stores the
// result in the Usage field of a Responses API response, in place.
// response must hold a *schemas.BifrostResponsesResponse; the type assertion
// is unchecked, so any other dynamic type panics.
// NOTE(review): behaviour for a nil usage depends on ToResponsesResponseUsage
// tolerating a nil receiver — confirm against its definition.
func (r *responsesAPIAdapter) applyUsage(response interface{}, usage *schemas.BifrostLLMUsage) {
	responsesResponse := response.(*schemas.BifrostResponsesResponse)
	responsesResponse.Usage = usage.ToResponsesResponseUsage()
}

// createResponsesResponseWithExecutedToolsAndNonAutoExecutableCalls creates a responses response
// that includes executed tool results and non-auto-executable tool calls. The response
// contains a formatted text summary of executed tool results and includes the non-auto-executable
Expand Down
1 change: 1 addition & 0 deletions transports/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@
- feat: add support for request level extra headers in MCP tool execution.
- fix: add support for `x-bf-mcp-include-clients` and `x-bf-mcp-include-tools` request headers to filter MCP tools/list response when using bifrost as an MCP gateway.
- refactor: parallelize model listing for providers to speed up startup time.
- fix: send back accumulated usage in MCP agent mode.