diff --git a/core/mcp.go b/core/mcp.go
deleted file mode 100644
index be9fb580b0..0000000000
--- a/core/mcp.go
+++ /dev/null
@@ -1,1183 +0,0 @@
-package bifrost
-
-import (
- "context"
- "encoding/json"
- "fmt"
- "maps"
- "os"
- "slices"
- "strings"
- "sync"
- "time"
-
- "github.com/maximhq/bifrost/core/schemas"
-
- "github.com/mark3labs/mcp-go/client"
- "github.com/mark3labs/mcp-go/client/transport"
- "github.com/mark3labs/mcp-go/mcp"
- "github.com/mark3labs/mcp-go/server"
-)
-
-// ============================================================================
-// CONSTANTS
-// ============================================================================
-
-const (
- // MCP defaults and identifiers
- BifrostMCPVersion = "1.0.0" // Version identifier for Bifrost
- BifrostMCPClientName = "BifrostClient" // Name for internal Bifrost MCP client
- BifrostMCPClientKey = "bifrost-internal" // Key for internal Bifrost client in clientMap
- MCPLogPrefix = "[Bifrost MCP]" // Consistent logging prefix
- MCPClientConnectionEstablishTimeout = 30 * time.Second // Timeout for MCP client connection establishment
-
- // Context keys for client filtering in requests
- // NOTE: []string is used for both keys, and by default all clients/tools are included (when nil).
- // If "*" is present, all clients/tools are included, and [] means no clients/tools are included.
- // Request context filtering takes priority over client config - context can override client exclusions.
- MCPContextKeyIncludeClients schemas.BifrostContextKey = "mcp-include-clients" // Context key for whitelist client filtering
- MCPContextKeyIncludeTools schemas.BifrostContextKey = "mcp-include-tools" // Context key for whitelist tool filtering (Note: toolName should be in "clientName/toolName" format)
-)
-
-// ============================================================================
-// TYPE DEFINITIONS
-// ============================================================================
-
-// MCPManager manages MCP integration for Bifrost core.
-// It provides a bridge between Bifrost and various MCP servers, supporting
-// both local tool hosting and external MCP server connections.
-type MCPManager struct {
- ctx context.Context
- server *server.MCPServer // Local MCP server instance for hosting tools (STDIO-based)
- clientMap map[string]*MCPClient // Map of MCP client names to their configurations
- mu sync.RWMutex // Read-write mutex for thread-safe operations
- serverRunning bool // Track whether local MCP server is running
- logger schemas.Logger // Logger instance for structured logging
-}
-
-// MCPClient represents a connected MCP client with its configuration and tools.
-type MCPClient struct {
- // Name string // Unique name for this client
- Conn *client.Client // Active MCP client connection
- ExecutionConfig schemas.MCPClientConfig // Tool filtering settings
- ToolMap map[string]schemas.ChatTool // Available tools mapped by name
- ConnectionInfo MCPClientConnectionInfo `json:"connection_info"` // Connection metadata for management
- cancelFunc context.CancelFunc `json:"-"` // Cancel function for SSE connections (not serialized)
-}
-
-// MCPClientConnectionInfo stores metadata about how a client is connected.
-type MCPClientConnectionInfo struct {
- Type schemas.MCPConnectionType `json:"type"` // Connection type (HTTP, STDIO, SSE, or InProcess)
- ConnectionURL *string `json:"connection_url,omitempty"` // HTTP/SSE endpoint URL (for HTTP/SSE connections)
- StdioCommandString *string `json:"stdio_command_string,omitempty"` // Command string for display (for STDIO connections)
-}
-
-// MCPToolHandler is a generic function type for handling tool calls with typed arguments.
-// T represents the expected argument structure for the tool.
-type MCPToolHandler[T any] func(args T) (string, error)
-
-// ============================================================================
-// CONSTRUCTOR AND INITIALIZATION
-// ============================================================================
-
-// newMCPManager creates and initializes a new MCP manager instance.
-//
-// Parameters:
-// - config: MCP configuration including server port and client configs
-// - logger: Logger instance for structured logging (uses default if nil)
-//
-// Returns:
-// - *MCPManager: Initialized manager instance
-// - error: Any initialization error
-func newMCPManager(ctx context.Context, config schemas.MCPConfig, logger schemas.Logger) (*MCPManager, error) {
- // Creating new instance
- manager := &MCPManager{
- ctx: ctx,
- clientMap: make(map[string]*MCPClient),
- logger: logger,
- }
- // Process client configs: create client map entries and establish connections
- for _, clientConfig := range config.ClientConfigs {
- if err := manager.AddClient(clientConfig); err != nil {
- manager.logger.Warn(fmt.Sprintf("%s Failed to add MCP client %s: %v", MCPLogPrefix, clientConfig.Name, err))
- }
- }
- manager.logger.Info(MCPLogPrefix + " MCP Manager initialized")
- return manager, nil
-}
-
-// GetClients returns all MCP clients managed by the manager.
-//
-// Returns:
-// - []*MCPClient: List of all MCP clients
-// - error: Any retrieval error
-func (m *MCPManager) GetClients() ([]MCPClient, error) {
- m.mu.RLock()
- defer m.mu.RUnlock()
-
- clients := make([]MCPClient, 0, len(m.clientMap))
- for _, client := range m.clientMap {
- clients = append(clients, *client)
- }
-
- return clients, nil
-}
-
-// ReconnectClient attempts to reconnect an MCP client if it is disconnected.
-func (m *MCPManager) ReconnectClient(id string) error {
- m.mu.Lock()
-
- client, ok := m.clientMap[id]
- if !ok {
- m.mu.Unlock()
- return fmt.Errorf("client %s not found", id)
- }
-
- m.mu.Unlock()
-
- // connectToMCPClient handles locking internally
- err := m.connectToMCPClient(client.ExecutionConfig)
- if err != nil {
- return fmt.Errorf("failed to connect to MCP client %s: %w", id, err)
- }
-
- return nil
-}
-
-// AddClient adds a new MCP client to the manager.
-// It validates the client configuration and establishes a connection.
-//
-// Parameters:
-// - config: MCP client configuration
-//
-// Returns:
-func (m *MCPManager) AddClient(config schemas.MCPClientConfig) error {
- if err := validateMCPClientConfig(&config); err != nil {
- return fmt.Errorf("invalid MCP client configuration: %w", err)
- }
-
- // Make a copy of the config to use after unlocking
- configCopy := config
-
- m.mu.Lock()
-
- if _, ok := m.clientMap[config.ID]; ok {
- m.mu.Unlock()
- return fmt.Errorf("client %s already exists", config.Name)
- }
-
- // Create placeholder entry
- m.clientMap[config.ID] = &MCPClient{
- ExecutionConfig: config,
- ToolMap: make(map[string]schemas.ChatTool),
- }
-
- // Temporarily unlock for the connection attempt
- // This is to avoid deadlocks when the connection attempt is made
- m.mu.Unlock()
-
- // Connect using the copied config
- if err := m.connectToMCPClient(configCopy); err != nil {
- // Re-lock to clean up the failed entry
- m.mu.Lock()
- delete(m.clientMap, config.ID)
- m.mu.Unlock()
- return fmt.Errorf("failed to connect to MCP client %s: %w", config.Name, err)
- }
-
- return nil
-}
-
-// RemoveClient removes an MCP client from the manager.
-// It handles cleanup for all transport types (HTTP, STDIO, SSE).
-//
-// Parameters:
-// - id: ID of the client to remove
-func (m *MCPManager) RemoveClient(id string) error {
- m.mu.Lock()
- defer m.mu.Unlock()
-
- return m.removeClientUnsafe(id)
-}
-
-func (m *MCPManager) removeClientUnsafe(id string) error {
- client, ok := m.clientMap[id]
- if !ok {
- return fmt.Errorf("client %s not found", id)
- }
-
- m.logger.Info(fmt.Sprintf("%s Disconnecting MCP client: %s", MCPLogPrefix, client.ExecutionConfig.Name))
-
- // Cancel SSE context if present (required for proper SSE cleanup)
- if client.cancelFunc != nil {
- client.cancelFunc()
- client.cancelFunc = nil
- }
-
- // Close the client transport connection
- // This handles cleanup for all transport types (HTTP, STDIO, SSE)
- if client.Conn != nil {
- if err := client.Conn.Close(); err != nil {
- m.logger.Error("%s Failed to close MCP client %s: %v", MCPLogPrefix, client.ExecutionConfig.Name, err)
- }
- client.Conn = nil
- }
-
- // Clear client tool map
- client.ToolMap = make(map[string]schemas.ChatTool)
-
- delete(m.clientMap, id)
- return nil
-}
-
-func (m *MCPManager) EditClient(id string, updatedConfig schemas.MCPClientConfig) error {
- m.mu.Lock()
- defer m.mu.Unlock()
-
- client, ok := m.clientMap[id]
- if !ok {
- return fmt.Errorf("client %s not found", id)
- }
-
- // Update the client's execution config with new tool filters
- config := client.ExecutionConfig
- config.Name = updatedConfig.Name
- config.Headers = updatedConfig.Headers
- config.ToolsToExecute = updatedConfig.ToolsToExecute
-
- // Store the updated config
- client.ExecutionConfig = config
-
- if client.Conn == nil {
- return nil // Client is not connected, so no tools to update
- }
-
- // Clear current tool map
- client.ToolMap = make(map[string]schemas.ChatTool)
-
- // Temporarily unlock for the network call
- m.mu.Unlock()
-
- // Retrieve tools with updated configuration
- tools, err := m.retrieveExternalTools(m.ctx, client.Conn, config)
-
- // Re-lock to update the tool map
- m.mu.Lock()
-
- // Verify client still exists
- if _, ok := m.clientMap[id]; !ok {
- return fmt.Errorf("client %s was removed during tool update", id)
- }
-
- if err != nil {
- return fmt.Errorf("failed to retrieve external tools: %w", err)
- }
-
- // Store discovered tools
- maps.Copy(client.ToolMap, tools)
-
- return nil
-}
-
-// ============================================================================
-// TOOL REGISTRATION AND DISCOVERY
-// ============================================================================
-
-// getAvailableTools returns all tools from connected MCP clients.
-// Applies client filtering if specified in the context.
-func (m *MCPManager) getAvailableTools(ctx context.Context) []schemas.ChatTool {
- m.mu.RLock()
- defer m.mu.RUnlock()
-
- var includeClients []string
-
- // Extract client filtering from request context
- if existingIncludeClients, ok := ctx.Value(MCPContextKeyIncludeClients).([]string); ok && existingIncludeClients != nil {
- includeClients = existingIncludeClients
- }
-
- tools := make([]schemas.ChatTool, 0)
- for id, client := range m.clientMap {
- // Apply client filtering logic
- if !m.shouldIncludeClient(id, includeClients) {
- m.logger.Debug(fmt.Sprintf("%s Skipping MCP client %s: not in include clients list", MCPLogPrefix, id))
- continue
- }
-
- m.logger.Debug(fmt.Sprintf("Checking tools for MCP client %s with tools to execute: %v", id, client.ExecutionConfig.ToolsToExecute))
-
- // Add all tools from this client
- for toolName, tool := range client.ToolMap {
- // Check if tool should be skipped based on client configuration
- if m.shouldSkipToolForConfig(toolName, client.ExecutionConfig) {
- m.logger.Debug(fmt.Sprintf("%s Skipping MCP tool %s: not in tools to execute list", MCPLogPrefix, toolName))
- continue
- }
-
- // Check if tool should be skipped based on request context
- if m.shouldSkipToolForRequest(id, toolName, ctx) {
- m.logger.Debug(fmt.Sprintf("%s Skipping MCP tool %s: not in include tools list", MCPLogPrefix, toolName))
- continue
- }
-
- tools = append(tools, tool)
- }
- }
- return tools
-}
-
-// registerTool registers a typed tool handler with the local MCP server.
-// This is a convenience function that handles the conversion between typed Go
-// handlers and the MCP protocol.
-//
-// Type Parameters:
-// - T: The expected argument type for the tool (must be JSON-deserializable)
-//
-// Parameters:
-// - name: Unique tool name
-// - description: Human-readable tool description
-// - handler: Typed function that handles tool execution
-// - toolSchema: Bifrost tool schema for function calling
-//
-// Returns:
-// - error: Any registration error
-//
-// Example:
-//
-// type EchoArgs struct {
-// Message string `json:"message"`
-// }
-//
-// err := bifrost.RegisterMCPTool("echo", "Echo a message",
-// func(args EchoArgs) (string, error) {
-// return args.Message, nil
-// }, toolSchema)
-func (m *MCPManager) registerTool(name, description string, handler MCPToolHandler[any], toolSchema schemas.ChatTool) error {
- // Ensure local server is set up
- if err := m.setupLocalHost(); err != nil {
- return fmt.Errorf("failed to setup local host: %w", err)
- }
-
- // Verify internal client exists
- if _, ok := m.clientMap[BifrostMCPClientKey]; !ok {
- return fmt.Errorf("bifrost client not found")
- }
-
- m.mu.Lock()
- defer m.mu.Unlock()
-
- // Check if tool name already exists to prevent silent overwrites
- if _, exists := m.clientMap[BifrostMCPClientKey].ToolMap[name]; exists {
- return fmt.Errorf("tool '%s' is already registered", name)
- }
-
- m.logger.Info(fmt.Sprintf("%s Registering typed tool: %s", MCPLogPrefix, name))
-
- // Create MCP handler wrapper that converts between typed and MCP interfaces
- mcpHandler := func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
- // Extract arguments from the request using the request's methods
- args := request.GetArguments()
- result, err := handler(args)
- if err != nil {
- return mcp.NewToolResultError(fmt.Sprintf("Error: %s", err.Error())), nil
- }
- return mcp.NewToolResultText(result), nil
- }
-
- // Register the tool with the local MCP server using AddTool
- if m.server != nil {
- tool := mcp.NewTool(name, mcp.WithDescription(description))
- m.server.AddTool(tool, mcpHandler)
- }
-
- // Store tool definition for Bifrost integration
- m.clientMap[BifrostMCPClientKey].ToolMap[name] = toolSchema
-
- return nil
-}
-
-// setupLocalHost initializes the local MCP server and client if not already running.
-// This creates a STDIO-based server for local tool hosting and a corresponding client.
-// This is called automatically when tools are registered or when the server is needed.
-//
-// Returns:
-// - error: Any setup error
-func (m *MCPManager) setupLocalHost() error {
- // Check if server is already running
- if m.server != nil && m.serverRunning {
- return nil
- }
-
- // Create and configure local MCP server (STDIO-based)
- server, err := m.createLocalMCPServer()
- if err != nil {
- return fmt.Errorf("failed to create local MCP server: %w", err)
- }
- m.server = server
-
- // Create and configure local MCP client (STDIO-based)
- client, err := m.createLocalMCPClient()
- if err != nil {
- return fmt.Errorf("failed to create local MCP client: %w", err)
- }
- m.clientMap[BifrostMCPClientKey] = client
-
- // Start the server and initialize client connection
- return m.startLocalMCPServer()
-}
-
-// createLocalMCPServer creates a new local MCP server instance with STDIO transport.
-// This server will host tools registered via RegisterTool function.
-//
-// Returns:
-// - *server.MCPServer: Configured MCP server instance
-// - error: Any creation error
-func (m *MCPManager) createLocalMCPServer() (*server.MCPServer, error) {
- // Create MCP server
- mcpServer := server.NewMCPServer(
- "Bifrost-MCP-Server",
- "1.0.0",
- server.WithToolCapabilities(true),
- )
-
- return mcpServer, nil
-}
-
-// createLocalMCPClient creates a placeholder client entry for the local MCP server.
-// The actual in-process client connection will be established in startLocalMCPServer.
-//
-// Returns:
-// - *MCPClient: Placeholder client for local server
-// - error: Any creation error
-func (m *MCPManager) createLocalMCPClient() (*MCPClient, error) {
- // Don't create the actual client connection here - it will be created
- // after the server is ready using NewInProcessClient
- return &MCPClient{
- ExecutionConfig: schemas.MCPClientConfig{
- Name: BifrostMCPClientName,
- },
- ToolMap: make(map[string]schemas.ChatTool),
- ConnectionInfo: MCPClientConnectionInfo{
- Type: schemas.MCPConnectionTypeInProcess, // Accurate: in-process (in-memory) transport
- },
- }, nil
-}
-
-// startLocalMCPServer creates an in-process connection between the local server and client.
-//
-// Returns:
-// - error: Any startup error
-func (m *MCPManager) startLocalMCPServer() error {
- m.mu.Lock()
- defer m.mu.Unlock()
-
- // Check if server is already running
- if m.server != nil && m.serverRunning {
- return nil
- }
-
- if m.server == nil {
- return fmt.Errorf("server not initialized")
- }
-
- // Create in-process client directly connected to the server
- inProcessClient, err := client.NewInProcessClient(m.server)
- if err != nil {
- return fmt.Errorf("failed to create in-process MCP client: %w", err)
- }
-
- // Update the client connection
- clientEntry, ok := m.clientMap[BifrostMCPClientKey]
- if !ok {
- return fmt.Errorf("bifrost client not found")
- }
- clientEntry.Conn = inProcessClient
-
- // Initialize the in-process client
- ctx, cancel := context.WithTimeout(m.ctx, MCPClientConnectionEstablishTimeout)
- defer cancel()
-
- // Create proper initialize request with correct structure
- initRequest := mcp.InitializeRequest{
- Params: mcp.InitializeParams{
- ProtocolVersion: mcp.LATEST_PROTOCOL_VERSION,
- Capabilities: mcp.ClientCapabilities{},
- ClientInfo: mcp.Implementation{
- Name: BifrostMCPClientName,
- Version: BifrostMCPVersion,
- },
- },
- }
-
- _, err = inProcessClient.Initialize(ctx, initRequest)
- if err != nil {
- return fmt.Errorf("failed to initialize MCP client: %w", err)
- }
-
- // Mark server as running
- m.serverRunning = true
-
- return nil
-}
-
-// executeTool executes a tool call and returns the result as a tool message.
-//
-// Parameters:
-// - ctx: Execution context
-// - toolCall: The tool call to execute (from assistant message)
-//
-// Returns:
-// - schemas.ChatMessage: Tool message with execution result
-// - error: Any execution error
-func (m *MCPManager) executeTool(ctx context.Context, toolCall schemas.ChatAssistantMessageToolCall) (*schemas.ChatMessage, error) {
- if toolCall.Function.Name == nil {
- return nil, fmt.Errorf("tool call missing function name")
- }
- toolName := *toolCall.Function.Name
-
- // Parse tool arguments
- var arguments map[string]interface{}
- if err := json.Unmarshal([]byte(toolCall.Function.Arguments), &arguments); err != nil {
- return nil, fmt.Errorf("failed to parse tool arguments for '%s': %v", toolName, err)
- }
-
- // Find which client has this tool
- client := m.findMCPClientForTool(toolName)
- if client == nil {
- return nil, fmt.Errorf("tool '%s' not found in any connected MCP client", toolName)
- }
-
- if client.Conn == nil {
- return nil, fmt.Errorf("client '%s' has no active connection", client.ExecutionConfig.Name)
- }
-
- // Call the tool via MCP client -> MCP server
- callRequest := mcp.CallToolRequest{
- Request: mcp.Request{
- Method: string(mcp.MethodToolsCall),
- },
- Params: mcp.CallToolParams{
- Name: toolName,
- Arguments: arguments,
- },
- }
-
- m.logger.Debug(fmt.Sprintf("%s Starting tool execution: %s via client: %s", MCPLogPrefix, toolName, client.ExecutionConfig.Name))
-
- toolResponse, callErr := client.Conn.CallTool(ctx, callRequest)
- if callErr != nil {
- m.logger.Error("%s Tool execution failed for %s via client %s: %v", MCPLogPrefix, toolName, client.ExecutionConfig.Name, callErr)
- return nil, fmt.Errorf("MCP tool call failed: %v", callErr)
- }
-
- m.logger.Debug(fmt.Sprintf("%s Tool execution completed: %s", MCPLogPrefix, toolName))
-
- // Extract text from MCP response
- responseText := m.extractTextFromMCPResponse(toolResponse, toolName)
-
- // Create tool response message
- return m.createToolResponseMessage(toolCall, responseText), nil
-}
-
-// ============================================================================
-// EXTERNAL MCP CONNECTION MANAGEMENT
-// ============================================================================
-
-// connectToMCPClient establishes a connection to an external MCP server and
-// registers its available tools with the manager.
-func (m *MCPManager) connectToMCPClient(config schemas.MCPClientConfig) error {
- // First lock: Initialize or validate client entry
- m.mu.Lock()
-
- // Initialize or validate client entry
- if existingClient, exists := m.clientMap[config.ID]; exists {
- // Client entry exists from config, check for existing connection, if it does then close
- if existingClient.cancelFunc != nil {
- existingClient.cancelFunc()
- existingClient.cancelFunc = nil
- }
- if existingClient.Conn != nil {
- existingClient.Conn.Close()
- }
- // Update connection type for this connection attempt
- existingClient.ConnectionInfo.Type = config.ConnectionType
- }
- // Create new client entry with configuration
- m.clientMap[config.ID] = &MCPClient{
- ExecutionConfig: config,
- ToolMap: make(map[string]schemas.ChatTool),
- ConnectionInfo: MCPClientConnectionInfo{
- Type: config.ConnectionType,
- },
- }
- m.mu.Unlock()
-
- // Heavy operations performed outside lock
- var externalClient *client.Client
- var connectionInfo MCPClientConnectionInfo
- var err error
-
- // Create appropriate transport based on connection type
- switch config.ConnectionType {
- case schemas.MCPConnectionTypeHTTP:
- externalClient, connectionInfo, err = m.createHTTPConnection(config)
- case schemas.MCPConnectionTypeSTDIO:
- externalClient, connectionInfo, err = m.createSTDIOConnection(config)
- case schemas.MCPConnectionTypeSSE:
- externalClient, connectionInfo, err = m.createSSEConnection(config)
- case schemas.MCPConnectionTypeInProcess:
- externalClient, connectionInfo, err = m.createInProcessConnection(config)
- default:
- return fmt.Errorf("unknown connection type: %s", config.ConnectionType)
- }
-
- if err != nil {
- return fmt.Errorf("failed to create connection: %w", err)
- }
-
- // Initialize the external client with timeout
- // For SSE connections, we need a long-lived context, for others we can use timeout
- var ctx context.Context
- var cancel context.CancelFunc
-
- if config.ConnectionType == schemas.MCPConnectionTypeSSE {
- // SSE connections need a long-lived context for the persistent stream
- ctx, cancel = context.WithCancel(m.ctx)
- // Don't defer cancel here - SSE needs the context to remain active
- } else {
- // Other connection types can use timeout context
- ctx, cancel = context.WithTimeout(m.ctx, MCPClientConnectionEstablishTimeout)
- defer cancel()
- }
-
- // Start the transport first (required for STDIO and SSE clients)
- if err := externalClient.Start(ctx); err != nil {
- if config.ConnectionType == schemas.MCPConnectionTypeSSE {
- cancel() // Cancel SSE context only on error
- }
- return fmt.Errorf("failed to start MCP client transport %s: %v", config.Name, err)
- }
-
- // Create proper initialize request for external client
- extInitRequest := mcp.InitializeRequest{
- Params: mcp.InitializeParams{
- ProtocolVersion: mcp.LATEST_PROTOCOL_VERSION,
- Capabilities: mcp.ClientCapabilities{},
- ClientInfo: mcp.Implementation{
- Name: fmt.Sprintf("Bifrost-%s", config.Name),
- Version: "1.0.0",
- },
- },
- }
-
- _, err = externalClient.Initialize(ctx, extInitRequest)
- if err != nil {
- if config.ConnectionType == schemas.MCPConnectionTypeSSE {
- cancel() // Cancel SSE context only on error
- }
- return fmt.Errorf("failed to initialize MCP client %s: %v", config.Name, err)
- }
-
- // Retrieve tools from the external server (this also requires network I/O)
- tools, err := m.retrieveExternalTools(ctx, externalClient, config)
- if err != nil {
- m.logger.Warn(fmt.Sprintf("%s Failed to retrieve tools from %s: %v", MCPLogPrefix, config.Name, err))
- // Continue with connection even if tool retrieval fails
- tools = make(map[string]schemas.ChatTool)
- }
-
- // Second lock: Update client with final connection details and tools
- m.mu.Lock()
- defer m.mu.Unlock()
-
- // Verify client still exists (could have been cleaned up during heavy operations)
- if client, exists := m.clientMap[config.ID]; exists {
- // Store the external client connection and details
- client.Conn = externalClient
- client.ConnectionInfo = connectionInfo
-
- // Store cancel function for SSE connections to enable proper cleanup
- if config.ConnectionType == schemas.MCPConnectionTypeSSE {
- client.cancelFunc = cancel
- }
-
- // Store discovered tools
- for toolName, tool := range tools {
- client.ToolMap[toolName] = tool
- }
-
- m.logger.Info(fmt.Sprintf("%s Connected to MCP client: %s", MCPLogPrefix, config.Name))
- } else {
- return fmt.Errorf("client %s was removed during connection setup", config.Name)
- }
-
- return nil
-}
-
-// retrieveExternalTools retrieves and filters tools from an external MCP server without holding locks.
-func (m *MCPManager) retrieveExternalTools(ctx context.Context, client *client.Client, config schemas.MCPClientConfig) (map[string]schemas.ChatTool, error) {
- // Get available tools from external server
- listRequest := mcp.ListToolsRequest{
- PaginatedRequest: mcp.PaginatedRequest{
- Request: mcp.Request{
- Method: string(mcp.MethodToolsList),
- },
- },
- }
-
- toolsResponse, err := client.ListTools(ctx, listRequest)
- if err != nil {
- return nil, fmt.Errorf("failed to list tools: %v", err)
- }
-
- if toolsResponse == nil {
- return make(map[string]schemas.ChatTool), nil // No tools available
- }
-
- m.logger.Debug(fmt.Sprintf("%s Retrieved %d tools from %s", MCPLogPrefix, len(toolsResponse.Tools), config.Name))
-
- tools := make(map[string]schemas.ChatTool)
-
- // toolsResponse is already a ListToolsResult
- for _, mcpTool := range toolsResponse.Tools {
- // Convert MCP tool schema to Bifrost format
- bifrostTool := m.convertMCPToolToBifrostSchema(&mcpTool)
- tools[mcpTool.Name] = bifrostTool
- }
-
- return tools, nil
-}
-
-// shouldSkipToolForConfig checks if a tool should be skipped based on client configuration (without accessing clientMap).
-func (m *MCPManager) shouldSkipToolForConfig(toolName string, config schemas.MCPClientConfig) bool {
- // If ToolsToExecute is specified (not nil), apply filtering
- if config.ToolsToExecute != nil {
- // Handle empty array [] - means no tools are allowed
- if len(config.ToolsToExecute) == 0 {
- return true // No tools allowed
- }
-
- // Handle wildcard "*" - if present, all tools are allowed
- if slices.Contains(config.ToolsToExecute, "*") {
- return false // All tools allowed
- }
-
- // Check if specific tool is in the allowed list
- for _, allowedTool := range config.ToolsToExecute {
- if allowedTool == toolName {
- return false // Tool is allowed
- }
- }
- return true // Tool not in allowed list
- }
-
- return true // Tool is skipped (nil is treated as [] - no tools)
-}
-
-// shouldSkipToolForRequest checks if a tool should be skipped based on the request context.
-func (m *MCPManager) shouldSkipToolForRequest(clientID, toolName string, ctx context.Context) bool {
- includeTools := ctx.Value(MCPContextKeyIncludeTools)
-
- if includeTools != nil {
- // Try []string first (preferred type)
- if includeToolsList, ok := includeTools.([]string); ok {
- // Handle empty array [] - means no tools are included
- if len(includeToolsList) == 0 {
- return true // No tools allowed
- }
-
- // Handle wildcard "clientName/*" - if present, all tools are included for this client
- if slices.Contains(includeToolsList, fmt.Sprintf("%s/*", clientID)) {
- return false // All tools allowed
- }
-
- // Check if specific tool is in the list (format: clientName/toolName)
- fullToolName := fmt.Sprintf("%s/%s", clientID, toolName)
- if slices.Contains(includeToolsList, fullToolName) {
- return false // Tool is explicitly allowed
- }
-
- // If includeTools is specified but this tool is not in it, skip it
- return true
- }
- }
-
- return false // Tool is allowed (default when no filtering specified)
-}
-
-// convertMCPToolToBifrostSchema converts an MCP tool definition to Bifrost format.
-func (m *MCPManager) convertMCPToolToBifrostSchema(mcpTool *mcp.Tool) schemas.ChatTool {
- var properties *schemas.OrderedMap
- schemaType := mcpTool.InputSchema.Type
-
- // Ensure properties is always set (required by OpenAI API validation)
- if len(mcpTool.InputSchema.Properties) > 0 {
- orderedProps := make(schemas.OrderedMap, len(mcpTool.InputSchema.Properties))
- maps.Copy(orderedProps, mcpTool.InputSchema.Properties)
- properties = &orderedProps
- } else {
- // OpenAI function calling API always expects object schemas with properties field present
- emptyProps := make(schemas.OrderedMap)
- properties = &emptyProps
- }
-
- // Default to "object" type if empty (OpenAI function calling always uses object schemas)
- if schemaType == "" {
- schemaType = "object"
- }
-
- return schemas.ChatTool{
- Type: schemas.ChatToolTypeFunction,
- Function: &schemas.ChatToolFunction{
- Name: mcpTool.Name,
- Description: Ptr(mcpTool.Description),
- Parameters: &schemas.ToolFunctionParameters{
- Type: schemaType,
- Properties: properties,
- Required: mcpTool.InputSchema.Required,
- },
- },
- }
-}
-
-// extractTextFromMCPResponse extracts text content from an MCP tool response.
-func (m *MCPManager) extractTextFromMCPResponse(toolResponse *mcp.CallToolResult, toolName string) string {
- if toolResponse == nil {
- return fmt.Sprintf("MCP tool '%s' executed successfully", toolName)
- }
-
- var result strings.Builder
- for _, contentBlock := range toolResponse.Content {
- // Handle typed content
- switch content := contentBlock.(type) {
- case mcp.TextContent:
- result.WriteString(content.Text)
- case mcp.ImageContent:
- result.WriteString(fmt.Sprintf("[Image Response: %s, MIME: %s]\n", content.Data, content.MIMEType))
- case mcp.AudioContent:
- result.WriteString(fmt.Sprintf("[Audio Response: %s, MIME: %s]\n", content.Data, content.MIMEType))
- case mcp.EmbeddedResource:
- result.WriteString(fmt.Sprintf("[Embedded Resource Response: %s]\n", content.Type))
- default:
- // Fallback: try to extract from map structure
- if jsonBytes, err := json.Marshal(contentBlock); err == nil {
- var contentMap map[string]interface{}
- if json.Unmarshal(jsonBytes, &contentMap) == nil {
- if text, ok := contentMap["text"].(string); ok {
- result.WriteString(fmt.Sprintf("[Text Response: %s]\n", text))
- continue
- }
- }
- // Final fallback: serialize as JSON
- result.WriteString(string(jsonBytes))
- }
- }
- }
-
- if result.Len() > 0 {
- return strings.TrimSpace(result.String())
- }
- return fmt.Sprintf("MCP tool '%s' executed successfully", toolName)
-}
-
-// createToolResponseMessage creates a tool response message with the execution result.
-func (m *MCPManager) createToolResponseMessage(toolCall schemas.ChatAssistantMessageToolCall, responseText string) *schemas.ChatMessage {
- return &schemas.ChatMessage{
- Role: schemas.ChatMessageRoleTool,
- Content: &schemas.ChatMessageContent{
- ContentStr: &responseText,
- },
- ChatToolMessage: &schemas.ChatToolMessage{
- ToolCallID: toolCall.ID,
- },
- }
-}
-
-func (m *MCPManager) addMCPToolsToBifrostRequest(ctx context.Context, req *schemas.BifrostRequest) *schemas.BifrostRequest {
- mcpTools := m.getAvailableTools(ctx)
- if len(mcpTools) > 0 {
- m.logger.Debug(fmt.Sprintf("%s Adding %d MCP tools to request", MCPLogPrefix, len(mcpTools)))
- switch req.RequestType {
- case schemas.ChatCompletionRequest, schemas.ChatCompletionStreamRequest:
- // Only allocate new Params if it's nil to preserve caller-supplied settings
- if req.ChatRequest.Params == nil {
- req.ChatRequest.Params = &schemas.ChatParameters{}
- }
-
- tools := req.ChatRequest.Params.Tools
-
- // Create a map of existing tool names for O(1) lookup
- existingToolsMap := make(map[string]bool)
- for _, tool := range tools {
- if tool.Function != nil && tool.Function.Name != "" {
- existingToolsMap[tool.Function.Name] = true
- }
- }
-
- // Add MCP tools that are not already present
- for _, mcpTool := range mcpTools {
- // Skip tools with nil Function or empty Name
- if mcpTool.Function == nil || mcpTool.Function.Name == "" {
- continue
- }
-
- if !existingToolsMap[mcpTool.Function.Name] {
- tools = append(tools, mcpTool)
- // Update the map to prevent duplicates within MCP tools as well
- existingToolsMap[mcpTool.Function.Name] = true
- }
- }
- req.ChatRequest.Params.Tools = tools
- case schemas.ResponsesRequest, schemas.ResponsesStreamRequest:
- // Only allocate new Params if it's nil to preserve caller-supplied settings
- if req.ResponsesRequest.Params == nil {
- req.ResponsesRequest.Params = &schemas.ResponsesParameters{}
- }
-
- tools := req.ResponsesRequest.Params.Tools
-
- // Create a map of existing tool names for O(1) lookup
- existingToolsMap := make(map[string]bool)
- for _, tool := range tools {
- if tool.Name != nil {
- existingToolsMap[*tool.Name] = true
- }
- }
-
- // Add MCP tools that are not already present
- for _, mcpTool := range mcpTools {
- // Skip tools with nil Function or empty Name
- if mcpTool.Function == nil || mcpTool.Function.Name == "" {
- continue
- }
-
- if !existingToolsMap[mcpTool.Function.Name] {
- responsesTool := mcpTool.ToResponsesTool()
- // Skip if the converted tool has nil Name
- if responsesTool.Name == nil {
- continue
- }
-
- tools = append(tools, *responsesTool)
- // Update the map to prevent duplicates within MCP tools as well
- existingToolsMap[*responsesTool.Name] = true
- }
- }
- req.ResponsesRequest.Params.Tools = tools
- }
- }
- return req
-}
-
-func validateMCPClientConfig(config *schemas.MCPClientConfig) error {
- if strings.TrimSpace(config.ID) == "" {
- return fmt.Errorf("id is required for MCP client config")
- }
-
- if strings.TrimSpace(config.Name) == "" {
- return fmt.Errorf("name is required for MCP client config")
- }
-
- if config.ConnectionType == "" {
- return fmt.Errorf("connection type is required for MCP client config")
- }
-
- switch config.ConnectionType {
- case schemas.MCPConnectionTypeHTTP:
- if config.ConnectionString == nil {
- return fmt.Errorf("ConnectionString is required for HTTP connection type in client '%s'", config.Name)
- }
- case schemas.MCPConnectionTypeSSE:
- if config.ConnectionString == nil {
- return fmt.Errorf("ConnectionString is required for SSE connection type in client '%s'", config.Name)
- }
- case schemas.MCPConnectionTypeSTDIO:
- if config.StdioConfig == nil {
- return fmt.Errorf("StdioConfig is required for STDIO connection type in client '%s'", config.Name)
- }
- case schemas.MCPConnectionTypeInProcess:
- // InProcess requires a server instance to be provided programmatically
- // This cannot be validated from JSON config - the server must be set when using the Go package
- if config.InProcessServer == nil {
- return fmt.Errorf("InProcessServer is required for InProcess connection type in client '%s' (Go package only)", config.Name)
- }
- default:
- return fmt.Errorf("unknown connection type '%s' in client '%s'", config.ConnectionType, config.Name)
- }
-
- return nil
-}
-
-// ============================================================================
-// HELPER METHODS
-// ============================================================================
-
-// findMCPClientForTool safely finds a client that has the specified tool.
-func (m *MCPManager) findMCPClientForTool(toolName string) *MCPClient {
- m.mu.RLock()
- defer m.mu.RUnlock()
-
- for _, client := range m.clientMap {
- if _, exists := client.ToolMap[toolName]; exists {
- return client
- }
- }
- return nil
-}
-
-// shouldIncludeClient determines if a client should be included based on filtering rules.
-func (m *MCPManager) shouldIncludeClient(clientID string, includeClients []string) bool {
- // If includeClients is specified (not nil), apply whitelist filtering
- if includeClients != nil {
- // Handle empty array [] - means no clients are included
- if len(includeClients) == 0 {
- return false // No clients allowed
- }
-
- // Handle wildcard "*" - if present, all clients are included
- if slices.Contains(includeClients, "*") {
- return true // All clients allowed
- }
-
- // Check if specific client is in the list
- return slices.Contains(includeClients, clientID)
- }
-
- // Default: include all clients when no filtering specified (nil case)
- return true
-}
-
-// createHTTPConnection creates an HTTP-based MCP client connection without holding locks.
-func (m *MCPManager) createHTTPConnection(config schemas.MCPClientConfig) (*client.Client, MCPClientConnectionInfo, error) {
- if config.ConnectionString == nil {
- return nil, MCPClientConnectionInfo{}, fmt.Errorf("HTTP connection string is required")
- }
-
- // Prepare connection info
- connectionInfo := MCPClientConnectionInfo{
- Type: config.ConnectionType,
- ConnectionURL: config.ConnectionString,
- }
-
- // Create StreamableHTTP transport
- httpTransport, err := transport.NewStreamableHTTP(*config.ConnectionString, transport.WithHTTPHeaders(config.Headers))
- if err != nil {
- return nil, MCPClientConnectionInfo{}, fmt.Errorf("failed to create HTTP transport: %w", err)
- }
-
- client := client.NewClient(httpTransport)
-
- return client, connectionInfo, nil
-}
-
-// createSTDIOConnection creates a STDIO-based MCP client connection without holding locks.
-func (m *MCPManager) createSTDIOConnection(config schemas.MCPClientConfig) (*client.Client, MCPClientConnectionInfo, error) {
- if config.StdioConfig == nil {
- return nil, MCPClientConnectionInfo{}, fmt.Errorf("stdio config is required")
- }
-
- // Prepare STDIO command info for display
- cmdString := fmt.Sprintf("%s %s", config.StdioConfig.Command, strings.Join(config.StdioConfig.Args, " "))
-
- // Check if environment variables are set
- for _, env := range config.StdioConfig.Envs {
- if os.Getenv(env) == "" {
- return nil, MCPClientConnectionInfo{}, fmt.Errorf("environment variable %s is not set for MCP client %s", env, config.Name)
- }
- }
-
- // Create STDIO transport
- stdioTransport := transport.NewStdio(
- config.StdioConfig.Command,
- config.StdioConfig.Envs,
- config.StdioConfig.Args...,
- )
-
- // Prepare connection info
- connectionInfo := MCPClientConnectionInfo{
- Type: config.ConnectionType,
- StdioCommandString: &cmdString,
- }
-
- client := client.NewClient(stdioTransport)
-
- // Return nil for cmd since mark3labs/mcp-go manages the process internally
- return client, connectionInfo, nil
-}
-
-// createSSEConnection creates a SSE-based MCP client connection without holding locks.
-func (m *MCPManager) createSSEConnection(config schemas.MCPClientConfig) (*client.Client, MCPClientConnectionInfo, error) {
- if config.ConnectionString == nil {
- return nil, MCPClientConnectionInfo{}, fmt.Errorf("SSE connection string is required")
- }
-
- // Prepare connection info
- connectionInfo := MCPClientConnectionInfo{
- Type: config.ConnectionType,
- ConnectionURL: config.ConnectionString, // Reuse HTTPConnectionURL field for SSE URL display
- }
-
- // Create SSE transport
- sseTransport, err := transport.NewSSE(*config.ConnectionString, transport.WithHeaders(config.Headers))
- if err != nil {
- return nil, MCPClientConnectionInfo{}, fmt.Errorf("failed to create SSE transport: %w", err)
- }
-
- client := client.NewClient(sseTransport)
-
- return client, connectionInfo, nil
-}
-
-// createInProcessConnection creates an in-process MCP client connection without holding locks.
-// This allows direct connection to an MCP server running in the same process, providing
-// the lowest latency and highest performance for tool execution.
-func (m *MCPManager) createInProcessConnection(config schemas.MCPClientConfig) (*client.Client, MCPClientConnectionInfo, error) {
- if config.InProcessServer == nil {
- return nil, MCPClientConnectionInfo{}, fmt.Errorf("InProcess connection requires a server instance")
- }
- // Create in-process client directly connected to the provided server
- inProcessClient, err := client.NewInProcessClient(config.InProcessServer)
- if err != nil {
- return nil, MCPClientConnectionInfo{}, fmt.Errorf("failed to create in-process client: %w", err)
- }
-
- // Prepare connection info
- connectionInfo := MCPClientConnectionInfo{
- Type: config.ConnectionType,
- }
-
- return inProcessClient, connectionInfo, nil
-}
-
-// cleanup performs cleanup of all MCP resources including clients and local server.
-// This function safely disconnects all MCP clients (HTTP, STDIO, and SSE) and
-// cleans up the local MCP server. It handles proper cancellation of SSE contexts
-// and closes all transport connections.
-//
-// Returns:
-// - error: Always returns nil, but maintains error interface for consistency
-func (m *MCPManager) cleanup() error {
- m.mu.Lock()
- defer m.mu.Unlock()
-
- // Disconnect all external MCP clients
- for id := range m.clientMap {
- if err := m.removeClientUnsafe(id); err != nil {
- m.logger.Error("%s Failed to remove MCP client %s: %v", MCPLogPrefix, id, err)
- }
- }
-
- // Clear the client map
- m.clientMap = make(map[string]*MCPClient)
-
- // Clear local server reference
- // Note: mark3labs/mcp-go STDIO server cleanup is handled automatically
- if m.server != nil {
- m.logger.Info(MCPLogPrefix + " Clearing local MCP server reference")
- m.server = nil
- m.serverRunning = false
- }
-
- m.logger.Info(MCPLogPrefix + " MCP cleanup completed")
- return nil
-}
diff --git a/docs/architecture/framework/model-catalog.mdx b/docs/architecture/framework/model-catalog.mdx
index 100f57e7c8..20a3e6b931 100644
--- a/docs/architecture/framework/model-catalog.mdx
+++ b/docs/architecture/framework/model-catalog.mdx
@@ -6,6 +6,10 @@ icon: "book-open"
The Model Catalog is a foundational component of Bifrost that provides a unified interface for managing AI models, including their pricing, capabilities, and availability. It serves as a centralized repository for all model-related information, enabling dynamic cost calculation, intelligent model routing, and efficient resource management.
+
+**Related Documentation**: The Model Catalog powers Bifrost's intelligent routing system. See [Provider Routing](/providers/provider-routing) for detailed examples of how governance and load balancing use the catalog to make routing decisions, including cross-provider scenarios and weighted routing via proxy providers.
+
+
## Core Features
### **1. Automatic Pricing Synchronization**
@@ -28,9 +32,10 @@ It supports diverse pricing models across different AI operation types:
- **Image Processing**: Per-image costs with tiered pricing for high-token contexts.
### **3. Model Information Management**
-The Model Catalog maintains a pool of available models for each provider, populated from the pricing data. This allows for:
-- Listing all available models for a given provider.
-- Finding all providers that support a specific model.
+The Model Catalog maintains a pool of available models for each provider, populated from both pricing data and provider list models APIs. This enables:
+- **Model Discovery**: Listing all available models for a given provider
+- **Provider Discovery**: Finding all providers that support a specific model with intelligent cross-provider resolution (OpenRouter, Vertex, Groq, Bedrock)
+- **Model Validation**: Checking if a model is allowed for a provider based on allowed models lists (supports provider-prefixed entries)
### **4. Intelligent Cache Cost Handling**
It integrates with semantic caching to provide accurate cost calculations:
@@ -130,6 +135,12 @@ type PricingEntry struct {
## Usage in Plugins
+The Model Catalog is designed to be shared across all Bifrost plugins, providing consistent model information and validation logic for governance, load balancing, and other routing mechanisms.
+
+
+**Governance & Load Balancing**: Both plugins delegate model validation to the Model Catalog's `IsModelAllowedForProvider` method, ensuring consistent handling of cross-provider scenarios and provider-prefixed allowed models. See [Provider Routing](/providers/provider-routing) for configuration examples.
+
+
### Initialization
In Bifrost's gateway, the `ModelCatalog` is initialized once at the start and shared across all plugins:
@@ -199,19 +210,89 @@ Retrieve a list of all models supported by a specific provider.
```go
openaiModels := modelCatalog.GetModelsForProvider(schemas.OpenAI)
for _, model := range openaiModels {
- logger.Info("Found OpenAI model: %s", model.ID)
+ logger.Info("Found OpenAI model: %s", model)
}
```
+**Thread-safe**: Uses read lock for concurrent access.
+
#### Get Providers for a Model
-Find all providers that offer a specific model.
+Find all providers that offer a specific model, including cross-provider resolution.
+
```go
-gpt4Providers := modelCatalog.GetProvidersForModel("gpt-4")
+gpt4Providers := modelCatalog.GetProvidersForModel("gpt-4o")
for _, provider := range gpt4Providers {
- logger.Info("gpt-4 is available from: %s", provider)
+ logger.Info("gpt-4o is available from: %s", provider)
}
+// Result: [openai, azure, groq] (includes cross-provider mappings)
```
+**Cross-Provider Resolution**:
+
+This method implements intelligent cross-provider routing logic to discover all providers that can serve a model:
+
+1. **Direct Match**: Checks each provider's model list in `modelPool` for the exact model name
+2. **OpenRouter Format**: For models found in other providers, checks if `provider/model` exists in OpenRouter
+ - Example: `claude-3-5-sonnet` found in Anthropic → checks OpenRouter for `anthropic/claude-3-5-sonnet`
+3. **Vertex Format**: Similar check for Vertex with `provider/model` format
+4. **Groq OpenAI Compatibility**: For GPT models, checks if `openai/model` exists in Groq's catalog
+5. **Bedrock Claude Models**: For Claude models, flexible matching against Bedrock's full ARN format
+
+**Example**:
+```go
+providers := modelCatalog.GetProvidersForModel("claude-3-5-sonnet")
+// Returns: [anthropic, vertex, bedrock, openrouter]
+// Even though request was just "claude-3-5-sonnet" without provider prefix!
+```
+
+
+This cross-provider logic powers Bifrost's intelligent routing capabilities. See [Provider Routing](/providers/provider-routing#the-model-catalog) for detailed examples of how this enables features like weighted routing via proxy providers.
+
+
+#### Check Model Allowance for Provider
+Validate if a model is allowed for a specific provider based on an allowed models list. This method is used internally by governance and load balancing plugins.
+
+```go
+// Empty allowedModels - uses catalog to determine support
+isAllowed := modelCatalog.IsModelAllowedForProvider(
+ schemas.OpenRouter,
+ "gpt-4o",
+ []string{}, // empty = check catalog
+)
+// Returns: true (catalog knows OpenRouter supports openai/gpt-4o)
+
+// Explicit allowedModels with provider prefix
+isAllowed = modelCatalog.IsModelAllowedForProvider(
+ schemas.OpenRouter,
+ "gpt-4o",
+ []string{"openai/gpt-4o", "anthropic/claude-3-5-sonnet"},
+)
+// Returns: true (strips "openai/" prefix and matches "gpt-4o")
+
+// Explicit allowedModels without prefix
+isAllowed = modelCatalog.IsModelAllowedForProvider(
+ schemas.OpenAI,
+ "gpt-4o",
+ []string{"gpt-4o", "gpt-4o-mini"},
+)
+// Returns: true (direct match)
+```
+
+**Behavior**:
+- **Empty `allowedModels`**: Delegates to `GetProvidersForModel` (includes cross-provider logic)
+- **Non-empty `allowedModels`**: Checks for both direct matches and provider-prefixed entries
+ - Direct: `"gpt-4o"` matches `"gpt-4o"`
+ - Prefixed: `"openai/gpt-4o"` matches request for `"gpt-4o"` (prefix stripped)
+
+**Use Cases**:
+- **Governance Routing**: Validate if a model request is allowed for a provider configuration
+- **Load Balancing**: Filter providers based on allowed models before performance scoring
+- **Virtual Key Validation**: Check if a model can be used with a specific virtual key's provider configs
+
+
+This method is the central validation point for both governance and load balancing plugins, ensuring consistent model allowance logic across all routing mechanisms. It handles all edge cases including proxy providers (OpenRouter, Vertex) and provider-prefixed model entries.
+
+
#### Dynamically Add Models
You can dynamically add models to the catalog's pool from a `v1/models` compatible response structure. This is useful for providers that expose a model list endpoint.
```go
@@ -219,6 +300,11 @@ You can dynamically add models to the catalog's pool from a `v1/models` compatib
modelCatalog.AddModelDataToPool(response)
```
This is automatically done in Bifrost gateway initialization for all providers that are supported by Bifrost.
+
+**When to use**:
+- After fetching models from a provider's `/v1/models` endpoint
+- When a new provider is dynamically added at runtime
+- For testing with custom model lists
### Reloading Configuration
You can reload the pricing configuration at runtime if you need to change the pricing URL or sync interval.
```go
diff --git a/docs/providers/provider-routing.mdx b/docs/providers/provider-routing.mdx
index afcb085f9a..eb5a60fa9a 100644
--- a/docs/providers/provider-routing.mdx
+++ b/docs/providers/provider-routing.mdx
@@ -25,106 +25,756 @@ When both methods are available, **governance takes precedence** because users h
The Model Catalog is Bifrost's central registry that tracks which models are available from which providers. It powers both governance-based routing and adaptive load balancing by maintaining an up-to-date mapping of models to providers.
+
+**Architecture Documentation**: For detailed technical documentation on the Model Catalog implementation, including API reference, thread safety, and advanced usage patterns, see [Model Catalog Architecture](/architecture/framework/model-catalog).
+
+
### Data Sources
-The Model Catalog combines two data sources:
+The Model Catalog combines two data sources to maintain a comprehensive and up-to-date model registry:
1. **Pricing Data** (Primary source)
- - Downloaded from a remote URL (configurable, defaults to Maxim's pricing endpoint)
+ - Downloaded from a remote URL (configurable, defaults to `https://getbifrost.ai/datasheet`)
- Contains model names, pricing tiers, and provider mappings
- - Synced to database on startup and refreshed every hour
+ - Synced to database on startup and refreshed periodically (default: every 24 hours)
- Used for cost calculation and initial model-to-provider mapping
+ - **Stored as**: In-memory map `pricingData[model|provider|mode]` for O(1) lookups
2. **Provider List Models API** (Secondary source)
- - Calls each provider's `/v1/models` endpoint
+ - Calls each provider's `/v1/models` endpoint during startup
- Enriches the catalog with provider-specific models and aliases
- - Called on Bifrost startup and when providers are added/updated
- - Adds models that may not be in pricing data yet
+ - Re-fetched when providers are added/updated via API or dashboard
+ - Adds models that may not be in pricing data yet (e.g., newly released models)
+ - **Stored as**: In-memory map `modelPool[provider][]models`
+
+
+**Why two sources?** Pricing data provides comprehensive model coverage with cost information, while the List Models API ensures you can use newly released models immediately without waiting for pricing data updates.
+
+
+### How Model Availability is Determined
+
+Bifrost uses a sophisticated multi-step process to determine if a model is available for a provider:
+
+
+
+ **Purpose**: Find all models available for a specific provider
+
+ **Lookup Process**:
+ 1. Check `modelPool[provider]` for direct matches
+ 2. Return all models in that provider's slice
+
+ **Example**:
+ ```go
+ models := GetModelsForProvider("openai")
+ // Returns: ["gpt-4o", "gpt-4o-mini", "gpt-4-turbo", "gpt-3.5-turbo", ...]
+ ```
+
+ **Used by**:
+ - Routing Methods to validate `allowed_models`
+ - Dashboard model selector dropdowns
+ - API responses for `/v1/models?provider=openai`
+
+
+
+ **Purpose**: Find all providers that support a specific model
+
+ **Lookup Process**:
+ 1. **Direct lookup**: Check each provider's model list in `modelPool`
+ 2. **Cross-provider resolution**: Apply special handling for proxy providers
+
+ **Special Cross-Provider Rules**:
+
+
+
+ If model is not found directly, check if `provider/model` exists in OpenRouter
+ ```go
+ // Request: claude-3-5-sonnet
+ // Checks: openrouter models for "anthropic/claude-3-5-sonnet"
+ // Result: Adds "openrouter" to providers list
+ ```
+
+
+
+ If model is not found directly, check if `provider/model` exists in Vertex
+ ```go
+ // Request: claude-3-5-sonnet
+ // Checks: vertex models for "anthropic/claude-3-5-sonnet"
+ // Result: Adds "vertex" to providers list
+ ```
+
+
+
+ For GPT models, check if `openai/model` exists in Groq
+ ```go
+ // Request: gpt-3.5-turbo
+ // Checks: groq models for "openai/gpt-3.5-turbo"
+ // Result: Adds "groq" to providers list
+ ```
+
+
+
+ For Claude models, check Bedrock with flexible matching
+ ```go
+ // Request: claude-3-5-sonnet
+ // Checks: bedrock models containing "claude-3-5-sonnet"
+ // Matches: "anthropic.claude-3-5-sonnet-20240620-v1:0"
+ // Result: Adds "bedrock" to providers list
+ ```
+
+
+
+ **Example**:
+ ```go
+ providers := GetProvidersForModel("claude-3-5-sonnet")
+ // Returns: ["anthropic", "vertex", "bedrock", "openrouter"]
+ // Even though the request was just "claude-3-5-sonnet"!
+ ```
+
+ **Used by**:
+ - Load balancing to find candidate providers
+ - Fallback generation
+ - Model validation in requests
+
+
+
+ **Purpose**: Get pricing data for cost calculation and model validation
+
+ **Lookup Key**: `model|provider|mode` (e.g., `gpt-4o|openai|chat`)
+
+ **Fallback Chain**:
+ 1. **Primary lookup**: `model|provider|requestType`
+ 2. **Gemini → Vertex**: If Gemini not found, try Vertex with same model
+ 3. **Vertex format stripping**: For `provider/model`, strip prefix and retry
+ 4. **Bedrock prefix handling**: For Claude models, try with `anthropic.` prefix
+ 5. **Responses → Chat**: If Responses mode not found, try Chat mode
+
+ **Example Flow**:
+ ```go
+ // Request: claude-3-5-sonnet on Gemini (Responses API)
+
+ // 1. Try: claude-3-5-sonnet|gemini|responses → Not found
+ // 2. Try: claude-3-5-sonnet|vertex|responses → Not found
+ // 3. Try: claude-3-5-sonnet|vertex|chat → ✅ Found!
+
+ // Pricing returned from vertex/chat mode
+ ```
+
+ **Used by**:
+ - Cost calculation for billing
+ - Model validation during routing
+ - Budget enforcement
+
+
### Syncing Behavior
- When Bifrost starts:
- 1. **Pricing data** is loaded from the remote URL
- 2. If successful, data is stored in the database (if config store is available)
- 3. **Model pool** is populated from pricing data
- 4. **List models API** is called for all configured providers
- 5. Results are added to the model pool
-
- If list models API fails for a provider:
- ```json
- {"level":"warn","message":"failed to list models for provider ollama: failed to execute HTTP request to provider API"}
- ```
- - This is logged as a warning but **does not stop startup**
- - The provider can still be used with models from pricing data
+ When Bifrost starts, it performs a complete model catalog initialization:
+
+ **Step-by-step process** (from `server.go:Bootstrap()`):
+
+
+
+ ```go
+ // 1. Download from URL
+ pricingData := loadPricingFromURL(ctx)
+
+ // 2. Store in database (if configStore available)
+ configStore.CreateModelPrices(ctx, pricingData)
+
+ // 3. Load into memory cache
+ mc.pricingData = map[string]TableModelPricing{...}
+ ```
+
+
+
+ ```go
+ // Build modelPool from pricing data
+ mc.populateModelPoolFromPricingData()
+ // Result: modelPool[provider] = [models from pricing]
+ ```
+
+
+
+ ```go
+ // Call ListAllModels for all configured providers
+ modelData, err := client.ListAllModels(ctx, nil)
+
+ // Add results to model pool
+ mc.AddModelDataToPool(modelData)
+ // Result: modelPool enriched with provider-specific models
+ ```
+
+
+
+ If list models API fails for a provider:
+ ```json
+ {"level":"warn","message":"failed to list models for provider ollama: connection refused"}
+ ```
+ - Logged as warning, **does not stop startup**
+ - Provider remains usable with models from pricing data
+ - Can be manually refreshed later via API
+
+
+
+ **Result**: Bifrost is ready with a comprehensive model catalog combining both sources.
- While Bifrost is running:
- - **Pricing data**: Background worker checks every hour and syncs if interval elapsed
- - **List models API**: Re-fetched when provider is added/updated via API or dashboard
-
- Sync failures are handled gracefully:
- - If pricing URL fails but database has existing data → Use database
- - If pricing URL fails and no database data → Error (startup fails)
- - If list models API fails → Log warning, continue with pricing data only
+ While Bifrost is running, the catalog stays up-to-date through background workers:
+
+ **Pricing Data Sync**:
+ - Background worker runs every **1 hour** (ticker interval)
+ - Checks if **24 hours** have elapsed since last sync (configurable)
+ - If yes, downloads fresh pricing data and updates database + memory cache
+ - Timer resets after successful sync
+
+ **List Models API Sync**:
+ Triggered by these events:
+ 1. **Provider Added**: When a new provider is configured
+ ```bash
+ POST /api/v1/providers
+ # Automatically calls ListModels for the new provider
+ ```
+
+ 2. **Provider Updated**: When provider config changes (keys, endpoints, etc.)
+ ```bash
+ PUT /api/v1/providers/{provider}
+ # Refetches models to detect changes
+ ```
+
+ 3. **Manual Refresh**: Via API endpoint
+ ```bash
+ POST /api/v1/providers/{provider}/models/refetch
+ # Explicitly refetches models for a provider
+ ```
+
+ 4. **Manual Delete + Refetch**: Clear and reload models
+ ```bash
+ DELETE /api/v1/providers/{provider}/models
+ POST /api/v1/providers/{provider}/models/refetch
+ # Useful when models are out of sync
+ ```
+
+ **Failure Handling**:
+ - Pricing URL fails but database has data → Use cached database records
+ - Pricing URL fails and no database data → Error logged, existing memory cache retained
+ - List models API fails → Log warning, retain existing model pool entries
- When syncing fails:
- 1. **Pricing data failure**: Use existing database records (requires config store)
- 2. **List models failure**: Rely on pricing data only
- 3. **Empty `allowed_models`**: Use model catalog to validate which models are supported
+ Bifrost's multi-layered approach ensures high availability:
- This multi-layered approach ensures routing continues even with partial sync failures.
+ **Layer 1: Pricing Data Persistence**
+ ```
+ URL fails → Database → Memory cache → Continue operation
+ ```
+
+ **Layer 2: Model Pool Redundancy**
+ ```
+ ListModels fails → Pricing data models → Continue with reduced catalog
+ ```
+
+ **Layer 3: Runtime Validation**
+ ```
+ Model not in catalog → Special cross-provider rules → May still work
+ ```
+
+ **Example Scenario**:
+ ```
+ Situation:
+ - Pricing URL is down
+ - OpenAI ListModels API is down
+ - User requests gpt-4o on OpenAI
+
+ Bifrost's Response:
+ 1. ✅ Pricing data available from database (last sync 12h ago)
+ 2. ✅ Model pool has gpt-4o from previous ListModels call
+ 3. ✅ Request proceeds normally
+ 4. 📊 Cost calculated from cached pricing data
+ ```
+
+ This design ensures **requests never fail due to sync issues** as long as one data source is available.
+### Allowed Models Behavior with Examples
+
+The `allowed_models` field in provider configs controls which models can be used with that provider. Understanding its behavior is crucial for governance routing.
+
+
+
+
+**Configuration**:
+```json
+{
+ "provider_configs": [
+ {
+ "provider": "openai",
+ "allowed_models": [], // Empty = defer to catalog
+ "weight": 1.0
+ }
+ ]
+}
+```
+
+**Behavior**:
+- Bifrost calls `GetModelsForProvider("openai")`
+- Returns all models in `modelPool["openai"]`
+- Request validated against catalog
+
+**Examples**:
+```bash
+# ✅ Allowed (in catalog)
+curl -H "x-bf-vk: vk-123" -d '{"model": "gpt-4o"}'
+
+# ✅ Allowed (in catalog)
+curl -H "x-bf-vk: vk-123" -d '{"model": "gpt-3.5-turbo"}'
+
+# ❌ Rejected (not in OpenAI catalog)
+curl -H "x-bf-vk: vk-123" -d '{"model": "claude-3-5-sonnet"}'
+```
+
+**Use Cases**:
+- Default behavior for most deployments
+- Automatically stays up-to-date with provider's model offerings
+- No manual model list maintenance required
+
+
+
+
+
+**Configuration**:
+```json
+{
+ "provider_configs": [
+ {
+ "provider": "openai",
+ "allowed_models": ["gpt-4o", "gpt-4o-mini"], // Only these two
+ "weight": 1.0
+ },
+ {
+ "provider": "anthropic",
+ "allowed_models": ["claude-3-5-sonnet-20241022"], // Specific version
+ "weight": 1.0
+ }
+ ]
+}
+```
+
+**Behavior**:
+- Bifrost validates request model against explicit list
+- Catalog is **ignored** for this provider
+- Supports both direct matches and provider-prefixed entries
+- Case-sensitive matching
+
+**Examples**:
+```bash
+# ✅ Allowed (in explicit list)
+curl -H "x-bf-vk: vk-123" -d '{"model": "gpt-4o"}'
+
+# ❌ Rejected (not in explicit list)
+curl -H "x-bf-vk: vk-123" -d '{"model": "gpt-4-turbo"}'
+# Even though gpt-4-turbo is in the OpenAI catalog!
+
+# ✅ Allowed (exact match for Anthropic)
+curl -H "x-bf-vk: vk-123" -d '{"model": "claude-3-5-sonnet-20241022"}'
+
+# ❌ Rejected (version mismatch)
+curl -H "x-bf-vk: vk-123" -d '{"model": "claude-3-5-sonnet-20240620"}'
+```
+
+**Provider-Prefixed Entries**:
+
+You can also use provider-prefixed model names in `allowed_models`. Bifrost will strip the prefix and match against the requested model:
+
+```json
+{
+ "provider_configs": [
+ {
+ "provider": "openrouter",
+ "allowed_models": ["openai/gpt-4o", "anthropic/claude-3-5-sonnet"],
+ "weight": 1.0
+ }
+ ]
+}
+```
+
+**How it works**:
+```bash
+# Request without prefix
+curl -H "x-bf-vk: vk-123" -d '{"model": "gpt-4o"}'
+
+# 1. Checks: "openai/gpt-4o" in allowed_models
+# 2. Strips prefix: "openai/gpt-4o" → "gpt-4o"
+# 3. Compares: "gpt-4o" == "gpt-4o" ✅
+# 4. Result: Allowed and routed to OpenRouter
+```
+
+This is particularly useful for proxy providers (OpenRouter, Vertex) where you want to explicitly control which upstream models are accessible.
+
+**Use Cases**:
+- Compliance requirements (only approved models)
+- Cost control (restrict to cheaper models)
+- Version pinning (prevent automatic updates)
+- Testing specific model versions
+- **Explicit cross-provider routing** (e.g., only allow OpenAI models via OpenRouter)
+
+
+
+
+
+**Key Concept**: Deployments are **key-specific** mappings that allow user-friendly model names to map to provider-specific deployment identifiers.
+
+**How Deployments Work**:
+- Defined at the **Key level**, not Virtual Key level
+- Structure: `deployments: {"alias": "deployment-id"}`
+- **Alias** (left side): User-facing model name used in requests
+- **Deployment ID** (right side): Provider-specific identifier sent to the API
+
+**Azure OpenAI Example**:
+
+Provider configuration with deployment mapping:
+```json
+{
+ "providers": {
+ "azure": {
+ "keys": [
+ {
+ "name": "azure-prod-key",
+ "value": "your-api-key",
+ "models": [], // Not used when deployments exist
+ "azure_key_config": {
+ "endpoint": "https://your-resource.openai.azure.com",
+ "deployments": {
+ "gpt-4o": "my-prod-gpt4o-deployment",
+ "gpt-4o-mini": "my-mini-deployment"
+ }
+ }
+ }
+ ]
+ }
+ }
+}
+```
+
+**What Happens**:
+1. **Allowed models derived from aliases**: `["gpt-4o", "gpt-4o-mini"]`
+2. **User requests with alias**: `{"model": "gpt-4o"}`
+3. **Bifrost validates**: `gpt-4o` is in derived allowed models ✅
+4. **Bifrost maps to deployment**: `gpt-4o` → `my-prod-gpt4o-deployment`
+5. **Sent to Azure**: Uses `my-prod-gpt4o-deployment` as the deployment name
+6. **Pricing lookup**: If pricing for deployment not found, falls back to alias `gpt-4o`
+
+**Bedrock Example with Inference Profiles**:
+
+```json
+{
+ "providers": {
+ "bedrock": {
+ "keys": [
+ {
+ "name": "bedrock-key",
+ "models": [],
+ "bedrock_key_config": {
+ "access_key": "your-access-key",
+ "secret_key": "your-secret-key",
+ "region": "us-east-1",
+ "deployments": {
+ "claude-sonnet": "us.anthropic.claude-3-5-sonnet-20241022-v2:0",
+ "claude-opus": "us.anthropic.claude-3-opus-20240229-v1:0"
+ }
+ }
+ }
+ ]
+ }
+ }
+}
+```
+
+**What Happens**:
+1. **Allowed models**: `["claude-sonnet", "claude-opus"]` (from deployment aliases)
+2. **User requests**: `{"model": "claude-sonnet"}`
+3. **Bifrost validates**: `claude-sonnet` in allowed models ✅
+4. **Maps to inference profile**: `claude-sonnet` → `us.anthropic.claude-3-5-sonnet-20241022-v2:0`
+5. **Sent to Bedrock**: Full ARN used in API call
+
+**Priority of Model Restrictions**:
+
+When determining allowed models for a key:
+```
+1. If key.models is NOT empty → Use key.models
+2. Else if deployments exist → Use deployment aliases (map keys)
+3. Else → All models allowed (use Model Catalog)
+```
+
+**Example with Both**:
+```json
+{
+ "keys": [
+ {
+ "models": ["gpt-4o", "gpt-3.5-turbo"], // Explicit restriction
+ "azure_key_config": {
+ "deployments": {
+ "gpt-4o": "my-deployment",
+ "gpt-4-turbo": "another-deployment" // NOT accessible!
+ }
+ }
+ }
+ ]
+}
+```
+Result: Only `["gpt-4o", "gpt-3.5-turbo"]` allowed (models field takes priority)
+
+**Vertex Example** (similar pattern):
+```json
+{
+ "keys": [
+ {
+ "vertex_key_config": {
+ "project_id": "my-project",
+ "region": "us-central1",
+ "deployments": {
+ "claude-3-5-sonnet": "anthropic/claude-3-5-sonnet@20241022",
+ "gemini-pro": "google/gemini-1.5-pro"
+ }
+ }
+ }
+ ]
+}
+```
+
+**Use Cases for Deployments**:
+- **Azure**: Map generic model names to specific deployment names in your Azure resource
+- **Bedrock**: Use short aliases for long inference profile ARNs
+- **Vertex**: Map to specific model versions or regional endpoints
+- **Multi-environment**: Different deployments per key (dev/staging/prod)
+
+**Key Insight**:
+```
+User Request: {"model": "gpt-4o"}
+ ↓
+Validation: Check if "gpt-4o" in allowed models (derived from deployments)
+ ↓
+Mapping: deployments["gpt-4o"] → "my-prod-gpt4o-deployment"
+ ↓
+API Call: Uses "my-prod-gpt4o-deployment" as deployment ID
+ ↓
+Pricing: Falls back to "gpt-4o" if deployment not in pricing data
+```
+
+This allows user-friendly model names in requests while supporting provider-specific deployment patterns at the key level.
+
+
+
+
+
+**Configuration**:
+```json
+{
+ "provider_configs": [
+ {
+ "provider": "openai",
+ "allowed_models": ["gpt-4o"],
+ "weight": 0.5
+ },
+ {
+ "provider": "azure",
+ "allowed_models": ["gpt-4o"],
+ "weight": 0.5
+ }
+ ]
+}
+```
+
+**Request**:
+```bash
+curl -H "x-bf-vk: vk-123" \
+ -d '{"model": "gpt-4o"}'
+```
+
+**Routing Behavior**:
+1. **Model validation**: Both providers have `gpt-4o` in allowed_models ✅
+2. **Weighted selection**: 50% chance each
+3. **Provider selected**: Let's say Azure
+4. **Model transformation**: `gpt-4o` → `azure/gpt-4o`
+5. **Fallbacks**: `["openai/gpt-4o"]` (remaining providers)
+
+**Special Cross-Provider Scenarios**:
+
+
+
+ ```json
+ {
+ "provider_configs": [
+ {
+ "provider": "openrouter",
+ "allowed_models": [] // Use catalog
+ }
+ ]
+ }
+ ```
+
+ Request `claude-3-5-sonnet`:
+ - Bifrost checks: `GetModelsForProvider("openrouter")`
+ - Finds: `anthropic/claude-3-5-sonnet` in OpenRouter catalog
+ - ✅ Allowed, routes to OpenRouter
+
+
+
+ **Use Case**: Route 99% of OpenAI traffic through OpenRouter for cost savings, keep 1% direct for fallback
+
+ ```json
+ {
+ "provider_configs": [
+ {
+ "provider": "openai",
+ "allowed_models": ["gpt-4o"],
+ "weight": 0.01 // 1% direct to OpenAI
+ },
+ {
+ "provider": "openrouter",
+ "allowed_models": ["openai/gpt-4o"], // Provider-prefixed
+ "weight": 0.99 // 99% via OpenRouter
+ }
+ ]
+ }
+ ```
+
+ Request `gpt-4o`:
+ - **OpenAI check**: `"gpt-4o"` in `["gpt-4o"]` → ✅ Allowed
+ - **OpenRouter check**: Strips prefix from `"openai/gpt-4o"` → matches `"gpt-4o"` → ✅ Allowed
+ - **Weighted selection**: 99% chance → OpenRouter selected
+ - **Final model**: `openrouter/gpt-4o`
+ - **Fallbacks**: `["openai/gpt-4o"]` (1% provider as fallback)
+
+ **Why this works**: Bifrost now supports provider-prefixed entries in `allowed_models`, so `"openai/gpt-4o"` matches requests for `"gpt-4o"`.
+
+
+
+ ```json
+ {
+ "provider_configs": [
+ {
+ "provider": "vertex",
+ "allowed_models": ["claude-3-5-sonnet", "gemini-1.5-pro"]
+ }
+ ]
+ }
+ ```
+
+ Request `claude-3-5-sonnet`:
+ - Model catalog lookup: `GetProvidersForModel("claude-3-5-sonnet")`
+ - Finds: `["anthropic", "vertex", "bedrock"]`
+ - Validation: `claude-3-5-sonnet` in allowed_models ✅
+ - Sends to Vertex as: `anthropic/claude-3-5-sonnet`
+
+
+
+ ```json
+ {
+ "provider_configs": [
+ {
+ "provider": "groq",
+ "allowed_models": ["gpt-3.5-turbo"]
+ }
+ ]
+ }
+ ```
+
+ Request `gpt-3.5-turbo`:
+ - Special handling: Checks Groq catalog for `openai/gpt-3.5-turbo`
+ - ✅ Found, validation passes
+ - Sends to Groq as: `openai/gpt-3.5-turbo`
+
+
+
+
+
+
### How It's Used in Routing
-When a Virtual Key has empty `allowed_models`:
+When a Virtual Key has `provider_configs`, governance uses the model catalog for validation:
+**Empty allowed_models Example**:
```json
{
"provider_configs": [
{
"provider": "openai",
- "allowed_models": [], // Empty = use Model Catalog
+ "allowed_models": [], // Use catalog
"weight": 0.5
}
]
}
```
-Bifrost checks the Model Catalog:
-- Request for `gpt-4o` → ✅ Allowed (catalog shows OpenAI supports this)
-- Request for `claude-3-sonnet` → ❌ Rejected (catalog shows OpenAI doesn't support this)
+**Request Flow**:
+```bash
+curl -H "x-bf-vk: vk-123" -d '{"model": "gpt-4o"}'
+
+# 1. Governance checks: Is "gpt-4o" in GetModelsForProvider("openai")?
+# 2. Catalog lookup: modelPool["openai"] contains "gpt-4o" ✅
+# 3. Validation passes, provider selected
+# 4. Model becomes: "openai/gpt-4o"
+```
+
+**Rejection Example**:
+```bash
+curl -H "x-bf-vk: vk-123" -d '{"model": "claude-3-5-sonnet"}'
+
+# 1. Governance checks: Is "claude-3-5-sonnet" in GetModelsForProvider("openai")?
+# 2. Catalog lookup: modelPool["openai"] does NOT contain "claude-3-5-sonnet" ❌
+# 3. Validation fails, request rejected
+# 4. Error: "model not allowed for any configured provider"
+```
-When load balancing selects providers:
+When load balancing selects providers, it queries the catalog to find candidates:
+**Request Flow**:
```bash
curl -X POST http://localhost:8080/v1/chat/completions \
-d '{"model": "gpt-4o", "messages": [...]}'
+
+# 1. Load balancer: GetProvidersForModel("gpt-4o")
+# 2. Catalog returns: ["openai", "azure", "groq"]
+# 3. Filter by configured providers: ["openai", "azure"] (groq not configured)
+# 4. Performance scoring: openai=0.95, azure=0.87
+# 5. Select: openai (highest score)
+# 6. Model becomes: "openai/gpt-4o"
+# 7. Fallbacks: ["azure/gpt-4o"]
+```
+
+**Cross-Provider Discovery**:
+```bash
+curl -d '{"model": "claude-3-5-sonnet"}'
+
+# 1. Load balancer: GetProvidersForModel("claude-3-5-sonnet")
+# 2. Catalog checks:
+# - Direct: ["anthropic"] ✅
+# - OpenRouter: Has "anthropic/claude-3-5-sonnet" ✅
+# - Vertex: Has "anthropic/claude-3-5-sonnet" ✅
+# - Bedrock: Has "anthropic.claude-3-5-sonnet-..." ✅
+# 3. Catalog returns: ["anthropic", "openrouter", "vertex", "bedrock"]
+# 4. Performance scoring across all four
+# 5. Best performer selected
```
-1. Load balancer calls `GetProvidersForModel("gpt-4o")`
-2. Model Catalog returns: [openai, azure, groq]
-3. Load balancer filters based on configured providers and allowed models
-4. Performance-based selection among filtered providers
+This is how Bifrost achieves **intelligent cross-provider routing** without manual configuration.
-**Model Catalog is essential for cross-provider routing**. Without it, Bifrost wouldn't know that `gpt-4o` is available from both OpenAI and Azure, limiting routing flexibility.
+**Model Catalog is essential for cross-provider routing**. Without it, Bifrost wouldn't know that `gpt-4o` is available from OpenAI, Azure, and Groq, or that `claude-3-5-sonnet` can be routed through Anthropic, Vertex, Bedrock, and OpenRouter. This knowledge powers both governance validation and load balancing provider discovery.
---
diff --git a/framework/changelog.md b/framework/changelog.md
index e69de29bb2..3ec60aa3ef 100644
--- a/framework/changelog.md
+++ b/framework/changelog.md
@@ -0,0 +1 @@
+- feat: Improved model matching to support provider-prefixed model names (e.g., "openai/gpt-4")
diff --git a/framework/modelcatalog/main.go b/framework/modelcatalog/main.go
index 64bba62ffd..573a3c805a 100644
--- a/framework/modelcatalog/main.go
+++ b/framework/modelcatalog/main.go
@@ -307,6 +307,81 @@ func (mc *ModelCatalog) GetProvidersForModel(model string) []schemas.ModelProvid
return providers
}
+// IsModelAllowedForProvider checks if a model is allowed for a specific provider
+// based on the allowed models list and catalog data. It handles all cross-provider
+// logic including provider-prefixed models and special routing rules.
+//
+// Parameters:
+// - provider: The provider to check against
+// - model: The model name (without provider prefix, e.g., "gpt-4o" or "claude-3-5-sonnet")
+// - allowedModels: List of allowed model names (can be empty, can include provider prefixes)
+//
+// Behavior:
+// - If allowedModels is empty: Uses model catalog to check if provider supports the model
+// (delegates to GetProvidersForModel which handles all cross-provider logic)
+// - If allowedModels is not empty: Checks if model matches any entry in the list
+//     Provider-specific validation:
+// - Direct matches: "gpt-4o" in allowedModels for any provider
+// - Prefixed matches: Only if the prefixed model exists in provider's catalog
+// (e.g., "openai/gpt-4o" in allowedModels only matches if openrouter's catalog
+// contains "openai/gpt-4o" AND the model part matches the request)
+//
+// Returns:
+// - bool: true if the model is allowed for the provider, false otherwise
+//
+// Examples:
+//
+// // Empty allowedModels - uses catalog
+// mc.IsModelAllowedForProvider("openrouter", "claude-3-5-sonnet", []string{})
+// // Returns: true (catalog knows openrouter has "anthropic/claude-3-5-sonnet")
+//
+// // Explicit allowedModels with prefix - validates against catalog
+// mc.IsModelAllowedForProvider("openrouter", "gpt-4o", []string{"openai/gpt-4o"})
+// // Returns: true (openrouter's catalog contains "openai/gpt-4o" AND model part is "gpt-4o")
+//
+// // Explicit allowedModels with prefix - wrong model
+// mc.IsModelAllowedForProvider("openrouter", "claude-3-5-sonnet", []string{"openai/gpt-4o"})
+// // Returns: false (model part "gpt-4o" doesn't match request "claude-3-5-sonnet")
+//
+// // Explicit allowedModels without prefix
+// mc.IsModelAllowedForProvider("openai", "gpt-4o", []string{"gpt-4o"})
+// // Returns: true (direct match)
+func (mc *ModelCatalog) IsModelAllowedForProvider(provider schemas.ModelProvider, model string, allowedModels []string) bool {
+ // Case 1: Empty allowedModels = use catalog to determine support
+ // This leverages GetProvidersForModel which already handles all cross-provider logic
+ if len(allowedModels) == 0 {
+ supportedProviders := mc.GetProvidersForModel(model)
+ return slices.Contains(supportedProviders, provider)
+ }
+
+ // Case 2: Explicit allowedModels = check if model matches any entry
+ // Get provider's catalog models for validation of prefixed entries
+ providerCatalogModels := mc.GetModelsForProvider(provider)
+
+ for _, allowedModel := range allowedModels {
+ // Direct match: "gpt-4o" == "gpt-4o"
+ if allowedModel == model {
+ return true
+ }
+
+ // Provider-prefixed match: verify it exists in provider's catalog first
+ // This ensures we only allow provider-specific model combinations that are actually supported
+ if strings.Contains(allowedModel, "/") {
+ // Check if this exact prefixed model exists in the provider's catalog
+ // e.g., for openrouter, check if "openai/gpt-4o" is in its catalog
+ if slices.Contains(providerCatalogModels, allowedModel) {
+ // Extract the model part and compare with request
+ _, modelPart := schemas.ParseModelString(allowedModel, "")
+ if modelPart == model {
+ return true
+ }
+ }
+ }
+ }
+
+ return false
+}
+
// AddModelDataToPool adds model data to the model pool.
func (mc *ModelCatalog) AddModelDataToPool(modelData *schemas.BifrostListModelsResponse) {
if modelData == nil {
diff --git a/plugins/governance/changelog.md b/plugins/governance/changelog.md
index e69de29bb2..460c23fa1c 100644
--- a/plugins/governance/changelog.md
+++ b/plugins/governance/changelog.md
@@ -0,0 +1 @@
+- fix: Fixed weighted provider routing to correctly match provider-prefixed models in allowed lists
diff --git a/plugins/governance/main.go b/plugins/governance/main.go
index c2588d8ee8..6b4bbad00d 100644
--- a/plugins/governance/main.go
+++ b/plugins/governance/main.go
@@ -136,7 +136,7 @@ func Init(
}
// Initialize components in dependency order with fixed, optimal settings
// Resolver (pure decision engine for hierarchical governance, depends only on store)
- resolver := NewBudgetResolver(governanceStore, logger)
+ resolver := NewBudgetResolver(governanceStore, modelCatalog, logger)
// 3. Tracker (business logic owner, depends on store and resolver)
tracker := NewUsageTracker(ctx, governanceStore, resolver, configStore, logger)
@@ -199,7 +199,7 @@ func InitFromStore(
if config != nil {
isVkMandatory = config.IsVkMandatory
}
- resolver := NewBudgetResolver(governanceStore, logger)
+ resolver := NewBudgetResolver(governanceStore, modelCatalog, logger)
tracker := NewUsageTracker(ctx, governanceStore, resolver, configStore, logger)
// Perform startup reset check for any expired limits from downtime
if configStore != nil {
@@ -343,16 +343,22 @@ func (p *GovernancePlugin) loadBalanceProvider(body map[string]any, virtualKey *
}
allowedProviderConfigs := make([]configstoreTables.TableVirtualKeyProviderConfig, 0)
for _, config := range providerConfigs {
- var allAllowedModelsForProvider []string
- if p.modelCatalog != nil {
- allAllowedModelsForProvider = p.modelCatalog.GetModelsForProvider(schemas.ModelProvider(config.Provider))
- }
+ // Delegate model allowance check to model catalog
+ // This handles all cross-provider logic (OpenRouter, Vertex, Groq, Bedrock)
+ // and provider-prefixed allowed_models entries
isProviderAllowed := false
- if len(config.AllowedModels) == 0 {
- isProviderAllowed = len(allAllowedModelsForProvider) == 0 || slices.Contains(allAllowedModelsForProvider, modelStr)
+ if p.modelCatalog != nil {
+ isProviderAllowed = p.modelCatalog.IsModelAllowedForProvider(schemas.ModelProvider(config.Provider), modelStr, config.AllowedModels)
} else {
- isProviderAllowed = slices.Contains(config.AllowedModels, modelStr)
+ // Fallback when model catalog is not available: simple string matching
+ if len(config.AllowedModels) == 0 {
+ // No restrictions, allow all models
+ isProviderAllowed = true
+ } else {
+ isProviderAllowed = slices.Contains(config.AllowedModels, modelStr)
+ }
}
+
if isProviderAllowed {
// Check if the provider's budget or rate limits are violated using resolver helper methods
if p.resolver.isProviderBudgetViolated(config) || p.resolver.isProviderRateLimitViolated(config) {
diff --git a/plugins/governance/resolver.go b/plugins/governance/resolver.go
index e37f92a971..8f89f91def 100644
--- a/plugins/governance/resolver.go
+++ b/plugins/governance/resolver.go
@@ -9,6 +9,7 @@ import (
"github.com/maximhq/bifrost/core/schemas"
configstoreTables "github.com/maximhq/bifrost/framework/configstore/tables"
+ "github.com/maximhq/bifrost/framework/modelcatalog"
)
// Decision represents the result of governance evaluation
@@ -62,15 +63,17 @@ type UsageInfo struct {
// BudgetResolver provides decision logic for the new hierarchical governance system
type BudgetResolver struct {
- store GovernanceStore
- logger schemas.Logger
+ store GovernanceStore
+ logger schemas.Logger
+ modelCatalog *modelcatalog.ModelCatalog
}
// NewBudgetResolver creates a new budget-based governance resolver
-func NewBudgetResolver(store GovernanceStore, logger schemas.Logger) *BudgetResolver {
+func NewBudgetResolver(store GovernanceStore, modelCatalog *modelcatalog.ModelCatalog, logger schemas.Logger) *BudgetResolver {
return &BudgetResolver{
- store: store,
- logger: logger,
+ store: store,
+ logger: logger,
+ modelCatalog: modelCatalog,
}
}
@@ -158,13 +161,20 @@ func (r *BudgetResolver) EvaluateRequest(ctx *schemas.BifrostContext, evaluation
// isModelAllowed checks if the requested model is allowed for this VK
func (r *BudgetResolver) isModelAllowed(vk *configstoreTables.TableVirtualKey, provider schemas.ModelProvider, model string) bool {
- // Empty AllowedModels means all models are allowed
+ // Empty ProviderConfigs means all models are allowed
if len(vk.ProviderConfigs) == 0 {
return true
}
for _, pc := range vk.ProviderConfigs {
if pc.Provider == string(provider) {
+ // Delegate model allowance check to model catalog
+ // This handles all cross-provider logic (OpenRouter, Vertex, Groq, Bedrock)
+ // and provider-prefixed allowed_models entries
+ if r.modelCatalog != nil {
+ return r.modelCatalog.IsModelAllowedForProvider(provider, model, pc.AllowedModels)
+ }
+ // Fallback when model catalog is not available: simple string matching
if len(pc.AllowedModels) == 0 {
return true
}
diff --git a/plugins/governance/resolver_test.go b/plugins/governance/resolver_test.go
index 1fb6e78b98..abe48ac1e6 100644
--- a/plugins/governance/resolver_test.go
+++ b/plugins/governance/resolver_test.go
@@ -23,7 +23,7 @@ func TestBudgetResolver_EvaluateRequest_AllowedRequest(t *testing.T) {
})
require.NoError(t, err)
- resolver := NewBudgetResolver(store, logger)
+ resolver := NewBudgetResolver(store, nil, logger)
ctx := &schemas.BifrostContext{}
result := resolver.EvaluateRequest(ctx, &EvaluationRequest{
@@ -43,7 +43,7 @@ func TestBudgetResolver_EvaluateRequest_VirtualKeyNotFound(t *testing.T) {
store, err := NewLocalGovernanceStore(context.Background(), logger, nil, &configstore.GovernanceConfig{})
require.NoError(t, err)
- resolver := NewBudgetResolver(store, logger)
+ resolver := NewBudgetResolver(store, nil, logger)
ctx := &schemas.BifrostContext{}
result := resolver.EvaluateRequest(ctx, &EvaluationRequest{
@@ -65,7 +65,7 @@ func TestBudgetResolver_EvaluateRequest_VirtualKeyBlocked(t *testing.T) {
})
require.NoError(t, err)
- resolver := NewBudgetResolver(store, logger)
+ resolver := NewBudgetResolver(store, nil, logger)
ctx := &schemas.BifrostContext{}
result := resolver.EvaluateRequest(ctx, &EvaluationRequest{
@@ -92,7 +92,7 @@ func TestBudgetResolver_EvaluateRequest_ProviderBlocked(t *testing.T) {
})
require.NoError(t, err)
- resolver := NewBudgetResolver(store, logger)
+ resolver := NewBudgetResolver(store, nil, logger)
ctx := &schemas.BifrostContext{}
// Try to use OpenAI (not allowed)
@@ -128,7 +128,7 @@ func TestBudgetResolver_EvaluateRequest_ModelBlocked(t *testing.T) {
})
require.NoError(t, err)
- resolver := NewBudgetResolver(store, logger)
+ resolver := NewBudgetResolver(store, nil, logger)
ctx := &schemas.BifrostContext{}
// Try to use gpt-4o-mini (not in allowed list)
@@ -155,7 +155,7 @@ func TestBudgetResolver_EvaluateRequest_RateLimitExceeded_TokenLimit(t *testing.
})
require.NoError(t, err)
- resolver := NewBudgetResolver(store, logger)
+ resolver := NewBudgetResolver(store, nil, logger)
ctx := &schemas.BifrostContext{}
result := resolver.EvaluateRequest(ctx, &EvaluationRequest{
@@ -182,7 +182,7 @@ func TestBudgetResolver_EvaluateRequest_RateLimitExceeded_RequestLimit(t *testin
})
require.NoError(t, err)
- resolver := NewBudgetResolver(store, logger)
+ resolver := NewBudgetResolver(store, nil, logger)
ctx := &schemas.BifrostContext{}
result := resolver.EvaluateRequest(ctx, &EvaluationRequest{
@@ -224,7 +224,7 @@ func TestBudgetResolver_EvaluateRequest_RateLimitExpired(t *testing.T) {
err = store.ResetExpiredRateLimits(context.Background(), expiredRateLimits)
require.NoError(t, err)
- resolver := NewBudgetResolver(store, logger)
+ resolver := NewBudgetResolver(store, nil, logger)
ctx := &schemas.BifrostContext{}
result := resolver.EvaluateRequest(ctx, &EvaluationRequest{
@@ -250,7 +250,7 @@ func TestBudgetResolver_EvaluateRequest_BudgetExceeded(t *testing.T) {
})
require.NoError(t, err)
- resolver := NewBudgetResolver(store, logger)
+ resolver := NewBudgetResolver(store, nil, logger)
ctx := &schemas.BifrostContext{}
result := resolver.EvaluateRequest(ctx, &EvaluationRequest{
@@ -281,7 +281,7 @@ func TestBudgetResolver_EvaluateRequest_BudgetExpired(t *testing.T) {
})
require.NoError(t, err)
- resolver := NewBudgetResolver(store, logger)
+ resolver := NewBudgetResolver(store, nil, logger)
ctx := &schemas.BifrostContext{}
result := resolver.EvaluateRequest(ctx, &EvaluationRequest{
@@ -319,7 +319,7 @@ func TestBudgetResolver_EvaluateRequest_MultiLevelBudgetHierarchy(t *testing.T)
})
require.NoError(t, err)
- resolver := NewBudgetResolver(store, logger)
+ resolver := NewBudgetResolver(store, nil, logger)
ctx := &schemas.BifrostContext{}
// Test: All under limit should pass
@@ -361,7 +361,7 @@ func TestBudgetResolver_EvaluateRequest_ProviderLevelRateLimit(t *testing.T) {
})
require.NoError(t, err)
- resolver := NewBudgetResolver(store, logger)
+ resolver := NewBudgetResolver(store, nil, logger)
ctx := &schemas.BifrostContext{}
result := resolver.EvaluateRequest(ctx, &EvaluationRequest{
@@ -388,7 +388,7 @@ func TestBudgetResolver_CheckRateLimits_BothExceeded(t *testing.T) {
})
require.NoError(t, err)
- resolver := NewBudgetResolver(store, logger)
+ resolver := NewBudgetResolver(store, nil, logger)
ctx := &schemas.BifrostContext{}
result := resolver.EvaluateRequest(ctx, &EvaluationRequest{
@@ -407,7 +407,7 @@ func TestBudgetResolver_IsProviderAllowed(t *testing.T) {
store, err := NewLocalGovernanceStore(context.Background(), logger, nil, &configstore.GovernanceConfig{})
require.NoError(t, err)
- resolver := NewBudgetResolver(store, logger)
+ resolver := NewBudgetResolver(store, nil, logger)
tests := []struct {
name string
@@ -455,7 +455,7 @@ func TestBudgetResolver_IsModelAllowed(t *testing.T) {
store, err := NewLocalGovernanceStore(context.Background(), logger, nil, &configstore.GovernanceConfig{})
require.NoError(t, err)
- resolver := NewBudgetResolver(store, logger)
+ resolver := NewBudgetResolver(store, nil, logger)
tests := []struct {
name string
@@ -530,7 +530,7 @@ func TestBudgetResolver_ContextPopulation(t *testing.T) {
})
require.NoError(t, err)
- resolver := NewBudgetResolver(store, logger)
+ resolver := NewBudgetResolver(store, nil, logger)
ctx := &schemas.BifrostContext{}
result := resolver.EvaluateRequest(ctx, &EvaluationRequest{
diff --git a/plugins/governance/tracker_test.go b/plugins/governance/tracker_test.go
index 76f7e37a9c..835461292c 100644
--- a/plugins/governance/tracker_test.go
+++ b/plugins/governance/tracker_test.go
@@ -25,7 +25,7 @@ func TestUsageTracker_UpdateUsage_FailedRequest(t *testing.T) {
})
require.NoError(t, err)
- resolver := NewBudgetResolver(store, logger)
+ resolver := NewBudgetResolver(store, nil, logger)
tracker := NewUsageTracker(context.Background(), store, resolver, nil, logger)
defer tracker.Cleanup()
@@ -60,7 +60,7 @@ func TestUsageTracker_UpdateUsage_VirtualKeyNotFound(t *testing.T) {
store, err := NewLocalGovernanceStore(context.Background(), logger, nil, &configstore.GovernanceConfig{})
require.NoError(t, err)
- resolver := NewBudgetResolver(store, logger)
+ resolver := NewBudgetResolver(store, nil, logger)
tracker := NewUsageTracker(context.Background(), store, resolver, nil, logger)
defer tracker.Cleanup()
@@ -94,7 +94,7 @@ func TestUsageTracker_UpdateUsage_StreamingOptimization(t *testing.T) {
})
require.NoError(t, err)
- resolver := NewBudgetResolver(store, logger)
+ resolver := NewBudgetResolver(store, nil, logger)
tracker := NewUsageTracker(context.Background(), store, resolver, nil, logger)
defer tracker.Cleanup()
@@ -157,7 +157,7 @@ func TestUsageTracker_Cleanup(t *testing.T) {
store, err := NewLocalGovernanceStore(context.Background(), logger, nil, &configstore.GovernanceConfig{})
require.NoError(t, err)
- resolver := NewBudgetResolver(store, logger)
+ resolver := NewBudgetResolver(store, nil, logger)
tracker := NewUsageTracker(context.Background(), store, resolver, nil, logger)
// Should cleanup without error
diff --git a/transports/changelog.md b/transports/changelog.md
index e69de29bb2..e50e8ea93c 100644
--- a/transports/changelog.md
+++ b/transports/changelog.md
@@ -0,0 +1 @@
+- feat: Improved model validation for provider-prefixed model configurations