diff --git a/cli/go.mod b/cli/go.mod index 2c1c930bc2..c6a77eaf26 100644 --- a/cli/go.mod +++ b/cli/go.mod @@ -46,7 +46,7 @@ require ( github.com/twitchyliquid64/golang-asm v0.15.1 // indirect github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e // indirect golang.org/x/arch v0.23.0 // indirect - golang.org/x/exp v0.0.0-20250408133849-7e4ce0ab07d0 // indirect + golang.org/x/exp v0.0.0-20251113190631-e25ba8c21ef6 // indirect golang.org/x/sys v0.41.0 // indirect golang.org/x/text v0.33.0 // indirect ) diff --git a/cli/go.sum b/cli/go.sum index e6e613043a..9746bb475a 100644 --- a/cli/go.sum +++ b/cli/go.sum @@ -89,8 +89,7 @@ github.com/zalando/go-keyring v0.2.6 h1:r7Yc3+H+Ux0+M72zacZoItR3UDxeWfKTcabvkI8u github.com/zalando/go-keyring v0.2.6/go.mod h1:2TCrxYrbUNYfNS/Kgy/LSrkSQzZ5UPVH85RwfczwvcI= golang.org/x/arch v0.23.0 h1:lKF64A2jF6Zd8L0knGltUnegD62JMFBiCPBmQpToHhg= golang.org/x/arch v0.23.0/go.mod h1:dNHoOeKiyja7GTvF9NJS1l3Z2yntpQNzgrjh1cU103A= -golang.org/x/exp v0.0.0-20250408133849-7e4ce0ab07d0 h1:R84qjqJb5nVJMxqWYb3np9L5ZsaDtB+a39EqjV0JSUM= -golang.org/x/exp v0.0.0-20250408133849-7e4ce0ab07d0/go.mod h1:S9Xr4PYopiDyqSyp5NjCrhFrqg6A5zA2E/iPHPhqnS8= +golang.org/x/exp v0.0.0-20251113190631-e25ba8c21ef6 h1:zfMcR1Cs4KNuomFFgGefv5N0czO2XZpUbxGUy8i8ug0= golang.org/x/sys v0.0.0-20210809222454-d867a43fc93e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.41.0 h1:Ivj+2Cp/ylzLiEU89QhWblYnOE9zerudt9Ftecq2C6k= diff --git a/core/bifrost.go b/core/bifrost.go index f805ed03df..32959ed30a 100644 --- a/core/bifrost.go +++ b/core/bifrost.go @@ -4815,7 +4815,7 @@ func executeRequestWithRetries[T any]( } else { // Populate LLM response attributes for non-streaming responses if resp, ok := any(result).(*schemas.BifrostResponse); ok { - tracer.PopulateLLMResponseAttributes(handle, resp, bifrostError) + tracer.PopulateLLMResponseAttributes(ctx, handle, resp, bifrostError) } // 
End span with appropriate status diff --git a/core/providers/utils/utils.go b/core/providers/utils/utils.go index e48693e97d..3d722bc71b 100644 --- a/core/providers/utils/utils.go +++ b/core/providers/utils/utils.go @@ -2611,10 +2611,10 @@ func completeDeferredSpan(ctx *schemas.BifrostContext, result *schemas.BifrostRe if accumulatedResp != nil { // Use accumulated response for attributes (includes full content, tool calls, etc.) - tracer.PopulateLLMResponseAttributes(handle, accumulatedResp, err) + tracer.PopulateLLMResponseAttributes(ctx, handle, accumulatedResp, err) } else if result != nil { // Fall back to final chunk if no accumulated data (shouldn't happen normally) - tracer.PopulateLLMResponseAttributes(handle, result, err) + tracer.PopulateLLMResponseAttributes(ctx, handle, result, err) } // Finalize aggregated post-hook spans before ending the LLM span diff --git a/core/schemas/provider.go b/core/schemas/provider.go index 10d3a9d1ac..41e9cfebf0 100644 --- a/core/schemas/provider.go +++ b/core/schemas/provider.go @@ -8,15 +8,15 @@ import ( ) const ( - DefaultMaxRetries = 0 - DefaultRetryBackoffInitial = 500 * time.Millisecond - DefaultRetryBackoffMax = 5 * time.Second + DefaultMaxRetries = 0 + DefaultRetryBackoffInitial = 500 * time.Millisecond + DefaultRetryBackoffMax = 5 * time.Second DefaultRequestTimeoutInSeconds = 30 - DefaultMaxConnDurationInSeconds = 300 // 5 minutes — forces connection recycling to prevent stale connections from NAT/LB silent drops - DefaultBufferSize = 5000 - DefaultConcurrency = 1000 - DefaultStreamBufferSize = 256 - DefaultStreamIdleTimeoutInSeconds = 60 // Idle timeout per stream chunk — if no data for this many seconds, bifrost closes the connection + DefaultMaxConnDurationInSeconds = 300 // 5 minutes — forces connection recycling to prevent stale connections from NAT/LB silent drops + DefaultBufferSize = 5000 + DefaultConcurrency = 1000 + DefaultStreamBufferSize = 256 + DefaultStreamIdleTimeoutInSeconds = 60 // Idle timeout 
per stream chunk — if no data for this many seconds, bifrost closes the connection ) // Pre-defined errors for provider operations @@ -49,14 +49,14 @@ const ( // - When marshaling to JSON: a time.Duration is converted to milliseconds type NetworkConfig struct { // BaseURL is supported for OpenAI, Anthropic, Cohere, Mistral, and Ollama providers (required for Ollama) - BaseURL string `json:"base_url,omitempty"` // Base URL for the provider (optional) - ExtraHeaders map[string]string `json:"extra_headers,omitempty"` // Additional headers to include in requests (optional) - DefaultRequestTimeoutInSeconds int `json:"default_request_timeout_in_seconds"` // Default timeout for requests - MaxRetries int `json:"max_retries"` // Maximum number of retries - RetryBackoffInitial time.Duration `json:"retry_backoff_initial"` // Initial backoff duration (stored as nanoseconds, JSON as milliseconds) - RetryBackoffMax time.Duration `json:"retry_backoff_max"` // Maximum backoff duration (stored as nanoseconds, JSON as milliseconds) - InsecureSkipVerify bool `json:"insecure_skip_verify,omitempty"` // Disables TLS certificate verification for provider connections - CACertPEM string `json:"ca_cert_pem,omitempty"` // PEM-encoded CA certificate to trust for provider endpoint connections + BaseURL string `json:"base_url,omitempty"` // Base URL for the provider (optional) + ExtraHeaders map[string]string `json:"extra_headers,omitempty"` // Additional headers to include in requests (optional) + DefaultRequestTimeoutInSeconds int `json:"default_request_timeout_in_seconds"` // Default timeout for requests + MaxRetries int `json:"max_retries"` // Maximum number of retries + RetryBackoffInitial time.Duration `json:"retry_backoff_initial"` // Initial backoff duration (stored as nanoseconds, JSON as milliseconds) + RetryBackoffMax time.Duration `json:"retry_backoff_max"` // Maximum backoff duration (stored as nanoseconds, JSON as milliseconds) + InsecureSkipVerify bool 
`json:"insecure_skip_verify,omitempty"` // Disables TLS certificate verification for provider connections + CACertPEM string `json:"ca_cert_pem,omitempty"` // PEM-encoded CA certificate to trust for provider endpoint connections StreamIdleTimeoutInSeconds int `json:"stream_idle_timeout_in_seconds,omitempty"` // Idle timeout per stream chunk (0 = use default 60s) } @@ -387,67 +387,6 @@ type CustomProviderConfig struct { RequestPathOverrides map[RequestType]string `json:"request_path_overrides,omitempty"` // Mapping of request type to its custom path which will override the default path of the provider (not allowed for Bedrock) } -type PricingOverrideMatchType string - -const ( - PricingOverrideMatchExact PricingOverrideMatchType = "exact" - PricingOverrideMatchWildcard PricingOverrideMatchType = "wildcard" - PricingOverrideMatchRegex PricingOverrideMatchType = "regex" -) - -// ProviderPricingOverride contains a partial pricing patch applied at lookup time. -// Any nil field falls back to the base pricing data. 
-type ProviderPricingOverride struct { - ModelPattern string `json:"model_pattern"` - MatchType PricingOverrideMatchType `json:"match_type"` - RequestTypes []RequestType `json:"request_types,omitempty"` - - // Basic token pricing - InputCostPerToken *float64 `json:"input_cost_per_token,omitempty"` - OutputCostPerToken *float64 `json:"output_cost_per_token,omitempty"` - - // Additional pricing for media - InputCostPerVideoPerSecond *float64 `json:"input_cost_per_video_per_second,omitempty"` - InputCostPerAudioPerSecond *float64 `json:"input_cost_per_audio_per_second,omitempty"` - - // Character-based pricing - InputCostPerCharacter *float64 `json:"input_cost_per_character,omitempty"` - - // Pricing above 128k tokens - InputCostPerTokenAbove128kTokens *float64 `json:"input_cost_per_token_above_128k_tokens,omitempty"` - InputCostPerImageAbove128kTokens *float64 `json:"input_cost_per_image_above_128k_tokens,omitempty"` - InputCostPerVideoPerSecondAbove128kTokens *float64 `json:"input_cost_per_video_per_second_above_128k_tokens,omitempty"` - InputCostPerAudioPerSecondAbove128kTokens *float64 `json:"input_cost_per_audio_per_second_above_128k_tokens,omitempty"` - OutputCostPerTokenAbove128kTokens *float64 `json:"output_cost_per_token_above_128k_tokens,omitempty"` - - // Pricing above 200k tokens - InputCostPerTokenAbove200kTokens *float64 `json:"input_cost_per_token_above_200k_tokens,omitempty"` - OutputCostPerTokenAbove200kTokens *float64 `json:"output_cost_per_token_above_200k_tokens,omitempty"` - CacheCreationInputTokenCostAbove200kTokens *float64 `json:"cache_creation_input_token_cost_above_200k_tokens,omitempty"` - CacheReadInputTokenCostAbove200kTokens *float64 `json:"cache_read_input_token_cost_above_200k_tokens,omitempty"` - - // Cache and batch pricing - CacheReadInputTokenCost *float64 `json:"cache_read_input_token_cost,omitempty"` - CacheCreationInputTokenCost *float64 `json:"cache_creation_input_token_cost,omitempty"` - InputCostPerTokenBatches *float64 
`json:"input_cost_per_token_batches,omitempty"` - OutputCostPerTokenBatches *float64 `json:"output_cost_per_token_batches,omitempty"` - - // Image generation pricing - InputCostPerImageToken *float64 `json:"input_cost_per_image_token,omitempty"` - OutputCostPerImageToken *float64 `json:"output_cost_per_image_token,omitempty"` - InputCostPerImage *float64 `json:"input_cost_per_image,omitempty"` - OutputCostPerImage *float64 `json:"output_cost_per_image,omitempty"` - OutputCostPerImageAbove1024x1024Pixels *float64 `json:"output_cost_per_image_above_1024_and_1024_pixels,omitempty"` - OutputCostPerImageAbove1024x1024PixelsPremium *float64 `json:"output_cost_per_image_above_1024_and_1024_pixels_and_premium_image,omitempty"` - OutputCostPerImageAbove2048x2048Pixels *float64 `json:"output_cost_per_image_above_2048_and_2048_pixels,omitempty"` - OutputCostPerImageAbove4096x4096Pixels *float64 `json:"output_cost_per_image_above_4096_and_4096_pixels,omitempty"` - OutputCostPerImageLowQuality *float64 `json:"output_cost_per_image_low_quality,omitempty"` - OutputCostPerImageMediumQuality *float64 `json:"output_cost_per_image_medium_quality,omitempty"` - OutputCostPerImageHighQuality *float64 `json:"output_cost_per_image_high_quality,omitempty"` - OutputCostPerImageAutoQuality *float64 `json:"output_cost_per_image_auto_quality,omitempty"` - CacheReadInputImageTokenCost *float64 `json:"cache_read_input_image_token_cost,omitempty"` -} - // IsOperationAllowed checks if a specific operation is allowed for this custom provider func (cpc *CustomProviderConfig) IsOperationAllowed(operation RequestType) bool { if cpc == nil || cpc.AllowedRequests == nil { @@ -463,13 +402,12 @@ type ProviderConfig struct { NetworkConfig NetworkConfig `json:"network_config"` // Network configuration ConcurrencyAndBufferSize ConcurrencyAndBufferSize `json:"concurrency_and_buffer_size"` // Concurrency settings // Logger instance, can be provided by the user or bifrost default logger is used if not provided 
- Logger Logger `json:"-"` - ProxyConfig *ProxyConfig `json:"proxy_config,omitempty"` // Proxy configuration - SendBackRawRequest bool `json:"send_back_raw_request"` // Send raw request back in the bifrost response (default: false) - SendBackRawResponse bool `json:"send_back_raw_response"` // Send raw response back in the bifrost response (default: false) - StoreRawRequestResponse bool `json:"store_raw_request_response"` // Capture raw request/response for internal logging only; strip from API responses returned to clients (default: false) - CustomProviderConfig *CustomProviderConfig `json:"custom_provider_config,omitempty"` - PricingOverrides []ProviderPricingOverride `json:"pricing_overrides,omitempty"` + Logger Logger `json:"-"` + ProxyConfig *ProxyConfig `json:"proxy_config,omitempty"` // Proxy configuration + SendBackRawRequest bool `json:"send_back_raw_request"` // Send raw request back in the bifrost response (default: false) + SendBackRawResponse bool `json:"send_back_raw_response"` // Send raw response back in the bifrost response (default: false) + StoreRawRequestResponse bool `json:"store_raw_request_response"` // Capture raw request/response for internal logging only; strip from API responses returned to clients (default: false) + CustomProviderConfig *CustomProviderConfig `json:"custom_provider_config,omitempty"` } func (config *ProviderConfig) CheckAndSetDefaults() { diff --git a/core/schemas/tracer.go b/core/schemas/tracer.go index 74f4442b47..820d88c9dc 100644 --- a/core/schemas/tracer.go +++ b/core/schemas/tracer.go @@ -68,7 +68,7 @@ type Tracer interface { // PopulateLLMResponseAttributes populates all LLM-specific response attributes on the span. // This includes output messages, tokens, usage stats, and error information if present. 
- PopulateLLMResponseAttributes(handle SpanHandle, resp *BifrostResponse, err *BifrostError) + PopulateLLMResponseAttributes(ctx *BifrostContext, handle SpanHandle, resp *BifrostResponse, err *BifrostError) // StoreDeferredSpan stores a span handle for later completion (used for streaming requests). // The span handle is stored keyed by trace ID so it can be retrieved when the stream completes. @@ -144,7 +144,7 @@ func (n *NoOpTracer) AddEvent(_ SpanHandle, _ string, _ map[string]any) {} func (n *NoOpTracer) PopulateLLMRequestAttributes(_ SpanHandle, _ *BifrostRequest) {} // PopulateLLMResponseAttributes does nothing. -func (n *NoOpTracer) PopulateLLMResponseAttributes(_ SpanHandle, _ *BifrostResponse, _ *BifrostError) { +func (n *NoOpTracer) PopulateLLMResponseAttributes(_ *BifrostContext, _ SpanHandle, _ *BifrostResponse, _ *BifrostError) { } // StoreDeferredSpan does nothing. diff --git a/docs/architecture/framework/model-catalog.mdx b/docs/architecture/framework/model-catalog.mdx index 53e05433c6..76daad3871 100644 --- a/docs/architecture/framework/model-catalog.mdx +++ b/docs/architecture/framework/model-catalog.mdx @@ -189,6 +189,7 @@ Calculate costs from a Bifrost response: // Calculate cost for a completed request cost := modelCatalog.CalculateCost( result, // *schemas.BifrostResponse + nil, // *PricingLookupScopes (nil = no scoped overrides) ) logger.Info("Request cost: $%.6f", cost) @@ -199,7 +200,7 @@ logger.Info("Request cost: $%.6f", cost) ```go // CalculateCost handles all cost scenarios including cache-aware pricing -cost := modelCatalog.CalculateCost(result) // *schemas.BifrostResponse +cost := modelCatalog.CalculateCost(result, nil) // *schemas.BifrostResponse, *PricingLookupScopes // Cache hits return 0 for direct hits, embedding cost for semantic matches // Cache misses return base model cost + embedding generation cost diff --git a/docs/docs.json b/docs/docs.json index 22e8925d2d..bdbcd02d8f 100644 --- a/docs/docs.json +++ b/docs/docs.json @@ 
-154,6 +154,7 @@ "providers/reasoning", "providers/performance", "providers/custom-providers", + "providers/custom-pricing", "providers/request-options" ] }, diff --git a/docs/media/ui-custom-pricing-form.png b/docs/media/ui-custom-pricing-form.png new file mode 100644 index 0000000000..4bdefd5731 Binary files /dev/null and b/docs/media/ui-custom-pricing-form.png differ diff --git a/docs/media/ui-custom-pricing-table.png b/docs/media/ui-custom-pricing-table.png new file mode 100644 index 0000000000..470eaa9902 Binary files /dev/null and b/docs/media/ui-custom-pricing-table.png differ diff --git a/docs/openapi/openapi.json b/docs/openapi/openapi.json index aa1671ca22..21eaeb3f5b 100644 --- a/docs/openapi/openapi.json +++ b/docs/openapi/openapi.json @@ -2,7 +2,7 @@ "openapi": "3.1.0", "info": { "title": "Bifrost API", - "description": "Bifrost HTTP Transport API for AI model inference and gateway management.\n\nThis API provides a unified interface for interacting with multiple AI providers\nincluding OpenAI, Anthropic, Bedrock, Gemini, and more through a single API,\nalong with comprehensive management APIs for configuring and monitoring the gateway.\n\n## API Structure\n\n### Unified Inference API (`/v1/*`)\nThe primary API using Bifrost's unified format. Model parameters use the format\n`provider/model` (e.g., `openai/gpt-4`, `anthropic/claude-3-opus`).\n\n### Async Inference API (`/v1/async/*`)\nSubmit inference requests for asynchronous execution. Returns a job ID immediately\nand allows polling for results. 
Supports all inference types except batches, files,\nand containers.\n\n### Provider Integration APIs\nNative provider-format APIs for drop-in compatibility:\n- `/openai/*` - OpenAI-compatible API\n- `/anthropic/*` - Anthropic-compatible API\n- `/genai/*` - Google GenAI (Gemini) compatible API\n- `/bedrock/*` - AWS Bedrock compatible API\n- `/cohere/*` - Cohere compatible API\n\n### Framework Integration APIs\nMulti-provider proxy endpoints for AI frameworks:\n- `/litellm/*` - LiteLLM proxy with all provider formats\n- `/langchain/*` - LangChain compatible endpoints\n- `/pydanticai/*` - PydanticAI compatible endpoints\n\n### Management APIs (`/api/*`)\nAPIs for managing and monitoring the Bifrost gateway:\n- `/api/config` - Configuration management\n- `/api/providers` - Provider and API key management\n- `/api/plugins` - Plugin management\n- `/api/governance/*` - Virtual keys, teams, customers, budgets, rate limits, and routing rules\n- `/api/logs` - Log search and analytics\n- `/api/mcp/*` - MCP (Model Context Protocol) client management\n- `/api/session/*` - Authentication and session management\n- `/api/cache/*` - Cache management\n- `/health` - Health check endpoint\n\n## Fallbacks\nRequests can include fallback models that will be tried if the primary model fails.\n", + "description": "Bifrost HTTP Transport API for AI model inference and gateway management.\n\nThis API provides a unified interface for interacting with multiple AI providers\nincluding OpenAI, Anthropic, Bedrock, Gemini, and more through a single API,\nalong with comprehensive management APIs for configuring and monitoring the gateway.\n\n## API Structure\n\n### Unified Inference API (`/v1/*`)\nThe primary API using Bifrost's unified format. Model parameters use the format\n`provider/model` (e.g., `openai/gpt-4`, `anthropic/claude-3-opus`).\n\n### Async Inference API (`/v1/async/*`)\nSubmit inference requests for asynchronous execution. 
Returns a job ID immediately\nand allows polling for results. Supports all inference types except batches, files,\nand containers.\n\n### Provider Integration APIs\nNative provider-format APIs for drop-in compatibility:\n- `/openai/*` - OpenAI-compatible API\n- `/anthropic/*` - Anthropic-compatible API\n- `/genai/*` - Google GenAI (Gemini) compatible API\n- `/bedrock/*` - AWS Bedrock compatible API\n- `/cohere/*` - Cohere compatible API\n\n### Framework Integration APIs\nMulti-provider proxy endpoints for AI frameworks:\n- `/litellm/*` - LiteLLM proxy with all provider formats\n- `/langchain/*` - LangChain compatible endpoints\n- `/pydanticai/*` - PydanticAI compatible endpoints\n\n### Management APIs (`/api/*`)\nAPIs for managing and monitoring the Bifrost gateway:\n- `/api/config` - Configuration management\n- `/api/providers` - Provider and API key management\n- `/api/plugins` - Plugin management\n- `/api/governance/*` - Virtual keys, teams, customers, budgets, rate limits, routing rules, and pricing overrides\n- `/api/logs` - Log search and analytics\n- `/api/mcp/*` - MCP (Model Context Protocol) client management\n- `/api/session/*` - Authentication and session management\n- `/api/cache/*` - Cache management\n- `/health` - Health check endpoint\n\n## Fallbacks\nRequests can include fallback models that will be tried if the primary model fails.\n", "version": "1.0.0", "contact": { "name": "Contact Us", @@ -145104,7 +145104,9 @@ "type": "string" }, "weight": { - "type": "number" + "type": "number", + "nullable": true, + "description": "Weight for provider load balancing. Null means excluded from weighted routing." 
}, "allowed_models": { "type": "array", @@ -145649,6 +145651,7 @@ }, "provider_configs": { "type": "array", + "description": "Provider configurations (empty means no providers allowed, deny-by-default)", "items": { "type": "object", "properties": { @@ -145656,7 +145659,9 @@ "type": "string" }, "weight": { - "type": "number" + "type": "number", + "nullable": true, + "description": "Weight for load balancing. Null means excluded from weighted routing." }, "allowed_models": { "type": "array", @@ -145711,6 +145716,7 @@ }, "mcp_configs": { "type": "array", + "description": "MCP configurations (empty means no MCP tools allowed, deny-by-default)", "items": { "type": "object", "properties": { @@ -145823,7 +145829,9 @@ "type": "string" }, "weight": { - "type": "number" + "type": "number", + "nullable": true, + "description": "Weight for provider load balancing. Null means excluded from weighted routing." }, "allowed_models": { "type": "array", @@ -146494,7 +146502,9 @@ "type": "string" }, "weight": { - "type": "number" + "type": "number", + "nullable": true, + "description": "Weight for provider load balancing. Null means excluded from weighted routing." }, "allowed_models": { "type": "array", @@ -147137,7 +147147,9 @@ "type": "string" }, "weight": { - "type": "number" + "type": "number", + "nullable": true, + "description": "Weight for load balancing. Null means excluded from weighted routing." }, "allowed_models": { "type": "array", @@ -147299,7 +147311,9 @@ "type": "string" }, "weight": { - "type": "number" + "type": "number", + "nullable": true, + "description": "Weight for provider load balancing. Null means excluded from weighted routing." }, "allowed_models": { "type": "array", @@ -148334,7 +148348,9 @@ "type": "string" }, "weight": { - "type": "number" + "type": "number", + "nullable": true, + "description": "Weight for provider load balancing. Null means excluded from weighted routing." 
}, "allowed_models": { "type": "array", @@ -148845,7 +148861,9 @@ "type": "string" }, "weight": { - "type": "number" + "type": "number", + "nullable": true, + "description": "Weight for provider load balancing. Null means excluded from weighted routing." }, "allowed_models": { "type": "array", @@ -149556,7 +149574,9 @@ "type": "string" }, "weight": { - "type": "number" + "type": "number", + "nullable": true, + "description": "Weight for provider load balancing. Null means excluded from weighted routing." }, "allowed_models": { "type": "array", @@ -150067,7 +150087,9 @@ "type": "string" }, "weight": { - "type": "number" + "type": "number", + "nullable": true, + "description": "Weight for provider load balancing. Null means excluded from weighted routing." }, "allowed_models": { "type": "array", @@ -150838,7 +150860,9 @@ "type": "string" }, "weight": { - "type": "number" + "type": "number", + "nullable": true, + "description": "Weight for provider load balancing. Null means excluded from weighted routing." }, "allowed_models": { "type": "array", @@ -151349,7 +151373,9 @@ "type": "string" }, "weight": { - "type": "number" + "type": "number", + "nullable": true, + "description": "Weight for provider load balancing. Null means excluded from weighted routing." }, "allowed_models": { "type": "array", @@ -152144,7 +152170,9 @@ "type": "string" }, "weight": { - "type": "number" + "type": "number", + "nullable": true, + "description": "Weight for provider load balancing. Null means excluded from weighted routing." }, "allowed_models": { "type": "array", @@ -152655,7 +152683,9 @@ "type": "string" }, "weight": { - "type": "number" + "type": "number", + "nullable": true, + "description": "Weight for provider load balancing. Null means excluded from weighted routing." }, "allowed_models": { "type": "array", @@ -153693,7 +153723,9 @@ "type": "string" }, "weight": { - "type": "number" + "type": "number", + "nullable": true, + "description": "Weight for provider load balancing. 
Null means excluded from weighted routing." }, "allowed_models": { "type": "array", @@ -154372,7 +154404,9 @@ "type": "string" }, "weight": { - "type": "number" + "type": "number", + "nullable": true, + "description": "Weight for provider load balancing. Null means excluded from weighted routing." }, "allowed_models": { "type": "array", @@ -155181,7 +155215,9 @@ "type": "string" }, "weight": { - "type": "number" + "type": "number", + "nullable": true, + "description": "Weight for provider load balancing. Null means excluded from weighted routing." }, "allowed_models": { "type": "array", @@ -155692,7 +155728,9 @@ "type": "string" }, "weight": { - "type": "number" + "type": "number", + "nullable": true, + "description": "Weight for provider load balancing. Null means excluded from weighted routing." }, "allowed_models": { "type": "array", @@ -156181,7 +156219,9 @@ "type": "string" }, "weight": { - "type": "number" + "type": "number", + "nullable": true, + "description": "Weight for provider load balancing. Null means excluded from weighted routing." }, "allowed_models": { "type": "array", @@ -156944,7 +156984,9 @@ "type": "string" }, "weight": { - "type": "number" + "type": "number", + "nullable": true, + "description": "Weight for provider load balancing. Null means excluded from weighted routing." 
}, "allowed_models": { "type": "array", @@ -162068,8 +162110,2080 @@ } } }, - "404": { - "description": "Provider not found", + "404": { + "description": "Provider not found", + "content": { + "application/json": { + "schema": { + "type": "object", + "description": "Error response from Bifrost", + "properties": { + "event_id": { + "type": "string" + }, + "type": { + "type": "string" + }, + "is_bifrost_error": { + "type": "boolean" + }, + "status_code": { + "type": "integer" + }, + "error": { + "type": "object", + "properties": { + "type": { + "type": "string" + }, + "code": { + "type": "string" + }, + "message": { + "type": "string" + }, + "param": { + "type": "string" + }, + "event_id": { + "type": "string" + } + } + }, + "extra_fields": { + "type": "object", + "properties": { + "provider": { + "type": "string", + "description": "AI model provider identifier", + "enum": [ + "openai", + "azure", + "anthropic", + "bedrock", + "cohere", + "vertex", + "vllm", + "mistral", + "ollama", + "groq", + "sgl", + "parasail", + "perplexity", + "replicate", + "cerebras", + "gemini", + "openrouter", + "elevenlabs", + "huggingface", + "nebius", + "xai", + "runway" + ] + }, + "model_requested": { + "type": "string" + }, + "request_type": { + "type": "string" + } + } + } + } + } + } + } + }, + "500": { + "description": "Internal server error", + "content": { + "application/json": { + "schema": { + "type": "object", + "description": "Error response from Bifrost", + "properties": { + "event_id": { + "type": "string" + }, + "type": { + "type": "string" + }, + "is_bifrost_error": { + "type": "boolean" + }, + "status_code": { + "type": "integer" + }, + "error": { + "type": "object", + "properties": { + "type": { + "type": "string" + }, + "code": { + "type": "string" + }, + "message": { + "type": "string" + }, + "param": { + "type": "string" + }, + "event_id": { + "type": "string" + } + } + }, + "extra_fields": { + "type": "object", + "properties": { + "provider": { + "type": "string", + 
"description": "AI model provider identifier", + "enum": [ + "openai", + "azure", + "anthropic", + "bedrock", + "cohere", + "vertex", + "vllm", + "mistral", + "ollama", + "groq", + "sgl", + "parasail", + "perplexity", + "replicate", + "cerebras", + "gemini", + "openrouter", + "elevenlabs", + "huggingface", + "nebius", + "xai", + "runway" + ] + }, + "model_requested": { + "type": "string" + }, + "request_type": { + "type": "string" + } + } + } + } + } + } + } + } + } + }, + "delete": { + "operationId": "deleteProviderGovernance", + "summary": "Delete provider governance", + "description": "Removes governance settings (budget and rate limits) for a specific provider.", + "tags": [ + "Governance" + ], + "parameters": [ + { + "name": "provider_name", + "in": "path", + "required": true, + "description": "Provider name", + "schema": { + "type": "string" + } + } + ], + "responses": { + "200": { + "description": "Provider governance deleted successfully", + "content": { + "application/json": { + "schema": { + "type": "object", + "description": "Simple message response", + "properties": { + "message": { + "type": "string" + } + } + } + } + } + }, + "404": { + "description": "Provider not found", + "content": { + "application/json": { + "schema": { + "type": "object", + "description": "Error response from Bifrost", + "properties": { + "event_id": { + "type": "string" + }, + "type": { + "type": "string" + }, + "is_bifrost_error": { + "type": "boolean" + }, + "status_code": { + "type": "integer" + }, + "error": { + "type": "object", + "properties": { + "type": { + "type": "string" + }, + "code": { + "type": "string" + }, + "message": { + "type": "string" + }, + "param": { + "type": "string" + }, + "event_id": { + "type": "string" + } + } + }, + "extra_fields": { + "type": "object", + "properties": { + "provider": { + "type": "string", + "description": "AI model provider identifier", + "enum": [ + "openai", + "azure", + "anthropic", + "bedrock", + "cohere", + "vertex", + 
"vllm", + "mistral", + "ollama", + "groq", + "sgl", + "parasail", + "perplexity", + "replicate", + "cerebras", + "gemini", + "openrouter", + "elevenlabs", + "huggingface", + "nebius", + "xai", + "runway" + ] + }, + "model_requested": { + "type": "string" + }, + "request_type": { + "type": "string" + } + } + } + } + } + } + } + }, + "500": { + "description": "Internal server error", + "content": { + "application/json": { + "schema": { + "type": "object", + "description": "Error response from Bifrost", + "properties": { + "event_id": { + "type": "string" + }, + "type": { + "type": "string" + }, + "is_bifrost_error": { + "type": "boolean" + }, + "status_code": { + "type": "integer" + }, + "error": { + "type": "object", + "properties": { + "type": { + "type": "string" + }, + "code": { + "type": "string" + }, + "message": { + "type": "string" + }, + "param": { + "type": "string" + }, + "event_id": { + "type": "string" + } + } + }, + "extra_fields": { + "type": "object", + "properties": { + "provider": { + "type": "string", + "description": "AI model provider identifier", + "enum": [ + "openai", + "azure", + "anthropic", + "bedrock", + "cohere", + "vertex", + "vllm", + "mistral", + "ollama", + "groq", + "sgl", + "parasail", + "perplexity", + "replicate", + "cerebras", + "gemini", + "openrouter", + "elevenlabs", + "huggingface", + "nebius", + "xai", + "runway" + ] + }, + "model_requested": { + "type": "string" + }, + "request_type": { + "type": "string" + } + } + } + } + } + } + } + } + } + } + }, + "/api/governance/pricing-overrides": { + "get": { + "operationId": "listPricingOverrides", + "summary": "List pricing overrides", + "description": "Returns all pricing overrides, optionally filtered by scope.", + "tags": [ + "Governance" + ], + "parameters": [ + { + "name": "scope_kind", + "in": "query", + "description": "Filter by scope kind", + "schema": { + "type": "string", + "enum": [ + "global", + "provider", + "provider_key", + "virtual_key", + "virtual_key_provider", + 
"virtual_key_provider_key" + ] + } + }, + { + "name": "virtual_key_id", + "in": "query", + "description": "Filter by virtual key ID (for virtual_key* scopes)", + "schema": { + "type": "string" + } + }, + { + "name": "provider_id", + "in": "query", + "description": "Filter by provider ID", + "schema": { + "type": "string" + } + }, + { + "name": "provider_key_id", + "in": "query", + "description": "Filter by provider key ID", + "schema": { + "type": "string" + } + } + ], + "responses": { + "200": { + "description": "Successful response", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "pricing_overrides": { + "type": "array", + "items": { + "type": "object", + "description": "A pricing override that applies custom rates to matching requests.", + "properties": { + "id": { + "type": "string", + "description": "Unique override ID (UUID)" + }, + "name": { + "type": "string", + "description": "Human-readable label" + }, + "scope_kind": { + "type": "string", + "enum": [ + "global", + "provider", + "provider_key", + "virtual_key", + "virtual_key_provider", + "virtual_key_provider_key" + ], + "description": "Scope that determines which requests this override applies to" + }, + "virtual_key_id": { + "type": "string", + "nullable": true, + "description": "Required for virtual_key* scopes" + }, + "provider_id": { + "type": "string", + "nullable": true, + "description": "Required for provider and virtual_key_provider scopes" + }, + "provider_key_id": { + "type": "string", + "nullable": true, + "description": "Required for provider_key and virtual_key_provider_key scopes" + }, + "match_type": { + "type": "string", + "enum": [ + "exact", + "wildcard" + ], + "description": "How the pattern is matched against the model name" + }, + "pattern": { + "type": "string", + "description": "Model name or wildcard prefix (e.g. 
\"gpt-4o\" or \"claude-3*\")" + }, + "request_types": { + "type": "array", + "minItems": 1, + "items": { + "type": "string", + "description": "Request type for pricing override filtering. Stream variants are treated identically to their base type — specifying `chat_completion` covers both streaming and non-streaming chat requests.\n", + "enum": [ + "chat_completion", + "text_completion", + "responses", + "embedding", + "rerank", + "speech", + "transcription", + "image_generation", + "image_variation", + "image_edit", + "video_generation", + "video_remix" + ] + }, + "description": "Request types this override applies to. At least one value is required." + }, + "pricing_patch": { + "type": "string", + "description": "JSON-encoded pricing fields to override (as stored in the database)" + }, + "patch": { + "type": "object", + "description": "Decoded pricing fields (present in API responses)", + "properties": { + "input_cost_per_token": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_token": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_token_batches": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_token_batches": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_token_priority": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_token_priority": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_character": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_token_above_128k_tokens": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_token_above_128k_tokens": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_token_above_200k_tokens": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_token_above_200k_tokens": { + "type": "number", + "minimum": 0 + }, + "cache_creation_input_token_cost": { + "type": "number", + "minimum": 0 + }, + "cache_read_input_token_cost": { + "type": "number", + "minimum": 0 + }, + "cache_creation_input_token_cost_above_200k_tokens": { + 
"type": "number", + "minimum": 0 + }, + "cache_read_input_token_cost_above_200k_tokens": { + "type": "number", + "minimum": 0 + }, + "cache_read_input_token_cost_priority": { + "type": "number", + "minimum": 0 + }, + "cache_read_input_image_token_cost": { + "type": "number", + "minimum": 0 + }, + "cache_creation_input_audio_token_cost": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_image": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_pixel": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_pixel": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_image_token": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_token": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_low_quality": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_medium_quality": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_high_quality": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_auto_quality": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_premium_image": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_above_512_and_512_pixels": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_above_1024_and_1024_pixels": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_above_2048_and_2048_pixels": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_above_4096_and_4096_pixels": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_audio_token": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_audio_token": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_audio_per_second": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_second": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_video_per_second": { + "type": "number", + "minimum": 0 + }, + 
"output_cost_per_video_per_second": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_second": { + "type": "number", + "minimum": 0 + }, + "search_context_cost_per_query": { + "type": "number", + "minimum": 0 + }, + "code_interpreter_cost_per_session": { + "type": "number", + "minimum": 0 + } + } + }, + "config_hash": { + "type": "string", + "nullable": true, + "description": "Auto-managed hash for config-file-sourced overrides. Do not set manually." + }, + "created_at": { + "type": "string", + "format": "date-time" + }, + "updated_at": { + "type": "string", + "format": "date-time" + } + } + } + }, + "count": { + "type": "integer", + "description": "Total number of overrides returned" + } + } + } + } + } + }, + "500": { + "description": "Internal server error", + "content": { + "application/json": { + "schema": { + "type": "object", + "description": "Error response from Bifrost", + "properties": { + "event_id": { + "type": "string" + }, + "type": { + "type": "string" + }, + "is_bifrost_error": { + "type": "boolean" + }, + "status_code": { + "type": "integer" + }, + "error": { + "type": "object", + "properties": { + "type": { + "type": "string" + }, + "code": { + "type": "string" + }, + "message": { + "type": "string" + }, + "param": { + "type": "string" + }, + "event_id": { + "type": "string" + } + } + }, + "extra_fields": { + "type": "object", + "properties": { + "provider": { + "type": "string", + "description": "AI model provider identifier", + "enum": [ + "openai", + "azure", + "anthropic", + "bedrock", + "cohere", + "vertex", + "vllm", + "mistral", + "ollama", + "groq", + "sgl", + "parasail", + "perplexity", + "replicate", + "cerebras", + "gemini", + "openrouter", + "elevenlabs", + "huggingface", + "nebius", + "xai", + "runway" + ] + }, + "model_requested": { + "type": "string" + }, + "request_type": { + "type": "string" + } + } + } + } + } + } + } + } + } + }, + "post": { + "operationId": "createPricingOverride", + "summary": "Create pricing 
override", + "description": "Creates a new pricing override. The most specific matching scope always wins during cost resolution.", + "tags": [ + "Governance" + ], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "type": "object", + "description": "Request body for creating or updating a pricing override.", + "required": [ + "name", + "scope_kind", + "match_type", + "pattern", + "request_types" + ], + "properties": { + "name": { + "type": "string", + "description": "Human-readable label" + }, + "scope_kind": { + "type": "string", + "enum": [ + "global", + "provider", + "provider_key", + "virtual_key", + "virtual_key_provider", + "virtual_key_provider_key" + ] + }, + "virtual_key_id": { + "type": "string", + "description": "Required for virtual_key* scopes" + }, + "provider_id": { + "type": "string", + "description": "Required for provider and virtual_key_provider scopes" + }, + "provider_key_id": { + "type": "string", + "description": "Required for provider_key and virtual_key_provider_key scopes" + }, + "match_type": { + "type": "string", + "enum": [ + "exact", + "wildcard" + ] + }, + "pattern": { + "type": "string", + "description": "Model name or wildcard prefix ending with * (e.g. \"claude-3*\")" + }, + "request_types": { + "type": "array", + "minItems": 1, + "items": { + "type": "string", + "description": "Request type for pricing override filtering. Stream variants are treated identically to their base type — specifying `chat_completion` covers both streaming and non-streaming chat requests.\n", + "enum": [ + "chat_completion", + "text_completion", + "responses", + "embedding", + "rerank", + "speech", + "transcription", + "image_generation", + "image_variation", + "image_edit", + "video_generation", + "video_remix" + ] + }, + "description": "Request types this override applies to. At least one value is required." + }, + "patch": { + "type": "object", + "description": "Pricing fields to override. 
Only non-zero/non-null fields are applied. All values are cost per unit in USD.\n", + "properties": { + "input_cost_per_token": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_token": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_token_batches": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_token_batches": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_token_priority": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_token_priority": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_character": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_token_above_128k_tokens": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_token_above_128k_tokens": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_token_above_200k_tokens": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_token_above_200k_tokens": { + "type": "number", + "minimum": 0 + }, + "cache_creation_input_token_cost": { + "type": "number", + "minimum": 0 + }, + "cache_read_input_token_cost": { + "type": "number", + "minimum": 0 + }, + "cache_creation_input_token_cost_above_200k_tokens": { + "type": "number", + "minimum": 0 + }, + "cache_read_input_token_cost_above_200k_tokens": { + "type": "number", + "minimum": 0 + }, + "cache_read_input_token_cost_priority": { + "type": "number", + "minimum": 0 + }, + "cache_read_input_image_token_cost": { + "type": "number", + "minimum": 0 + }, + "cache_creation_input_audio_token_cost": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_image": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_pixel": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_pixel": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_image_token": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_token": { + "type": "number", + "minimum": 0 + }, + 
"output_cost_per_image_low_quality": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_medium_quality": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_high_quality": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_auto_quality": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_premium_image": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_above_512_and_512_pixels": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_above_1024_and_1024_pixels": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_above_2048_and_2048_pixels": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_above_4096_and_4096_pixels": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_audio_token": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_audio_token": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_audio_per_second": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_second": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_video_per_second": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_video_per_second": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_second": { + "type": "number", + "minimum": 0 + }, + "search_context_cost_per_query": { + "type": "number", + "minimum": 0 + }, + "code_interpreter_cost_per_session": { + "type": "number", + "minimum": 0 + } + } + } + } + } + } + } + }, + "responses": { + "201": { + "description": "Pricing override created successfully", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "message": { + "type": "string" + }, + "pricing_override": { + "type": "object", + "description": "A pricing override that applies custom rates to matching requests.", + "properties": { + "id": { + "type": "string", + "description": "Unique override ID (UUID)" + }, + "name": { + "type": "string", + "description": 
"Human-readable label" + }, + "scope_kind": { + "type": "string", + "enum": [ + "global", + "provider", + "provider_key", + "virtual_key", + "virtual_key_provider", + "virtual_key_provider_key" + ], + "description": "Scope that determines which requests this override applies to" + }, + "virtual_key_id": { + "type": "string", + "nullable": true, + "description": "Required for virtual_key* scopes" + }, + "provider_id": { + "type": "string", + "nullable": true, + "description": "Required for provider and virtual_key_provider scopes" + }, + "provider_key_id": { + "type": "string", + "nullable": true, + "description": "Required for provider_key and virtual_key_provider_key scopes" + }, + "match_type": { + "type": "string", + "enum": [ + "exact", + "wildcard" + ], + "description": "How the pattern is matched against the model name" + }, + "pattern": { + "type": "string", + "description": "Model name or wildcard prefix (e.g. \"gpt-4o\" or \"claude-3*\")" + }, + "request_types": { + "type": "array", + "minItems": 1, + "items": { + "type": "string", + "description": "Request type for pricing override filtering. Stream variants are treated identically to their base type — specifying `chat_completion` covers both streaming and non-streaming chat requests.\n", + "enum": [ + "chat_completion", + "text_completion", + "responses", + "embedding", + "rerank", + "speech", + "transcription", + "image_generation", + "image_variation", + "image_edit", + "video_generation", + "video_remix" + ] + }, + "description": "Request types this override applies to. At least one value is required." 
+ }, + "pricing_patch": { + "type": "string", + "description": "JSON-encoded pricing fields to override (as stored in the database)" + }, + "patch": { + "type": "object", + "description": "Decoded pricing fields (present in API responses)", + "properties": { + "input_cost_per_token": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_token": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_token_batches": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_token_batches": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_token_priority": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_token_priority": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_character": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_token_above_128k_tokens": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_token_above_128k_tokens": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_token_above_200k_tokens": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_token_above_200k_tokens": { + "type": "number", + "minimum": 0 + }, + "cache_creation_input_token_cost": { + "type": "number", + "minimum": 0 + }, + "cache_read_input_token_cost": { + "type": "number", + "minimum": 0 + }, + "cache_creation_input_token_cost_above_200k_tokens": { + "type": "number", + "minimum": 0 + }, + "cache_read_input_token_cost_above_200k_tokens": { + "type": "number", + "minimum": 0 + }, + "cache_read_input_token_cost_priority": { + "type": "number", + "minimum": 0 + }, + "cache_read_input_image_token_cost": { + "type": "number", + "minimum": 0 + }, + "cache_creation_input_audio_token_cost": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_image": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_pixel": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_pixel": { + "type": "number", + "minimum": 0 + }, + 
"input_cost_per_image_token": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_token": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_low_quality": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_medium_quality": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_high_quality": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_auto_quality": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_premium_image": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_above_512_and_512_pixels": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_above_1024_and_1024_pixels": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_above_2048_and_2048_pixels": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_above_4096_and_4096_pixels": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_audio_token": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_audio_token": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_audio_per_second": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_second": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_video_per_second": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_video_per_second": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_second": { + "type": "number", + "minimum": 0 + }, + "search_context_cost_per_query": { + "type": "number", + "minimum": 0 + }, + "code_interpreter_cost_per_session": { + "type": "number", + "minimum": 0 + } + } + }, + "config_hash": { + "type": "string", + "nullable": true, + "description": "Auto-managed hash for config-file-sourced overrides. Do not set manually." 
+ }, + "created_at": { + "type": "string", + "format": "date-time" + }, + "updated_at": { + "type": "string", + "format": "date-time" + } + } + } + } + } + } + } + }, + "400": { + "description": "Bad request", + "content": { + "application/json": { + "schema": { + "type": "object", + "description": "Error response from Bifrost", + "properties": { + "event_id": { + "type": "string" + }, + "type": { + "type": "string" + }, + "is_bifrost_error": { + "type": "boolean" + }, + "status_code": { + "type": "integer" + }, + "error": { + "type": "object", + "properties": { + "type": { + "type": "string" + }, + "code": { + "type": "string" + }, + "message": { + "type": "string" + }, + "param": { + "type": "string" + }, + "event_id": { + "type": "string" + } + } + }, + "extra_fields": { + "type": "object", + "properties": { + "provider": { + "type": "string", + "description": "AI model provider identifier", + "enum": [ + "openai", + "azure", + "anthropic", + "bedrock", + "cohere", + "vertex", + "vllm", + "mistral", + "ollama", + "groq", + "sgl", + "parasail", + "perplexity", + "replicate", + "cerebras", + "gemini", + "openrouter", + "elevenlabs", + "huggingface", + "nebius", + "xai", + "runway" + ] + }, + "model_requested": { + "type": "string" + }, + "request_type": { + "type": "string" + } + } + } + } + } + } + } + }, + "500": { + "description": "Internal server error", + "content": { + "application/json": { + "schema": { + "type": "object", + "description": "Error response from Bifrost", + "properties": { + "event_id": { + "type": "string" + }, + "type": { + "type": "string" + }, + "is_bifrost_error": { + "type": "boolean" + }, + "status_code": { + "type": "integer" + }, + "error": { + "type": "object", + "properties": { + "type": { + "type": "string" + }, + "code": { + "type": "string" + }, + "message": { + "type": "string" + }, + "param": { + "type": "string" + }, + "event_id": { + "type": "string" + } + } + }, + "extra_fields": { + "type": "object", + "properties": { + 
"provider": { + "type": "string", + "description": "AI model provider identifier", + "enum": [ + "openai", + "azure", + "anthropic", + "bedrock", + "cohere", + "vertex", + "vllm", + "mistral", + "ollama", + "groq", + "sgl", + "parasail", + "perplexity", + "replicate", + "cerebras", + "gemini", + "openrouter", + "elevenlabs", + "huggingface", + "nebius", + "xai", + "runway" + ] + }, + "model_requested": { + "type": "string" + }, + "request_type": { + "type": "string" + } + } + } + } + } + } + } + } + } + } + }, + "/api/governance/pricing-overrides/{id}": { + "put": { + "operationId": "updatePricingOverride", + "summary": "Update pricing override", + "description": "Replaces an existing pricing override's configuration.", + "tags": [ + "Governance" + ], + "parameters": [ + { + "name": "id", + "in": "path", + "required": true, + "description": "Pricing override ID", + "schema": { + "type": "string" + } + } + ], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "type": "object", + "description": "Request body for creating or updating a pricing override.", + "required": [ + "name", + "scope_kind", + "match_type", + "pattern", + "request_types" + ], + "properties": { + "name": { + "type": "string", + "description": "Human-readable label" + }, + "scope_kind": { + "type": "string", + "enum": [ + "global", + "provider", + "provider_key", + "virtual_key", + "virtual_key_provider", + "virtual_key_provider_key" + ] + }, + "virtual_key_id": { + "type": "string", + "description": "Required for virtual_key* scopes" + }, + "provider_id": { + "type": "string", + "description": "Required for provider and virtual_key_provider scopes" + }, + "provider_key_id": { + "type": "string", + "description": "Required for provider_key and virtual_key_provider_key scopes" + }, + "match_type": { + "type": "string", + "enum": [ + "exact", + "wildcard" + ] + }, + "pattern": { + "type": "string", + "description": "Model name or wildcard prefix ending with 
* (e.g. \"claude-3*\")" + }, + "request_types": { + "type": "array", + "minItems": 1, + "items": { + "type": "string", + "description": "Request type for pricing override filtering. Stream variants are treated identically to their base type — specifying `chat_completion` covers both streaming and non-streaming chat requests.\n", + "enum": [ + "chat_completion", + "text_completion", + "responses", + "embedding", + "rerank", + "speech", + "transcription", + "image_generation", + "image_variation", + "image_edit", + "video_generation", + "video_remix" + ] + }, + "description": "Request types this override applies to. At least one value is required." + }, + "patch": { + "type": "object", + "description": "Pricing fields to override. Only non-zero/non-null fields are applied. All values are cost per unit in USD.\n", + "properties": { + "input_cost_per_token": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_token": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_token_batches": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_token_batches": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_token_priority": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_token_priority": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_character": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_token_above_128k_tokens": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_token_above_128k_tokens": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_token_above_200k_tokens": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_token_above_200k_tokens": { + "type": "number", + "minimum": 0 + }, + "cache_creation_input_token_cost": { + "type": "number", + "minimum": 0 + }, + "cache_read_input_token_cost": { + "type": "number", + "minimum": 0 + }, + "cache_creation_input_token_cost_above_200k_tokens": { + "type": "number", + "minimum": 0 + }, + 
"cache_read_input_token_cost_above_200k_tokens": { + "type": "number", + "minimum": 0 + }, + "cache_read_input_token_cost_priority": { + "type": "number", + "minimum": 0 + }, + "cache_read_input_image_token_cost": { + "type": "number", + "minimum": 0 + }, + "cache_creation_input_audio_token_cost": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_image": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_pixel": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_pixel": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_image_token": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_token": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_low_quality": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_medium_quality": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_high_quality": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_auto_quality": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_premium_image": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_above_512_and_512_pixels": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_above_1024_and_1024_pixels": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_above_2048_and_2048_pixels": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_above_4096_and_4096_pixels": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_audio_token": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_audio_token": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_audio_per_second": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_second": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_video_per_second": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_video_per_second": { + "type": "number", + 
"minimum": 0 + }, + "output_cost_per_second": { + "type": "number", + "minimum": 0 + }, + "search_context_cost_per_query": { + "type": "number", + "minimum": 0 + }, + "code_interpreter_cost_per_session": { + "type": "number", + "minimum": 0 + } + } + } + } + } + } + } + }, + "responses": { + "200": { + "description": "Pricing override updated successfully", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "message": { + "type": "string" + }, + "pricing_override": { + "type": "object", + "description": "A pricing override that applies custom rates to matching requests.", + "properties": { + "id": { + "type": "string", + "description": "Unique override ID (UUID)" + }, + "name": { + "type": "string", + "description": "Human-readable label" + }, + "scope_kind": { + "type": "string", + "enum": [ + "global", + "provider", + "provider_key", + "virtual_key", + "virtual_key_provider", + "virtual_key_provider_key" + ], + "description": "Scope that determines which requests this override applies to" + }, + "virtual_key_id": { + "type": "string", + "nullable": true, + "description": "Required for virtual_key* scopes" + }, + "provider_id": { + "type": "string", + "nullable": true, + "description": "Required for provider and virtual_key_provider scopes" + }, + "provider_key_id": { + "type": "string", + "nullable": true, + "description": "Required for provider_key and virtual_key_provider_key scopes" + }, + "match_type": { + "type": "string", + "enum": [ + "exact", + "wildcard" + ], + "description": "How the pattern is matched against the model name" + }, + "pattern": { + "type": "string", + "description": "Model name or wildcard prefix (e.g. \"gpt-4o\" or \"claude-3*\")" + }, + "request_types": { + "type": "array", + "minItems": 1, + "items": { + "type": "string", + "description": "Request type for pricing override filtering. 
Stream variants are treated identically to their base type — specifying `chat_completion` covers both streaming and non-streaming chat requests.\n", + "enum": [ + "chat_completion", + "text_completion", + "responses", + "embedding", + "rerank", + "speech", + "transcription", + "image_generation", + "image_variation", + "image_edit", + "video_generation", + "video_remix" + ] + }, + "description": "Request types this override applies to. At least one value is required." + }, + "pricing_patch": { + "type": "string", + "description": "JSON-encoded pricing fields to override (as stored in the database)" + }, + "patch": { + "type": "object", + "description": "Decoded pricing fields (present in API responses)", + "properties": { + "input_cost_per_token": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_token": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_token_batches": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_token_batches": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_token_priority": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_token_priority": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_character": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_token_above_128k_tokens": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_token_above_128k_tokens": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_token_above_200k_tokens": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_token_above_200k_tokens": { + "type": "number", + "minimum": 0 + }, + "cache_creation_input_token_cost": { + "type": "number", + "minimum": 0 + }, + "cache_read_input_token_cost": { + "type": "number", + "minimum": 0 + }, + "cache_creation_input_token_cost_above_200k_tokens": { + "type": "number", + "minimum": 0 + }, + "cache_read_input_token_cost_above_200k_tokens": { + "type": "number", + "minimum": 0 + }, + "cache_read_input_token_cost_priority": { + "type": 
"number", + "minimum": 0 + }, + "cache_read_input_image_token_cost": { + "type": "number", + "minimum": 0 + }, + "cache_creation_input_audio_token_cost": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_image": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_pixel": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_pixel": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_image_token": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_token": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_low_quality": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_medium_quality": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_high_quality": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_auto_quality": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_premium_image": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_above_512_and_512_pixels": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_above_1024_and_1024_pixels": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_above_2048_and_2048_pixels": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_above_4096_and_4096_pixels": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_audio_token": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_audio_token": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_audio_per_second": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_second": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_video_per_second": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_video_per_second": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_second": { + "type": "number", + "minimum": 0 + }, + "search_context_cost_per_query": { + "type": "number", + 
"minimum": 0 + }, + "code_interpreter_cost_per_session": { + "type": "number", + "minimum": 0 + } + } + }, + "config_hash": { + "type": "string", + "nullable": true, + "description": "Auto-managed hash for config-file-sourced overrides. Do not set manually." + }, + "created_at": { + "type": "string", + "format": "date-time" + }, + "updated_at": { + "type": "string", + "format": "date-time" + } + } + } + } + } + } + } + }, + "400": { + "description": "Bad request", "content": { "application/json": { "schema": { @@ -162152,8 +164266,8 @@ } } }, - "500": { - "description": "Internal server error", + "404": { + "description": "Pricing override not found", "content": { "application/json": { "schema": { @@ -162235,46 +164349,9 @@ } } } - } - } - }, - "delete": { - "operationId": "deleteProviderGovernance", - "summary": "Delete provider governance", - "description": "Removes governance settings (budget and rate limits) for a specific provider.", - "tags": [ - "Governance" - ], - "parameters": [ - { - "name": "provider_name", - "in": "path", - "required": true, - "description": "Provider name", - "schema": { - "type": "string" - } - } - ], - "responses": { - "200": { - "description": "Provider governance deleted successfully", - "content": { - "application/json": { - "schema": { - "type": "object", - "description": "Simple message response", - "properties": { - "message": { - "type": "string" - } - } - } - } - } }, - "404": { - "description": "Provider not found", + "500": { + "description": "Internal server error", "content": { "application/json": { "schema": { @@ -162356,6 +164433,43 @@ } } } + } + } + }, + "delete": { + "operationId": "deletePricingOverride", + "summary": "Delete pricing override", + "description": "Deletes a pricing override by ID.", + "tags": [ + "Governance" + ], + "parameters": [ + { + "name": "id", + "in": "path", + "required": true, + "description": "Pricing override ID", + "schema": { + "type": "string" + } + } + ], + "responses": { + "200": { + 
"description": "Pricing override deleted successfully", + "content": { + "application/json": { + "schema": { + "type": "object", + "description": "Simple message response", + "properties": { + "message": { + "type": "string" + } + } + } + } + } }, "500": { "description": "Internal server error", @@ -170315,7 +172429,9 @@ "type": "string" }, "weight": { - "type": "number" + "type": "number", + "nullable": true, + "description": "Weight for provider load balancing. Null means excluded from weighted routing." }, "allowed_models": { "type": "array", @@ -198971,7 +201087,9 @@ "type": "string" }, "weight": { - "type": "number" + "type": "number", + "nullable": true, + "description": "Weight for provider load balancing. Null means excluded from weighted routing." }, "allowed_models": { "type": "array", @@ -199437,7 +201555,9 @@ "type": "string" }, "weight": { - "type": "number" + "type": "number", + "nullable": true, + "description": "Weight for provider load balancing. Null means excluded from weighted routing." }, "allowed_models": { "type": "array", @@ -199877,7 +201997,9 @@ "type": "string" }, "weight": { - "type": "number" + "type": "number", + "nullable": true, + "description": "Weight for provider load balancing. Null means excluded from weighted routing." }, "allowed_models": { "type": "array", @@ -200336,7 +202458,9 @@ "type": "string" }, "weight": { - "type": "number" + "type": "number", + "nullable": true, + "description": "Weight for provider load balancing. Null means excluded from weighted routing." }, "allowed_models": { "type": "array", @@ -200781,6 +202905,7 @@ }, "provider_configs": { "type": "array", + "description": "Provider configurations (empty means no providers allowed, deny-by-default)", "items": { "type": "object", "properties": { @@ -200788,7 +202913,9 @@ "type": "string" }, "weight": { - "type": "number" + "type": "number", + "nullable": true, + "description": "Weight for load balancing. Null means excluded from weighted routing." 
}, "allowed_models": { "type": "array", @@ -200843,6 +202970,7 @@ }, "mcp_configs": { "type": "array", + "description": "MCP configurations (empty means no MCP tools allowed, deny-by-default)", "items": { "type": "object", "properties": { @@ -200927,7 +203055,9 @@ "type": "string" }, "weight": { - "type": "number" + "type": "number", + "nullable": true, + "description": "Weight for load balancing. Null means excluded from weighted routing." }, "allowed_models": { "type": "array", @@ -201147,7 +203277,9 @@ "type": "string" }, "weight": { - "type": "number" + "type": "number", + "nullable": true, + "description": "Weight for provider load balancing. Null means excluded from weighted routing." }, "allowed_models": { "type": "array", @@ -201658,7 +203790,9 @@ "type": "string" }, "weight": { - "type": "number" + "type": "number", + "nullable": true, + "description": "Weight for provider load balancing. Null means excluded from weighted routing." }, "allowed_models": { "type": "array", @@ -202224,7 +204358,9 @@ "type": "string" }, "weight": { - "type": "number" + "type": "number", + "nullable": true, + "description": "Weight for provider load balancing. Null means excluded from weighted routing." }, "allowed_models": { "type": "array", @@ -202735,7 +204871,9 @@ "type": "string" }, "weight": { - "type": "number" + "type": "number", + "nullable": true, + "description": "Weight for provider load balancing. Null means excluded from weighted routing." }, "allowed_models": { "type": "array", @@ -203302,7 +205440,9 @@ "type": "string" }, "weight": { - "type": "number" + "type": "number", + "nullable": true, + "description": "Weight for provider load balancing. Null means excluded from weighted routing." }, "allowed_models": { "type": "array", @@ -203813,7 +205953,9 @@ "type": "string" }, "weight": { - "type": "number" + "type": "number", + "nullable": true, + "description": "Weight for provider load balancing. Null means excluded from weighted routing." 
}, "allowed_models": { "type": "array", @@ -204484,7 +206626,9 @@ "type": "string" }, "weight": { - "type": "number" + "type": "number", + "nullable": true, + "description": "Weight for provider load balancing. Null means excluded from weighted routing." }, "allowed_models": { "type": "array", @@ -204995,7 +207139,9 @@ "type": "string" }, "weight": { - "type": "number" + "type": "number", + "nullable": true, + "description": "Weight for provider load balancing. Null means excluded from weighted routing." }, "allowed_models": { "type": "array", @@ -205484,7 +207630,9 @@ "type": "string" }, "weight": { - "type": "number" + "type": "number", + "nullable": true, + "description": "Weight for provider load balancing. Null means excluded from weighted routing." }, "allowed_models": { "type": "array", @@ -206021,7 +208169,9 @@ "type": "string" }, "weight": { - "type": "number" + "type": "number", + "nullable": true, + "description": "Weight for provider load balancing. Null means excluded from weighted routing." }, "allowed_models": { "type": "array", @@ -206559,7 +208709,9 @@ "type": "string" }, "weight": { - "type": "number" + "type": "number", + "nullable": true, + "description": "Weight for provider load balancing. Null means excluded from weighted routing." }, "allowed_models": { "type": "array", @@ -209096,6 +211248,1267 @@ } } }, + "PricingOverrideRequestType": { + "type": "string", + "description": "Request type for pricing override filtering. Stream variants are treated identically to their base type — specifying `chat_completion` covers both streaming and non-streaming chat requests.\n", + "enum": [ + "chat_completion", + "text_completion", + "responses", + "embedding", + "rerank", + "speech", + "transcription", + "image_generation", + "image_variation", + "image_edit", + "video_generation", + "video_remix" + ] + }, + "PricingPatch": { + "type": "object", + "description": "Pricing fields to override. Only non-zero/non-null fields are applied. 
All values are cost per unit in USD.\n", + "properties": { + "input_cost_per_token": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_token": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_token_batches": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_token_batches": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_token_priority": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_token_priority": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_character": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_token_above_128k_tokens": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_token_above_128k_tokens": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_token_above_200k_tokens": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_token_above_200k_tokens": { + "type": "number", + "minimum": 0 + }, + "cache_creation_input_token_cost": { + "type": "number", + "minimum": 0 + }, + "cache_read_input_token_cost": { + "type": "number", + "minimum": 0 + }, + "cache_creation_input_token_cost_above_200k_tokens": { + "type": "number", + "minimum": 0 + }, + "cache_read_input_token_cost_above_200k_tokens": { + "type": "number", + "minimum": 0 + }, + "cache_read_input_token_cost_priority": { + "type": "number", + "minimum": 0 + }, + "cache_read_input_image_token_cost": { + "type": "number", + "minimum": 0 + }, + "cache_creation_input_audio_token_cost": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_image": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_pixel": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_pixel": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_image_token": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_token": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_low_quality": { + "type": "number", 
+ "minimum": 0 + }, + "output_cost_per_image_medium_quality": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_high_quality": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_auto_quality": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_premium_image": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_above_512_and_512_pixels": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_above_1024_and_1024_pixels": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_above_2048_and_2048_pixels": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_above_4096_and_4096_pixels": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_audio_token": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_audio_token": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_audio_per_second": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_second": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_video_per_second": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_video_per_second": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_second": { + "type": "number", + "minimum": 0 + }, + "search_context_cost_per_query": { + "type": "number", + "minimum": 0 + }, + "code_interpreter_cost_per_session": { + "type": "number", + "minimum": 0 + } + } + }, + "PricingOverride": { + "type": "object", + "description": "A pricing override that applies custom rates to matching requests.", + "properties": { + "id": { + "type": "string", + "description": "Unique override ID (UUID)" + }, + "name": { + "type": "string", + "description": "Human-readable label" + }, + "scope_kind": { + "type": "string", + "enum": [ + "global", + "provider", + "provider_key", + "virtual_key", + "virtual_key_provider", + "virtual_key_provider_key" + ], + "description": "Scope that determines which requests this override applies to" + }, + 
"virtual_key_id": { + "type": "string", + "nullable": true, + "description": "Required for virtual_key* scopes" + }, + "provider_id": { + "type": "string", + "nullable": true, + "description": "Required for provider and virtual_key_provider scopes" + }, + "provider_key_id": { + "type": "string", + "nullable": true, + "description": "Required for provider_key and virtual_key_provider_key scopes" + }, + "match_type": { + "type": "string", + "enum": [ + "exact", + "wildcard" + ], + "description": "How the pattern is matched against the model name" + }, + "pattern": { + "type": "string", + "description": "Model name or wildcard prefix (e.g. \"gpt-4o\" or \"claude-3*\")" + }, + "request_types": { + "type": "array", + "minItems": 1, + "items": { + "type": "string", + "description": "Request type for pricing override filtering. Stream variants are treated identically to their base type — specifying `chat_completion` covers both streaming and non-streaming chat requests.\n", + "enum": [ + "chat_completion", + "text_completion", + "responses", + "embedding", + "rerank", + "speech", + "transcription", + "image_generation", + "image_variation", + "image_edit", + "video_generation", + "video_remix" + ] + }, + "description": "Request types this override applies to. At least one value is required." 
+ }, + "pricing_patch": { + "type": "string", + "description": "JSON-encoded pricing fields to override (as stored in the database)" + }, + "patch": { + "type": "object", + "description": "Decoded pricing fields (present in API responses)", + "properties": { + "input_cost_per_token": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_token": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_token_batches": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_token_batches": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_token_priority": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_token_priority": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_character": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_token_above_128k_tokens": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_token_above_128k_tokens": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_token_above_200k_tokens": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_token_above_200k_tokens": { + "type": "number", + "minimum": 0 + }, + "cache_creation_input_token_cost": { + "type": "number", + "minimum": 0 + }, + "cache_read_input_token_cost": { + "type": "number", + "minimum": 0 + }, + "cache_creation_input_token_cost_above_200k_tokens": { + "type": "number", + "minimum": 0 + }, + "cache_read_input_token_cost_above_200k_tokens": { + "type": "number", + "minimum": 0 + }, + "cache_read_input_token_cost_priority": { + "type": "number", + "minimum": 0 + }, + "cache_read_input_image_token_cost": { + "type": "number", + "minimum": 0 + }, + "cache_creation_input_audio_token_cost": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_image": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_pixel": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_pixel": { + "type": "number", + "minimum": 0 + }, + 
"input_cost_per_image_token": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_token": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_low_quality": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_medium_quality": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_high_quality": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_auto_quality": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_premium_image": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_above_512_and_512_pixels": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_above_1024_and_1024_pixels": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_above_2048_and_2048_pixels": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_above_4096_and_4096_pixels": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_audio_token": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_audio_token": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_audio_per_second": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_second": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_video_per_second": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_video_per_second": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_second": { + "type": "number", + "minimum": 0 + }, + "search_context_cost_per_query": { + "type": "number", + "minimum": 0 + }, + "code_interpreter_cost_per_session": { + "type": "number", + "minimum": 0 + } + } + }, + "config_hash": { + "type": "string", + "nullable": true, + "description": "Auto-managed hash for config-file-sourced overrides. Do not set manually." 
+ }, + "created_at": { + "type": "string", + "format": "date-time" + }, + "updated_at": { + "type": "string", + "format": "date-time" + } + } + }, + "CreatePricingOverrideRequest": { + "type": "object", + "description": "Request body for creating or updating a pricing override.", + "required": [ + "name", + "scope_kind", + "match_type", + "pattern", + "request_types" + ], + "properties": { + "name": { + "type": "string", + "description": "Human-readable label" + }, + "scope_kind": { + "type": "string", + "enum": [ + "global", + "provider", + "provider_key", + "virtual_key", + "virtual_key_provider", + "virtual_key_provider_key" + ] + }, + "virtual_key_id": { + "type": "string", + "description": "Required for virtual_key* scopes" + }, + "provider_id": { + "type": "string", + "description": "Required for provider and virtual_key_provider scopes" + }, + "provider_key_id": { + "type": "string", + "description": "Required for provider_key and virtual_key_provider_key scopes" + }, + "match_type": { + "type": "string", + "enum": [ + "exact", + "wildcard" + ] + }, + "pattern": { + "type": "string", + "description": "Model name or wildcard prefix ending with * (e.g. \"claude-3*\")" + }, + "request_types": { + "type": "array", + "minItems": 1, + "items": { + "type": "string", + "description": "Request type for pricing override filtering. Stream variants are treated identically to their base type — specifying `chat_completion` covers both streaming and non-streaming chat requests.\n", + "enum": [ + "chat_completion", + "text_completion", + "responses", + "embedding", + "rerank", + "speech", + "transcription", + "image_generation", + "image_variation", + "image_edit", + "video_generation", + "video_remix" + ] + }, + "description": "Request types this override applies to. At least one value is required." + }, + "patch": { + "type": "object", + "description": "Pricing fields to override. Only non-zero/non-null fields are applied. 
All values are cost per unit in USD.\n", + "properties": { + "input_cost_per_token": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_token": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_token_batches": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_token_batches": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_token_priority": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_token_priority": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_character": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_token_above_128k_tokens": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_token_above_128k_tokens": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_token_above_200k_tokens": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_token_above_200k_tokens": { + "type": "number", + "minimum": 0 + }, + "cache_creation_input_token_cost": { + "type": "number", + "minimum": 0 + }, + "cache_read_input_token_cost": { + "type": "number", + "minimum": 0 + }, + "cache_creation_input_token_cost_above_200k_tokens": { + "type": "number", + "minimum": 0 + }, + "cache_read_input_token_cost_above_200k_tokens": { + "type": "number", + "minimum": 0 + }, + "cache_read_input_token_cost_priority": { + "type": "number", + "minimum": 0 + }, + "cache_read_input_image_token_cost": { + "type": "number", + "minimum": 0 + }, + "cache_creation_input_audio_token_cost": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_image": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_pixel": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_pixel": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_image_token": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_token": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_low_quality": { + "type": "number", 
+ "minimum": 0 + }, + "output_cost_per_image_medium_quality": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_high_quality": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_auto_quality": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_premium_image": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_above_512_and_512_pixels": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_above_1024_and_1024_pixels": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_above_2048_and_2048_pixels": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_above_4096_and_4096_pixels": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_audio_token": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_audio_token": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_audio_per_second": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_second": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_video_per_second": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_video_per_second": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_second": { + "type": "number", + "minimum": 0 + }, + "search_context_cost_per_query": { + "type": "number", + "minimum": 0 + }, + "code_interpreter_cost_per_session": { + "type": "number", + "minimum": 0 + } + } + } + } + }, + "PricingOverrideResponse": { + "type": "object", + "properties": { + "message": { + "type": "string" + }, + "pricing_override": { + "type": "object", + "description": "A pricing override that applies custom rates to matching requests.", + "properties": { + "id": { + "type": "string", + "description": "Unique override ID (UUID)" + }, + "name": { + "type": "string", + "description": "Human-readable label" + }, + "scope_kind": { + "type": "string", + "enum": [ + "global", + "provider", + "provider_key", + "virtual_key", + "virtual_key_provider", + 
"virtual_key_provider_key" + ], + "description": "Scope that determines which requests this override applies to" + }, + "virtual_key_id": { + "type": "string", + "nullable": true, + "description": "Required for virtual_key* scopes" + }, + "provider_id": { + "type": "string", + "nullable": true, + "description": "Required for provider and virtual_key_provider scopes" + }, + "provider_key_id": { + "type": "string", + "nullable": true, + "description": "Required for provider_key and virtual_key_provider_key scopes" + }, + "match_type": { + "type": "string", + "enum": [ + "exact", + "wildcard" + ], + "description": "How the pattern is matched against the model name" + }, + "pattern": { + "type": "string", + "description": "Model name or wildcard prefix (e.g. \"gpt-4o\" or \"claude-3*\")" + }, + "request_types": { + "type": "array", + "minItems": 1, + "items": { + "type": "string", + "description": "Request type for pricing override filtering. Stream variants are treated identically to their base type — specifying `chat_completion` covers both streaming and non-streaming chat requests.\n", + "enum": [ + "chat_completion", + "text_completion", + "responses", + "embedding", + "rerank", + "speech", + "transcription", + "image_generation", + "image_variation", + "image_edit", + "video_generation", + "video_remix" + ] + }, + "description": "Request types this override applies to. At least one value is required." 
+ }, + "pricing_patch": { + "type": "string", + "description": "JSON-encoded pricing fields to override (as stored in the database)" + }, + "patch": { + "type": "object", + "description": "Decoded pricing fields (present in API responses)", + "properties": { + "input_cost_per_token": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_token": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_token_batches": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_token_batches": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_token_priority": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_token_priority": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_character": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_token_above_128k_tokens": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_token_above_128k_tokens": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_token_above_200k_tokens": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_token_above_200k_tokens": { + "type": "number", + "minimum": 0 + }, + "cache_creation_input_token_cost": { + "type": "number", + "minimum": 0 + }, + "cache_read_input_token_cost": { + "type": "number", + "minimum": 0 + }, + "cache_creation_input_token_cost_above_200k_tokens": { + "type": "number", + "minimum": 0 + }, + "cache_read_input_token_cost_above_200k_tokens": { + "type": "number", + "minimum": 0 + }, + "cache_read_input_token_cost_priority": { + "type": "number", + "minimum": 0 + }, + "cache_read_input_image_token_cost": { + "type": "number", + "minimum": 0 + }, + "cache_creation_input_audio_token_cost": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_image": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_pixel": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_pixel": { + "type": "number", + "minimum": 0 + }, + 
"input_cost_per_image_token": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_token": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_low_quality": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_medium_quality": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_high_quality": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_auto_quality": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_premium_image": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_above_512_and_512_pixels": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_above_1024_and_1024_pixels": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_above_2048_and_2048_pixels": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_above_4096_and_4096_pixels": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_audio_token": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_audio_token": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_audio_per_second": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_second": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_video_per_second": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_video_per_second": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_second": { + "type": "number", + "minimum": 0 + }, + "search_context_cost_per_query": { + "type": "number", + "minimum": 0 + }, + "code_interpreter_cost_per_session": { + "type": "number", + "minimum": 0 + } + } + }, + "config_hash": { + "type": "string", + "nullable": true, + "description": "Auto-managed hash for config-file-sourced overrides. Do not set manually." 
+ }, + "created_at": { + "type": "string", + "format": "date-time" + }, + "updated_at": { + "type": "string", + "format": "date-time" + } + } + } + } + }, + "ListPricingOverridesResponse": { + "type": "object", + "properties": { + "pricing_overrides": { + "type": "array", + "items": { + "type": "object", + "description": "A pricing override that applies custom rates to matching requests.", + "properties": { + "id": { + "type": "string", + "description": "Unique override ID (UUID)" + }, + "name": { + "type": "string", + "description": "Human-readable label" + }, + "scope_kind": { + "type": "string", + "enum": [ + "global", + "provider", + "provider_key", + "virtual_key", + "virtual_key_provider", + "virtual_key_provider_key" + ], + "description": "Scope that determines which requests this override applies to" + }, + "virtual_key_id": { + "type": "string", + "nullable": true, + "description": "Required for virtual_key* scopes" + }, + "provider_id": { + "type": "string", + "nullable": true, + "description": "Required for provider and virtual_key_provider scopes" + }, + "provider_key_id": { + "type": "string", + "nullable": true, + "description": "Required for provider_key and virtual_key_provider_key scopes" + }, + "match_type": { + "type": "string", + "enum": [ + "exact", + "wildcard" + ], + "description": "How the pattern is matched against the model name" + }, + "pattern": { + "type": "string", + "description": "Model name or wildcard prefix (e.g. \"gpt-4o\" or \"claude-3*\")" + }, + "request_types": { + "type": "array", + "minItems": 1, + "items": { + "type": "string", + "description": "Request type for pricing override filtering. 
Stream variants are treated identically to their base type — specifying `chat_completion` covers both streaming and non-streaming chat requests.\n", + "enum": [ + "chat_completion", + "text_completion", + "responses", + "embedding", + "rerank", + "speech", + "transcription", + "image_generation", + "image_variation", + "image_edit", + "video_generation", + "video_remix" + ] + }, + "description": "Request types this override applies to. At least one value is required." + }, + "pricing_patch": { + "type": "string", + "description": "JSON-encoded pricing fields to override (as stored in the database)" + }, + "patch": { + "type": "object", + "description": "Decoded pricing fields (present in API responses)", + "properties": { + "input_cost_per_token": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_token": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_token_batches": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_token_batches": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_token_priority": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_token_priority": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_character": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_token_above_128k_tokens": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_token_above_128k_tokens": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_token_above_200k_tokens": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_token_above_200k_tokens": { + "type": "number", + "minimum": 0 + }, + "cache_creation_input_token_cost": { + "type": "number", + "minimum": 0 + }, + "cache_read_input_token_cost": { + "type": "number", + "minimum": 0 + }, + "cache_creation_input_token_cost_above_200k_tokens": { + "type": "number", + "minimum": 0 + }, + "cache_read_input_token_cost_above_200k_tokens": { + "type": "number", + "minimum": 0 + }, + "cache_read_input_token_cost_priority": { + "type": 
"number", + "minimum": 0 + }, + "cache_read_input_image_token_cost": { + "type": "number", + "minimum": 0 + }, + "cache_creation_input_audio_token_cost": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_image": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_pixel": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_pixel": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_image_token": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_token": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_low_quality": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_medium_quality": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_high_quality": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_auto_quality": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_premium_image": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_above_512_and_512_pixels": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_above_1024_and_1024_pixels": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_above_2048_and_2048_pixels": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_image_above_4096_and_4096_pixels": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_audio_token": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_audio_token": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_audio_per_second": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_second": { + "type": "number", + "minimum": 0 + }, + "input_cost_per_video_per_second": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_video_per_second": { + "type": "number", + "minimum": 0 + }, + "output_cost_per_second": { + "type": "number", + "minimum": 0 + }, + "search_context_cost_per_query": { + "type": "number", + 
"minimum": 0 + }, + "code_interpreter_cost_per_session": { + "type": "number", + "minimum": 0 + } + } + }, + "config_hash": { + "type": "string", + "nullable": true, + "description": "Auto-managed hash for config-file-sourced overrides. Do not set manually." + }, + "created_at": { + "type": "string", + "format": "date-time" + }, + "updated_at": { + "type": "string", + "format": "date-time" + } + } + } + }, + "count": { + "type": "integer", + "description": "Total number of overrides returned" + } + } + }, "LogEntry": { "type": "object", "description": "Log entry", diff --git a/docs/openapi/openapi.yaml b/docs/openapi/openapi.yaml index 9059709cb5..027fa9a643 100644 --- a/docs/openapi/openapi.yaml +++ b/docs/openapi/openapi.yaml @@ -38,7 +38,7 @@ info: - `/api/config` - Configuration management - `/api/providers` - Provider and API key management - `/api/plugins` - Plugin management - - `/api/governance/*` - Virtual keys, teams, customers, budgets, rate limits, and routing rules + - `/api/governance/*` - Virtual keys, teams, customers, budgets, rate limits, routing rules, and pricing overrides - `/api/logs` - Log search and analytics - `/api/mcp/*` - MCP (Model Context Protocol) client management - `/api/session/*` - Authentication and session management @@ -636,6 +636,12 @@ paths: /api/governance/providers/{provider_name}: $ref: './paths/management/governance.yaml#/provider-governance-by-name' + # Governance - Pricing Overrides + /api/governance/pricing-overrides: + $ref: './paths/management/governance.yaml#/pricing-overrides' + /api/governance/pricing-overrides/{id}: + $ref: './paths/management/governance.yaml#/pricing-overrides-by-id' + # Logging /api/logs: $ref: './paths/management/logging.yaml#/logs' @@ -1097,6 +1103,22 @@ components: UpdateProviderGovernanceRequest: $ref: './schemas/management/governance.yaml#/UpdateProviderGovernanceRequest' + # Governance - Pricing Overrides + PricingOverrideRequestType: + $ref: 
'./schemas/management/governance.yaml#/PricingOverrideRequestType' + PricingPatch: + $ref: './schemas/management/governance.yaml#/PricingPatch' + PricingOverride: + $ref: './schemas/management/governance.yaml#/PricingOverride' + CreatePricingOverrideRequest: + $ref: './schemas/management/governance.yaml#/CreatePricingOverrideRequest' + UpdatePricingOverrideRequest: + $ref: './schemas/management/governance.yaml#/UpdatePricingOverrideRequest' + PricingOverrideResponse: + $ref: './schemas/management/governance.yaml#/PricingOverrideResponse' + ListPricingOverridesResponse: + $ref: './schemas/management/governance.yaml#/ListPricingOverridesResponse' + # Logging LogEntry: $ref: './schemas/management/logging.yaml#/LogEntry' diff --git a/docs/openapi/paths/management/governance.yaml b/docs/openapi/paths/management/governance.yaml index b9e85bfdd6..35e38e1b99 100644 --- a/docs/openapi/paths/management/governance.yaml +++ b/docs/openapi/paths/management/governance.yaml @@ -897,4 +897,135 @@ provider-governance-by-name: schema: $ref: '../../schemas/inference/common.yaml#/BifrostError' '500': - $ref: '../../openapi.yaml#/components/responses/InternalError' \ No newline at end of file + $ref: '../../openapi.yaml#/components/responses/InternalError' +# Pricing Overrides CRUD + +pricing-overrides: + get: + operationId: listPricingOverrides + summary: List pricing overrides + description: Returns all pricing overrides, optionally filtered by scope. 
+ tags: + - Governance + parameters: + - name: scope_kind + in: query + description: Filter by scope kind + schema: + type: string + enum: + - global + - provider + - provider_key + - virtual_key + - virtual_key_provider + - virtual_key_provider_key + - name: virtual_key_id + in: query + description: Filter by virtual key ID (for virtual_key* scopes) + schema: + type: string + - name: provider_id + in: query + description: Filter by provider ID + schema: + type: string + - name: provider_key_id + in: query + description: Filter by provider key ID + schema: + type: string + responses: + '200': + description: Successful response + content: + application/json: + schema: + $ref: '../../schemas/management/governance.yaml#/ListPricingOverridesResponse' + '500': + $ref: '../../openapi.yaml#/components/responses/InternalError' + + post: + operationId: createPricingOverride + summary: Create pricing override + description: Creates a new pricing override. The most specific matching scope always wins during cost resolution. + tags: + - Governance + requestBody: + required: true + content: + application/json: + schema: + $ref: '../../schemas/management/governance.yaml#/CreatePricingOverrideRequest' + responses: + '201': + description: Pricing override created successfully + content: + application/json: + schema: + $ref: '../../schemas/management/governance.yaml#/PricingOverrideResponse' + '400': + $ref: '../../openapi.yaml#/components/responses/BadRequest' + '500': + $ref: '../../openapi.yaml#/components/responses/InternalError' + +pricing-overrides-by-id: + put: + operationId: updatePricingOverride + summary: Update pricing override + description: Updates an existing pricing override. Omitted fields are merged from the existing record. The `patch` field is always replaced in full when provided. 
+ tags: + - Governance + parameters: + - name: id + in: path + required: true + description: Pricing override ID + schema: + type: string + requestBody: + required: true + content: + application/json: + schema: + $ref: '../../schemas/management/governance.yaml#/UpdatePricingOverrideRequest' + responses: + '200': + description: Pricing override updated successfully + content: + application/json: + schema: + $ref: '../../schemas/management/governance.yaml#/PricingOverrideResponse' + '400': + $ref: '../../openapi.yaml#/components/responses/BadRequest' + '404': + description: Pricing override not found + content: + application/json: + schema: + $ref: '../../schemas/inference/common.yaml#/BifrostError' + '500': + $ref: '../../openapi.yaml#/components/responses/InternalError' + + delete: + operationId: deletePricingOverride + summary: Delete pricing override + description: Deletes a pricing override by ID. + tags: + - Governance + parameters: + - name: id + in: path + required: true + description: Pricing override ID + schema: + type: string + responses: + '200': + description: Pricing override deleted successfully + content: + application/json: + schema: + $ref: '../../schemas/management/common.yaml#/MessageResponse' + '500': + $ref: '../../openapi.yaml#/components/responses/InternalError' diff --git a/docs/openapi/schemas/management/governance.yaml b/docs/openapi/schemas/management/governance.yaml index 7053f1a8dc..a5d8b379f6 100644 --- a/docs/openapi/schemas/management/governance.yaml +++ b/docs/openapi/schemas/management/governance.yaml @@ -937,3 +937,334 @@ UpdateProviderGovernanceRequest: rate_limit: $ref: '#/UpdateRateLimitRequest' description: Rate limit configuration + +# Pricing Overrides + +PricingOverrideRequestType: + type: string + description: > + Request type for pricing override filtering. Stream variants are treated + identically to their base type — specifying `chat_completion` covers both + streaming and non-streaming chat requests. 
+ enum: + - chat_completion + - text_completion + - responses + - embedding + - rerank + - speech + - transcription + - image_generation + - image_variation + - image_edit + - video_generation + - video_remix + +PricingPatch: + type: object + description: > + Pricing fields to override. Only non-zero/non-null fields are applied. + All values are cost per unit in USD. + properties: + input_cost_per_token: + type: number + minimum: 0 + output_cost_per_token: + type: number + minimum: 0 + input_cost_per_token_batches: + type: number + minimum: 0 + output_cost_per_token_batches: + type: number + minimum: 0 + input_cost_per_token_priority: + type: number + minimum: 0 + output_cost_per_token_priority: + type: number + minimum: 0 + input_cost_per_character: + type: number + minimum: 0 + input_cost_per_token_above_128k_tokens: + type: number + minimum: 0 + output_cost_per_token_above_128k_tokens: + type: number + minimum: 0 + input_cost_per_token_above_200k_tokens: + type: number + minimum: 0 + output_cost_per_token_above_200k_tokens: + type: number + minimum: 0 + cache_creation_input_token_cost: + type: number + minimum: 0 + cache_read_input_token_cost: + type: number + minimum: 0 + cache_creation_input_token_cost_above_200k_tokens: + type: number + minimum: 0 + cache_read_input_token_cost_above_200k_tokens: + type: number + minimum: 0 + cache_read_input_token_cost_priority: + type: number + minimum: 0 + cache_read_input_image_token_cost: + type: number + minimum: 0 + cache_creation_input_audio_token_cost: + type: number + minimum: 0 + input_cost_per_image: + type: number + minimum: 0 + output_cost_per_image: + type: number + minimum: 0 + input_cost_per_pixel: + type: number + minimum: 0 + output_cost_per_pixel: + type: number + minimum: 0 + input_cost_per_image_token: + type: number + minimum: 0 + output_cost_per_image_token: + type: number + minimum: 0 + output_cost_per_image_low_quality: + type: number + minimum: 0 + output_cost_per_image_medium_quality: + type: number 
+ minimum: 0 + output_cost_per_image_high_quality: + type: number + minimum: 0 + output_cost_per_image_auto_quality: + type: number + minimum: 0 + output_cost_per_image_premium_image: + type: number + minimum: 0 + output_cost_per_image_above_512_and_512_pixels: + type: number + minimum: 0 + output_cost_per_image_above_1024_and_1024_pixels: + type: number + minimum: 0 + output_cost_per_image_above_2048_and_2048_pixels: + type: number + minimum: 0 + output_cost_per_image_above_4096_and_4096_pixels: + type: number + minimum: 0 + input_cost_per_audio_token: + type: number + minimum: 0 + output_cost_per_audio_token: + type: number + minimum: 0 + input_cost_per_audio_per_second: + type: number + minimum: 0 + input_cost_per_second: + type: number + minimum: 0 + input_cost_per_video_per_second: + type: number + minimum: 0 + output_cost_per_video_per_second: + type: number + minimum: 0 + output_cost_per_second: + type: number + minimum: 0 + search_context_cost_per_query: + type: number + minimum: 0 + code_interpreter_cost_per_session: + type: number + minimum: 0 + +PricingOverride: + type: object + description: A pricing override that applies custom rates to matching requests. 
+ properties: + id: + type: string + description: Unique override ID (UUID) + name: + type: string + description: Human-readable label + scope_kind: + type: string + enum: + - global + - provider + - provider_key + - virtual_key + - virtual_key_provider + - virtual_key_provider_key + description: Scope that determines which requests this override applies to + virtual_key_id: + type: string + nullable: true + description: Required for virtual_key* scopes + provider_id: + type: string + nullable: true + description: Required for provider and virtual_key_provider scopes + provider_key_id: + type: string + nullable: true + description: Required for provider_key and virtual_key_provider_key scopes + match_type: + type: string + enum: + - exact + - wildcard + description: How the pattern is matched against the model name + pattern: + type: string + description: Model name or wildcard prefix (e.g. "gpt-4o" or "claude-3*") + request_types: + type: array + minItems: 1 + items: + $ref: '#/PricingOverrideRequestType' + description: Request types this override applies to. At least one value is required. + pricing_patch: + type: string + description: JSON-encoded pricing fields to override (as stored in the database) + patch: + $ref: '#/PricingPatch' + description: Decoded pricing fields (present in API responses) + config_hash: + type: string + nullable: true + description: Auto-managed hash for config-file-sourced overrides. Do not set manually. + created_at: + type: string + format: date-time + updated_at: + type: string + format: date-time + +CreatePricingOverrideRequest: + type: object + description: Request body for creating a pricing override. 
+ required: + - name + - scope_kind + - match_type + - pattern + - request_types + properties: + name: + type: string + description: Human-readable label + scope_kind: + type: string + enum: + - global + - provider + - provider_key + - virtual_key + - virtual_key_provider + - virtual_key_provider_key + virtual_key_id: + type: string + description: Required for virtual_key* scopes + provider_id: + type: string + description: Required for provider and virtual_key_provider scopes + provider_key_id: + type: string + description: Required for provider_key and virtual_key_provider_key scopes + match_type: + type: string + enum: + - exact + - wildcard + pattern: + type: string + description: Model name or wildcard prefix ending with * (e.g. "claude-3*") + request_types: + type: array + minItems: 1 + items: + $ref: '#/PricingOverrideRequestType' + description: Request types this override applies to. At least one value is required. + patch: + $ref: '#/PricingPatch' + +UpdatePricingOverrideRequest: + type: object + description: > + Request body for updating a pricing override. All fields are optional — + omitted fields are merged from the existing record. The `patch` field is + always replaced in full when provided. + properties: + name: + type: string + description: Human-readable label + scope_kind: + type: string + enum: + - global + - provider + - provider_key + - virtual_key + - virtual_key_provider + - virtual_key_provider_key + virtual_key_id: + type: string + description: Required for virtual_key* scopes + provider_id: + type: string + description: Required for provider and virtual_key_provider scopes + provider_key_id: + type: string + description: Required for provider_key and virtual_key_provider_key scopes + match_type: + type: string + enum: + - exact + - wildcard + pattern: + type: string + description: Model name or wildcard prefix ending with * (e.g. 
"claude-3*") + request_types: + type: array + minItems: 1 + items: + $ref: '#/PricingOverrideRequestType' + description: Request types this override applies to. + patch: + $ref: '#/PricingPatch' + +PricingOverrideResponse: + type: object + properties: + message: + type: string + pricing_override: + $ref: '#/PricingOverride' + +ListPricingOverridesResponse: + type: object + properties: + pricing_overrides: + type: array + items: + $ref: '#/PricingOverride' + count: + type: integer + description: Total number of overrides returned diff --git a/docs/providers/custom-pricing.mdx b/docs/providers/custom-pricing.mdx new file mode 100644 index 0000000000..13c883773f --- /dev/null +++ b/docs/providers/custom-pricing.mdx @@ -0,0 +1,410 @@ +--- +title: "Custom Pricing" +description: "Set custom rates for any model across global or virtual key scopes, optionally narrowed to a specific provider or key." +icon: "circle-dollar-to-slot" +--- + +## Overview + +Bifrost computes request costs using a built-in pricing catalog that is automatically synced from a remote datasheet. **Custom Pricing** lets you override those catalog prices at runtime without redeploying, applying your own rates for any model across any combination of provider, key, and virtual key scopes. + +**Key capabilities:** +- **Scoped overrides** — apply prices globally or narrow them to a specific provider, provider key, or virtual key +- **Pattern matching** — target an exact model name or a wildcard prefix (e.g. `gpt-4*`) +- **Request type filtering** — restrict an override to one or more specific operations (chat, embeddings, image generation, etc.); at least one request type is required +- **Hierarchical resolution** — the most-specific matching override always wins; broader scopes act as fallbacks + +--- + +## Pricing data source + +Before configuring overrides, Bifrost needs a pricing catalog to work from. By default it ships with built-in prices and syncs them every 24 hours. 
You can point it at a custom pricing URL if you maintain your own datasheet. + + + + +1. Navigate to **Models** in the sidebar +2. Click the **Pricing Settings** tab +3. Enter your pricing datasheet URL in the **Pricing Datasheet URL** field +4. Set the **Pricing Sync Interval** (in hours) +5. Click **Save** + + + + +```json +{ + "framework": { + "pricing": { + "pricing_url": "https://your-host/pricing.json", + "pricing_sync_interval": 86400 + } + } +} +``` + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `pricing_url` | string (URI) | No | built-in | URL of the pricing datasheet to sync from | +| `pricing_sync_interval` | integer | No | `86400` | Sync interval in seconds. Minimum `3600` (1 hour) | + + + + +--- + +## Scope hierarchy + +Every override is assigned a **scope kind** that determines which requests it applies to. When Bifrost resolves pricing for a request, it evaluates all matching overrides and selects the one with the most specific scope. More specific scopes always win over broader ones. 
+ +``` +virtual_key_provider_key (most specific) +virtual_key_provider +virtual_key +provider_key +provider +global (least specific / catch-all) +``` + +**Scope kinds and their required identifiers:** + +| Scope kind | Required | Description | +|------------|----------|-------------| +| `global` | — | Applies to every request regardless of provider, key, or virtual key | +| `provider` | `provider_id` | Applies to all keys under a specific provider | +| `provider_key` | `provider_key_id` | Applies to a specific provider API key only | +| `virtual_key` | `virtual_key_id` | Applies to all requests made under a virtual key | +| `virtual_key_provider` | `virtual_key_id` + `provider_id` | Applies when a virtual key routes to a specific provider | +| `virtual_key_provider_key` | `virtual_key_id` + `provider_key_id` | Most specific: virtual key + exact provider API key | + + +Scope identifiers are exclusive to their scope kind — you cannot mix them. For example, `virtual_key_provider` requires `virtual_key_id` and `provider_id` and must not include `provider_key_id`. + + +--- + +## Pattern matching + +The `pattern` field controls which model names the override applies to. The `match_type` field controls how the pattern is interpreted. + +| Match type | Behavior | Example | +|------------|----------|---------| +| `exact` | Matches only the exact model name | `gpt-4o` matches only `gpt-4o` | +| `wildcard` | Prefix match — pattern must end with `*` | `gpt-4*` matches `gpt-4o`, `gpt-4-turbo`, `gpt-4o-mini` | + + +For wildcard patterns, append a `*` at the end of the prefix. For example, `claude-3*` will match all Claude 3 variants. + + +--- + +## Request type filtering + +`request_types` is **required** and must contain at least one value. Only request types that have pricing support are accepted. Stream variants are treated identically to their base type — specifying `chat_completion` covers both streaming and non-streaming chat requests. 
+ +| Type | Description | +|------|-------------| +| `chat_completion` | Chat requests (streaming included) | +| `text_completion` | Legacy text completions (streaming included) | +| `responses` | Responses API requests (streaming included) | +| `embedding` | Embedding generation | +| `rerank` | Reranking | +| `speech` | Text-to-speech (streaming included) | +| `transcription` | Speech-to-text (streaming included) | +| `image_generation` | Image generation (streaming included) | +| `image_variation` | Image variation | +| `image_edit` | Image editing (streaming included) | +| `video_generation` | Video generation | +| `video_remix` | Video remixing | + +--- + +## Creating an override + + + + +1. Navigate to **Models** → **Pricing Overrides** in the sidebar + +![Pricing Overrides Table](../media/ui-custom-pricing-table.png) + +2. Click **Create Override** +3. Fill in the form: + - **Name** — a human-readable label + - **Scope** — select the scope kind and provide the matching IDs + - **Pattern** — enter the model name or wildcard prefix + - **Match type** — choose **Exact** or **Wildcard** + - **Request types** — select one or more request types (required) + - **Pricing fields** — enter the price values you want to override (only non-zero fields are applied) +4. 
Click **Save** + +![Pricing Override Form](../media/ui-custom-pricing-form.png) + + + + +```bash +curl -X POST http://localhost:8080/api/governance/pricing-overrides \ + -H "Content-Type: application/json" \ + -d '{ + "name": "GPT-4o reduced input cost", + "scope_kind": "global", + "match_type": "exact", + "pattern": "gpt-4o", + "request_types": ["chat_completion"], + "patch": { + "input_cost_per_token": 0.0000025, + "output_cost_per_token": 0.000010 + } + }' +``` + +**Response:** +```json +{ + "message": "Pricing override created successfully", + "pricing_override": { + "id": "550e8400-e29b-41d4-a716-446655440000", + "name": "GPT-4o reduced input cost", + "scope_kind": "global", + "match_type": "exact", + "pattern": "gpt-4o", + "request_types": ["chat_completion"], + "pricing_patch": "{\"input_cost_per_token\":0.0000025,\"output_cost_per_token\":0.00001}", + "created_at": "2026-03-20T10:00:00Z", + "updated_at": "2026-03-20T10:00:00Z" + } +} +``` + +**Update (omitted fields keep their current values; `patch` is replaced in full when provided):** +```bash +curl -X PUT http://localhost:8080/api/governance/pricing-overrides/{id} \ + -H "Content-Type: application/json" \ + -d '{ + "patch": { + "input_cost_per_token": 0.000002 + } + }' +``` + +**Delete:** +```bash +curl -X DELETE http://localhost:8080/api/governance/pricing-overrides/{id} +``` + +**List (with optional filters):** +```bash +# All overrides +curl http://localhost:8080/api/governance/pricing-overrides + +# Filter by scope +curl "http://localhost:8080/api/governance/pricing-overrides?scope_kind=virtual_key&virtual_key_id=vk-abc123" +``` + + + + +Pricing overrides are defined under `governance.pricing_overrides`. Each entry requires `id`, `name`, `scope_kind`, `match_type`, `pattern`, and `request_types`. The `pricing_patch` is a JSON-encoded string containing only the fields you want to override. 
+ +```json +{ + "governance": { + "pricing_overrides": [ + { + "id": "550e8400-e29b-41d4-a716-446655440000", + "name": "Global GPT-4o rate", + "scope_kind": "global", + "match_type": "exact", + "pattern": "gpt-4o", + "request_types": ["chat_completion"], + "pricing_patch": "{\"input_cost_per_token\":0.0000025,\"output_cost_per_token\":0.00001}" + }, + { + "id": "660e8400-e29b-41d4-a716-446655440001", + "name": "All Claude models for prod VK", + "scope_kind": "virtual_key", + "virtual_key_id": "vk-abc123", + "match_type": "wildcard", + "pattern": "claude-3*", + "request_types": ["chat_completion"], + "pricing_patch": "{\"input_cost_per_token\":0.000003,\"output_cost_per_token\":0.000015}" + } + ] + } +} +``` + +| Field | Type | Required | Description | +|-------|------|----------|-------------| +| `id` | string | Yes | Unique override ID (UUID recommended) | +| `name` | string | Yes | Human-readable label | +| `scope_kind` | string | Yes | One of: `global`, `provider`, `provider_key`, `virtual_key`, `virtual_key_provider`, `virtual_key_provider_key` | +| `virtual_key_id` | string | Conditional | Required for `virtual_key*` scopes | +| `provider_id` | string | Conditional | Required for `provider` and `virtual_key_provider` scopes | +| `provider_key_id` | string | Conditional | Required for `provider_key` and `virtual_key_provider_key` scopes | +| `match_type` | string | Yes | `exact` or `wildcard` | +| `pattern` | string | Yes | Model name or wildcard prefix ending with `*` | +| `request_types` | array | Yes | Request types this override applies to. At least one value required. | +| `pricing_patch` | string | No | JSON-encoded pricing fields to override | +| `config_hash` | string | No | Auto-managed. Do not set manually | + + + + +--- + +## Pricing fields reference + +Only fields with non-zero values are applied. All values are cost **per unit** in USD. 
+ +### Token costs + +| Field | Description | +|-------|-------------| +| `input_cost_per_token` | Standard input token cost | +| `output_cost_per_token` | Standard output token cost | +| `input_cost_per_token_batches` | Input token cost for batch requests | +| `output_cost_per_token_batches` | Output token cost for batch requests | +| `input_cost_per_token_priority` | Input token cost for priority requests | +| `output_cost_per_token_priority` | Output token cost for priority requests | +| `input_cost_per_character` | Input cost per character (character-billed models) | + +### Token tier costs + +| Field | Description | +|-------|-------------| +| `input_cost_per_token_above_128k_tokens` | Input cost above 128k context | +| `output_cost_per_token_above_128k_tokens` | Output cost above 128k context | +| `input_cost_per_token_above_200k_tokens` | Input cost above 200k context | +| `output_cost_per_token_above_200k_tokens` | Output cost above 200k context | + +### Cache costs + +| Field | Description | +|-------|-------------| +| `cache_creation_input_token_cost` | Cost to write a token to the prompt cache | +| `cache_read_input_token_cost` | Cost to read a cached input token | +| `cache_creation_input_token_cost_above_200k_tokens` | Cache creation above 200k context | +| `cache_read_input_token_cost_above_200k_tokens` | Cache read above 200k context | +| `cache_read_input_token_cost_priority` | Priority cache read cost | +| `cache_read_input_image_token_cost` | Cache read cost for image tokens | +| `cache_creation_input_audio_token_cost` | Cache creation cost for audio tokens | + +### Image costs + +| Field | Description | +|-------|-------------| +| `input_cost_per_image` | Cost per input image | +| `output_cost_per_image` | Cost per generated image | +| `input_cost_per_pixel` | Cost per input pixel | +| `output_cost_per_pixel` | Cost per output pixel | +| `input_cost_per_image_token` | Cost per image input token | +| `output_cost_per_image_token` | Cost per image 
output token | +| `output_cost_per_image_low_quality` | Generated image — low quality | +| `output_cost_per_image_medium_quality` | Generated image — medium quality | +| `output_cost_per_image_high_quality` | Generated image — high quality | +| `output_cost_per_image_auto_quality` | Generated image — auto quality | +| `output_cost_per_image_above_512_and_512_pixels` | Generated image > 512×512 | +| `output_cost_per_image_above_1024_and_1024_pixels` | Generated image > 1024×1024 | +| `output_cost_per_image_above_2048_and_2048_pixels` | Generated image > 2048×2048 | +| `output_cost_per_image_above_4096_and_4096_pixels` | Generated image > 4096×4096 | + +### Audio and video costs + +| Field | Description | +|-------|-------------| +| `input_cost_per_audio_token` | Cost per audio input token | +| `input_cost_per_audio_per_second` | Cost per second of audio input | +| `input_cost_per_second` | Cost per second of input (generic) | +| `input_cost_per_video_per_second` | Cost per second of video input | +| `output_cost_per_audio_token` | Cost per audio output token | +| `output_cost_per_second` | Cost per second of output (generic) | +| `output_cost_per_video_per_second` | Cost per second of video output | +| `input_cost_per_video_per_second_above_128k_tokens` | Video input cost above 128k context | +| `input_cost_per_audio_per_second_above_128k_tokens` | Audio input cost above 128k context | + +### Other costs + +| Field | Description | +|-------|-------------| +| `search_context_cost_per_query` | Cost per web search context query | +| `code_interpreter_cost_per_session` | Cost per code interpreter session | + +--- + +## Examples + +### Flat rate for all Anthropic models + +Apply a single input/output rate to every Claude model served by the Anthropic provider: + +```json +{ + "id": "anthropic-flat-rate", + "name": "Anthropic flat rate", + "scope_kind": "provider", + "provider_id": "anthropic", + "match_type": "wildcard", + "pattern": "claude*", + "request_types": ["chat_completion", 
"text_completion", "responses"], + "pricing_patch": "{\"input_cost_per_token\":0.000003,\"output_cost_per_token\":0.000015}" +} +``` + +### Per-virtual-key negotiated rate + +A specific virtual key has negotiated lower prices for GPT-4o: + +```json +{ + "id": "vk-prod-gpt4o-rate", + "name": "Prod VK — GPT-4o negotiated rate", + "scope_kind": "virtual_key", + "virtual_key_id": "vk-abc123", + "match_type": "exact", + "pattern": "gpt-4o", + "request_types": ["chat_completion"], + "pricing_patch": "{\"input_cost_per_token\":0.000002,\"output_cost_per_token\":0.000008}" +} +``` + +### Image generation override + +Override costs for a specific image model at global scope: + +```json +{ + "id": "dall-e-3-rate", + "name": "DALL-E 3 custom rate", + "scope_kind": "global", + "match_type": "exact", + "pattern": "dall-e-3", + "request_types": ["image_generation"], + "pricing_patch": "{\"output_cost_per_image_high_quality\":0.04,\"output_cost_per_image_medium_quality\":0.02}" +} +``` + +### Global catch-all for a new model + +Use a global override to add pricing for a model not yet in the built-in catalog: + +```json +{ + "id": "my-new-model-rate", + "name": "my-new-model pricing", + "scope_kind": "global", + "match_type": "exact", + "pattern": "my-new-model-v1", + "request_types": ["chat_completion"], + "pricing_patch": "{\"input_cost_per_token\":0.000001,\"output_cost_per_token\":0.000005}" +} +``` + +--- + +## Next steps + +- **[Virtual Keys](../features/governance/virtual-keys)** — Attach virtual-key-scoped overrides to virtual keys for per-customer pricing +- **[Budget and Limits](../features/governance/budget-and-limits)** — Understand how costs are tracked against budgets +- **[Model Catalog](../architecture/framework/model-catalog)** — Deep dive into how pricing resolution and cost calculation work internally diff --git a/examples/configs/withpricingoverridesnostore/config.json b/examples/configs/withpricingoverridesnostore/config.json new file mode 100644 index 
0000000000..cfb29ebd35 --- /dev/null +++ b/examples/configs/withpricingoverridesnostore/config.json @@ -0,0 +1,74 @@ +{ + "$schema": "https://www.getbifrost.ai/schema", + "config_store": { + "enabled": false + }, + "logs_store": { + "enabled": false + }, + "governance": { + "pricing_overrides": [ + { + "id": "override-global-gpt4o", + "name": "Global GPT-4o Pricing", + "scope_kind": "global", + "match_type": "exact", + "pattern": "gpt-4o", + "request_types": ["chat_completion"], + "pricing_patch": "{\"input_cost_per_token\":0.0000025,\"output_cost_per_token\":0.00001}" + }, + { + "id": "override-global-claude-wildcard", + "name": "Global Claude Models Pricing", + "scope_kind": "global", + "match_type": "wildcard", + "pattern": "claude-*", + "request_types": ["chat_completion"], + "pricing_patch": "{\"input_cost_per_token\":0.000003,\"output_cost_per_token\":0.000015}" + }, + { + "id": "override-provider-openai-gpt4o-mini", + "name": "OpenAI GPT-4o Mini Pricing", + "scope_kind": "provider", + "provider_id": "openai", + "match_type": "exact", + "pattern": "gpt-4o-mini", + "request_types": ["chat_completion"], + "pricing_patch": "{\"input_cost_per_token\":0.00000015,\"output_cost_per_token\":0.0000006}" + } + ] + }, + "plugins": [ + { + "name": "governance", + "enabled": true, + "config": { + "is_vk_mandatory": false + } + } + ], + "providers": { + "openai": { + "keys": [ + { + "id": "key-openai-1", + "name": "openai-key-1", + "value": "env.OPENAI_API_KEY", + "weight": 1, + "models": ["*"] + } + ] + }, + "anthropic": { + "keys": [ + { + "id": "key-anthropic-1", + "name": "anthropic-key-1", + "value": "env.ANTHROPIC_API_KEY", + "weight": 1, + "models": ["*"] + } + ] + } + } +} diff --git a/examples/configs/withpricingoverridessqlite/config.json b/examples/configs/withpricingoverridessqlite/config.json new file mode 100644 index 0000000000..b99094bcea --- /dev/null +++ b/examples/configs/withpricingoverridessqlite/config.json @@ -0,0 +1,82 @@ +{ + "$schema": 
"https://www.getbifrost.ai/schema", + "config_store": { + "enabled": true, + "type": "sqlite", + "config": { + "path": "config.db" + } + }, + "logs_store": { + "enabled": true, + "type": "sqlite", + "config": { + "path": "logs.db" + } + }, + "governance": { + "pricing_overrides": [ + { + "id": "override-global-gpt4o", + "name": "Global GPT-4o Pricing", + "scope_kind": "global", + "match_type": "exact", + "pattern": "gpt-4o", + "request_types": ["chat_completion"], + "pricing_patch": "{\"input_cost_per_token\":0.0000025,\"output_cost_per_token\":0.00001}" + }, + { + "id": "override-global-claude-wildcard", + "name": "Global Claude Models Pricing", + "scope_kind": "global", + "match_type": "wildcard", + "pattern": "claude-*", + "request_types": ["chat_completion"], + "pricing_patch": "{\"input_cost_per_token\":0.000003,\"output_cost_per_token\":0.000015}" + }, + { + "id": "override-provider-openai-gpt4o-mini", + "name": "OpenAI GPT-4o Mini Pricing", + "scope_kind": "provider", + "provider_id": "openai", + "match_type": "exact", + "pattern": "gpt-4o-mini", + "request_types": ["chat_completion"], + "pricing_patch": "{\"input_cost_per_token\":0.00000015,\"output_cost_per_token\":0.0000006}" + } + ] + }, + "plugins": [ + { + "name": "governance", + "enabled": true, + "config": { + "is_vk_mandatory": false + } + } + ], + "providers": { + "openai": { + "keys": [ + { + "id": "key-openai-1", + "name": "openai-key-1", + "value": "env.OPENAI_API_KEY", + "weight": 1, + "models": ["*"] + } + ] + }, + "anthropic": { + "keys": [ + { + "id": "key-anthropic-1", + "name": "anthropic-key-1", + "value": "env.ANTHROPIC_API_KEY", + "weight": 1, + "models": ["*"] + } + ] + } + } +} diff --git a/framework/configstore/clientconfig.go b/framework/configstore/clientconfig.go index 76b8631ee4..431d36d4c3 100644 --- a/framework/configstore/clientconfig.go +++ b/framework/configstore/clientconfig.go @@ -262,7 +262,6 @@ type ProviderConfig struct { SendBackRawResponse bool 
`json:"send_back_raw_response"` // Include raw response in BifrostResponse StoreRawRequestResponse bool `json:"store_raw_request_response"` // Capture raw request/response for internal logging only; strip from API responses returned to clients CustomProviderConfig *schemas.CustomProviderConfig `json:"custom_provider_config,omitempty"` // Custom provider configuration - PricingOverrides []schemas.ProviderPricingOverride `json:"pricing_overrides,omitempty"` // Provider-level pricing overrides ConfigHash string `json:"config_hash,omitempty"` // Hash of config.json version, used for change detection Status string `json:"status,omitempty"` // Model discovery status for keyless providers Description string `json:"description,omitempty"` // Model discovery error message for keyless providers @@ -282,7 +281,6 @@ func (p *ProviderConfig) Redacted() *ProviderConfig { SendBackRawResponse: p.SendBackRawResponse, StoreRawRequestResponse: p.StoreRawRequestResponse, CustomProviderConfig: p.CustomProviderConfig, - PricingOverrides: p.PricingOverrides, ConfigHash: p.ConfigHash, Status: p.Status, Description: p.Description, @@ -451,15 +449,6 @@ func (p *ProviderConfig) GenerateConfigHash(providerName string) (string, error) hash.Write(data) } - // Hash PricingOverrides - if p.PricingOverrides != nil { - data, err := sonic.Marshal(p.PricingOverrides) - if err != nil { - return "", err - } - hash.Write(data) - } - // Hash SendBackRawRequest if p.SendBackRawRequest { hash.Write([]byte("sendBackRawRequest")) @@ -978,6 +967,23 @@ func GenerateRoutingRuleHash(r tables.TableRoutingRule) (string, error) { return hex.EncodeToString(hash.Sum(nil)), nil } +// GeneratePricingOverrideHash generates a SHA256 hash for a pricing override. +// Skips: CreatedAt, UpdatedAt, ConfigHash (dynamic/meta fields). 
+func GeneratePricingOverrideHash(p tables.TablePricingOverride) (string, error) { + hash := sha256.New() + hash.Write([]byte(p.ID)) + hash.Write([]byte(p.Name)) + hash.Write([]byte(p.ScopeKind)) + hash.Write([]byte(derefStr(p.VirtualKeyID))) + hash.Write([]byte(derefStr(p.ProviderID))) + hash.Write([]byte(derefStr(p.ProviderKeyID))) + hash.Write([]byte(p.MatchType)) + hash.Write([]byte(p.Pattern)) + hash.Write([]byte(p.RequestTypesJSON)) + hash.Write([]byte(p.PricingPatchJSON)) + return hex.EncodeToString(hash.Sum(nil)), nil +} + // GenerateMCPClientHash generates a SHA256 hash for an MCP client. // This is used to detect changes to MCP clients between config.json and database. // Skips: ID (autoIncrement), CreatedAt, UpdatedAt (dynamic fields) @@ -1101,14 +1107,17 @@ type AuthConfig struct { // ConfigMap maps provider names to their configurations. type ConfigMap map[schemas.ModelProvider]ProviderConfig +// GovernanceConfig contains governance entities loaded from the config store or +// reconciled from config.json. 
type GovernanceConfig struct { - VirtualKeys []tables.TableVirtualKey `json:"virtual_keys"` - Teams []tables.TableTeam `json:"teams"` - Customers []tables.TableCustomer `json:"customers"` - Budgets []tables.TableBudget `json:"budgets"` - RateLimits []tables.TableRateLimit `json:"rate_limits"` - ModelConfigs []tables.TableModelConfig `json:"model_configs"` - Providers []tables.TableProvider `json:"providers"` - RoutingRules []tables.TableRoutingRule `json:"routing_rules"` - AuthConfig *AuthConfig `json:"auth_config,omitempty"` + VirtualKeys []tables.TableVirtualKey `json:"virtual_keys"` + Teams []tables.TableTeam `json:"teams"` + Customers []tables.TableCustomer `json:"customers"` + Budgets []tables.TableBudget `json:"budgets"` + RateLimits []tables.TableRateLimit `json:"rate_limits"` + ModelConfigs []tables.TableModelConfig `json:"model_configs"` + Providers []tables.TableProvider `json:"providers"` + RoutingRules []tables.TableRoutingRule `json:"routing_rules"` + PricingOverrides []tables.TablePricingOverride `json:"pricing_overrides,omitempty"` + AuthConfig *AuthConfig `json:"auth_config,omitempty"` } diff --git a/framework/configstore/migrations.go b/framework/configstore/migrations.go index 9f0f4c22a7..23c69dafa8 100644 --- a/framework/configstore/migrations.go +++ b/framework/configstore/migrations.go @@ -274,7 +274,7 @@ func triggerMigrations(ctx context.Context, db *gorm.DB) error { if err := migrationAddEnforceAuthOnInferenceColumn(ctx, db); err != nil { return err } - if err := migrationAddProviderPricingOverridesColumn(ctx, db); err != nil { + if err := migrationReconcilePricingOverridesTable(ctx, db); err != nil { return err } if err := migrationAddEncryptionColumns(ctx, db); err != nil { @@ -329,6 +329,9 @@ func triggerMigrations(ctx context.Context, db *gorm.DB) error { if err := migrationAddMCPClientAllowedExtraHeadersJSONColumn(ctx, db); err != nil { return err } + if err := migrationMakeBasePricingColumnsNullable(ctx, db); err != nil { + return err 
+ } return nil } @@ -355,7 +358,6 @@ func migrationAddStoreRawRequestResponseColumn(ctx context.Context, db *gorm.DB) "concurrency_buffer_json", "proxy_config_json", "custom_provider_config_json", - "pricing_overrides_json", "send_back_raw_request", "send_back_raw_response", "store_raw_request_response", @@ -373,7 +375,6 @@ func migrationAddStoreRawRequestResponseColumn(ctx context.Context, db *gorm.DB) SendBackRawResponse: provider.SendBackRawResponse, StoreRawRequestResponse: provider.StoreRawRequestResponse, CustomProviderConfig: provider.CustomProviderConfig, - PricingOverrides: provider.PricingOverrides, } // Here the default value of store_raw_request_response should be based on the default value of SendBackRawRequest and SendBackRawResponse if provider.SendBackRawRequest || provider.SendBackRawResponse { @@ -511,6 +512,11 @@ func migrationInit(ctx context.Context, db *gorm.DB) error { return err } } + if !migrator.HasTable(&tables.TablePricingOverride{}) { + if err := migrator.CreateTable(&tables.TablePricingOverride{}); err != nil { + return err + } + } if !migrator.HasTable(&tables.TablePlugin{}) { if err := migrator.CreateTable(&tables.TablePlugin{}); err != nil { return err @@ -568,6 +574,9 @@ func migrationInit(ctx context.Context, db *gorm.DB) error { if err := migrator.DropTable(&tables.TableModelPricing{}); err != nil { return err } + if err := migrator.DropTable(&tables.TablePricingOverride{}); err != nil { + return err + } if err := migrator.DropTable(&tables.TablePlugin{}); err != nil { return err } @@ -4042,33 +4051,45 @@ func migrationAddEnforceAuthOnInferenceColumn(ctx context.Context, db *gorm.DB) return nil } -// migrationAddProviderPricingOverridesColumn adds the pricing_overrides_json column to the config_provider table -func migrationAddProviderPricingOverridesColumn(ctx context.Context, db *gorm.DB) error { +func migrationReconcilePricingOverridesTable(ctx context.Context, db *gorm.DB) error { m := migrator.New(db, 
migrator.DefaultOptions, []*migrator.Migration{{ - ID: "add_provider_pricing_overrides_column", + ID: "reconcile_pricing_overrides_table", Migrate: func(tx *gorm.DB) error { tx = tx.WithContext(ctx) - migrator := tx.Migrator() - if !migrator.HasColumn(&tables.TableProvider{}, "pricing_overrides_json") { - if err := migrator.AddColumn(&tables.TableProvider{}, "PricingOverridesJSON"); err != nil { - return fmt.Errorf("failed to add pricing_overrides_json column: %w", err) + mgr := tx.Migrator() + + if !mgr.HasTable(&tables.TablePricingOverride{}) { + if err := mgr.CreateTable(&tables.TablePricingOverride{}); err != nil { + return fmt.Errorf("failed to create governance_pricing_overrides table: %w", err) + } + return nil + } + if err := tx.AutoMigrate(&tables.TablePricingOverride{}); err != nil { + return fmt.Errorf("failed to automigrate governance_pricing_overrides table: %w", err) + } + for _, indexName := range []string{"idx_pricing_override_scope", "idx_pricing_override_match"} { + if mgr.HasIndex(&tables.TablePricingOverride{}, indexName) { + continue + } + if err := mgr.CreateIndex(&tables.TablePricingOverride{}, indexName); err != nil { + return fmt.Errorf("failed to create pricing override index %s: %w", indexName, err) } } return nil }, Rollback: func(tx *gorm.DB) error { tx = tx.WithContext(ctx) - migrator := tx.Migrator() - if migrator.HasColumn(&tables.TableProvider{}, "pricing_overrides_json") { - if err := migrator.DropColumn(&tables.TableProvider{}, "pricing_overrides_json"); err != nil { - return fmt.Errorf("failed to drop pricing_overrides_json column: %w", err) + mgr := tx.Migrator() + if mgr.HasTable(&tables.TablePricingOverride{}) { + if err := mgr.DropTable(&tables.TablePricingOverride{}); err != nil { + return fmt.Errorf("failed to drop governance_pricing_overrides table: %w", err) } } return nil }, }}) if err := m.Migrate(); err != nil { - return fmt.Errorf("error running provider pricing overrides column migration: %s", err.Error()) + return 
fmt.Errorf("error while running pricing overrides table reconcile migration: %s", err.Error()) } return nil } @@ -5108,3 +5129,31 @@ func migrationAddPluginOrderColumns(ctx context.Context, db *gorm.DB) error { } return nil } + +// migrationMakeBasePricingColumnsNullable drops the NOT NULL constraint on +// input_cost_per_token and output_cost_per_token in governance_model_pricing, +// allowing models that only have non-token pricing (image, audio, video) to be +// stored without a placeholder zero value. +func migrationMakeBasePricingColumnsNullable(ctx context.Context, db *gorm.DB) error { + m := migrator.New(db, migrator.DefaultOptions, []*migrator.Migration{{ + ID: "make_base_pricing_columns_nullable", + Migrate: func(tx *gorm.DB) error { + tx = tx.WithContext(ctx) + m := tx.Migrator() + if err := m.AlterColumn(&tables.TableModelPricing{}, "InputCostPerToken"); err != nil { + return fmt.Errorf("failed to alter input_cost_per_token: %w", err) + } + if err := m.AlterColumn(&tables.TableModelPricing{}, "OutputCostPerToken"); err != nil { + return fmt.Errorf("failed to alter output_cost_per_token: %w", err) + } + return nil + }, + Rollback: func(tx *gorm.DB) error { + return nil + }, + }}) + if err := m.Migrate(); err != nil { + return fmt.Errorf("error while running make_base_pricing_columns_nullable migration: %s", err.Error()) + } + return nil +} diff --git a/framework/configstore/rdb.go b/framework/configstore/rdb.go index 3c5d138f59..5ae4bacd99 100644 --- a/framework/configstore/rdb.go +++ b/framework/configstore/rdb.go @@ -252,7 +252,6 @@ func (s *RDBConfigStore) UpdateProvidersConfig(ctx context.Context, providers ma SendBackRawResponse: providerConfig.SendBackRawResponse, StoreRawRequestResponse: providerConfig.StoreRawRequestResponse, CustomProviderConfig: providerConfig.CustomProviderConfig, - PricingOverrides: providerConfig.PricingOverrides, ConfigHash: providerConfig.ConfigHash, Status: providerConfig.Status, Description: providerConfig.Description, @@ 
-423,7 +422,6 @@ func (s *RDBConfigStore) UpdateProvider(ctx context.Context, provider schemas.Mo dbProvider.SendBackRawResponse = configCopy.SendBackRawResponse dbProvider.StoreRawRequestResponse = configCopy.StoreRawRequestResponse dbProvider.CustomProviderConfig = configCopy.CustomProviderConfig - dbProvider.PricingOverrides = configCopy.PricingOverrides dbProvider.ConfigHash = configCopy.ConfigHash // Save the updated provider @@ -562,7 +560,6 @@ func (s *RDBConfigStore) AddProvider(ctx context.Context, provider schemas.Model SendBackRawResponse: configCopy.SendBackRawResponse, StoreRawRequestResponse: configCopy.StoreRawRequestResponse, CustomProviderConfig: configCopy.CustomProviderConfig, - PricingOverrides: configCopy.PricingOverrides, ConfigHash: configCopy.ConfigHash, } // Create the provider @@ -721,7 +718,6 @@ func (s *RDBConfigStore) GetProvidersConfig(ctx context.Context) (map[schemas.Mo SendBackRawResponse: dbProvider.SendBackRawResponse, StoreRawRequestResponse: dbProvider.StoreRawRequestResponse, CustomProviderConfig: dbProvider.CustomProviderConfig, - PricingOverrides: dbProvider.PricingOverrides, ConfigHash: dbProvider.ConfigHash, Status: dbProvider.Status, Description: dbProvider.Description, @@ -770,7 +766,6 @@ func (s *RDBConfigStore) GetProviderConfig(ctx context.Context, provider schemas SendBackRawResponse: dbProvider.SendBackRawResponse, StoreRawRequestResponse: dbProvider.StoreRawRequestResponse, CustomProviderConfig: dbProvider.CustomProviderConfig, - PricingOverrides: dbProvider.PricingOverrides, ConfigHash: dbProvider.ConfigHash, Status: dbProvider.Status, Description: dbProvider.Description, @@ -1307,6 +1302,130 @@ func (s *RDBConfigStore) DeleteModelPrices(ctx context.Context, tx ...*gorm.DB) return txDB.WithContext(ctx).Session(&gorm.Session{AllowGlobalUpdate: true}).Delete(&tables.TableModelPricing{}).Error } +func (s *RDBConfigStore) GetPricingOverrides(ctx context.Context, filters PricingOverrideFilters) 
([]tables.TablePricingOverride, error) { + var overrides []tables.TablePricingOverride + q := s.db.WithContext(ctx).Model(&tables.TablePricingOverride{}) + if filters.ScopeKind != nil { + q = q.Where("scope_kind = ?", *filters.ScopeKind) + } + if filters.VirtualKeyID != nil { + q = q.Where("virtual_key_id = ?", *filters.VirtualKeyID) + } + if filters.ProviderID != nil { + q = q.Where("provider_id = ?", *filters.ProviderID) + } + if filters.ProviderKeyID != nil { + q = q.Where("provider_key_id = ?", *filters.ProviderKeyID) + } + if err := q.Order("created_at ASC").Find(&overrides).Error; err != nil { + return nil, s.parseGormError(err) + } + return overrides, nil +} + +func (s *RDBConfigStore) GetPricingOverridesPaginated(ctx context.Context, params PricingOverridesQueryParams) ([]tables.TablePricingOverride, int64, error) { + baseQuery := s.db.WithContext(ctx).Model(&tables.TablePricingOverride{}) + + if params.Search != "" { + search := "%" + strings.ToLower(params.Search) + "%" + baseQuery = baseQuery.Where("LOWER(name) LIKE ?", search) + } + if params.ScopeKind != nil { + baseQuery = baseQuery.Where("scope_kind = ?", *params.ScopeKind) + } + if params.VirtualKeyID != nil { + baseQuery = baseQuery.Where("virtual_key_id = ?", *params.VirtualKeyID) + } + if params.ProviderID != nil { + baseQuery = baseQuery.Where("provider_id = ?", *params.ProviderID) + } + if params.ProviderKeyID != nil { + baseQuery = baseQuery.Where("provider_key_id = ?", *params.ProviderKeyID) + } + + var totalCount int64 + if err := baseQuery.Count(&totalCount).Error; err != nil { + return nil, 0, err + } + + limit := params.Limit + offset := params.Offset + + if limit <= 0 { + limit = 25 + } else if limit > 100 { + limit = 100 + } + + if offset < 0 { + offset = 0 + } + + var overrides []tables.TablePricingOverride + if err := baseQuery. + Order("created_at ASC"). + Offset(offset). + Limit(limit). 
+ Find(&overrides).Error; err != nil { + return nil, 0, s.parseGormError(err) + } + return overrides, totalCount, nil +} + +func (s *RDBConfigStore) GetPricingOverrideByID(ctx context.Context, id string) (*tables.TablePricingOverride, error) { + var override tables.TablePricingOverride + if err := s.db.WithContext(ctx).First(&override, "id = ?", id).Error; err != nil { + if errors.Is(err, gorm.ErrRecordNotFound) { + return nil, ErrNotFound + } + return nil, s.parseGormError(err) + } + return &override, nil +} + +func (s *RDBConfigStore) CreatePricingOverride(ctx context.Context, override *tables.TablePricingOverride, tx ...*gorm.DB) error { + var txDB *gorm.DB + if len(tx) > 0 { + txDB = tx[0] + } else { + txDB = s.db + } + if err := txDB.WithContext(ctx).Create(override).Error; err != nil { + return s.parseGormError(err) + } + return nil +} + +func (s *RDBConfigStore) UpdatePricingOverride(ctx context.Context, override *tables.TablePricingOverride, tx ...*gorm.DB) error { + var txDB *gorm.DB + if len(tx) > 0 { + txDB = tx[0] + } else { + txDB = s.db + } + if err := txDB.WithContext(ctx).Save(override).Error; err != nil { + return s.parseGormError(err) + } + return nil +} + +func (s *RDBConfigStore) DeletePricingOverride(ctx context.Context, id string, tx ...*gorm.DB) error { + var txDB *gorm.DB + if len(tx) > 0 { + txDB = tx[0] + } else { + txDB = s.db + } + res := txDB.WithContext(ctx).Delete(&tables.TablePricingOverride{}, "id = ?", id) + if res.Error != nil { + return s.parseGormError(res.Error) + } + if res.RowsAffected == 0 { + return ErrNotFound + } + return nil +} + // MODEL PARAMETERS METHODS // GetModelParameters retrieves model parameters for a specific model. 
diff --git a/framework/configstore/store.go b/framework/configstore/store.go index 8d2117c6b5..11d6a6a899 100644 --- a/framework/configstore/store.go +++ b/framework/configstore/store.go @@ -59,6 +59,25 @@ type CustomersQueryParams struct { Search string } +// PricingOverrideFilters holds the filters for pricing overrides. +type PricingOverrideFilters struct { + ScopeKind *string + VirtualKeyID *string + ProviderID *string + ProviderKeyID *string +} + +// PricingOverridesQueryParams holds pagination, filtering, and search parameters for pricing override queries. +type PricingOverridesQueryParams struct { + Limit int + Offset int + Search string + ScopeKind *string + VirtualKeyID *string + ProviderID *string + ProviderKeyID *string +} + // ConfigStore is the interface for the config store. type ConfigStore interface { // Health check @@ -218,6 +237,14 @@ type ConfigStore interface { UpsertModelPrices(ctx context.Context, pricing *tables.TableModelPricing, tx ...*gorm.DB) error DeleteModelPrices(ctx context.Context, tx ...*gorm.DB) error + // Governance pricing overrides CRUD + GetPricingOverrides(ctx context.Context, filters PricingOverrideFilters) ([]tables.TablePricingOverride, error) + GetPricingOverridesPaginated(ctx context.Context, params PricingOverridesQueryParams) ([]tables.TablePricingOverride, int64, error) + GetPricingOverrideByID(ctx context.Context, id string) (*tables.TablePricingOverride, error) + CreatePricingOverride(ctx context.Context, override *tables.TablePricingOverride, tx ...*gorm.DB) error + UpdatePricingOverride(ctx context.Context, override *tables.TablePricingOverride, tx ...*gorm.DB) error + DeletePricingOverride(ctx context.Context, id string, tx ...*gorm.DB) error + // Model parameters GetModelParameters(ctx context.Context, model string) (*tables.TableModelParameters, error) UpsertModelParameters(ctx context.Context, params *tables.TableModelParameters, tx ...*gorm.DB) error diff --git a/framework/configstore/tables/modelpricing.go 
b/framework/configstore/tables/modelpricing.go index d264e74207..b34d3e3385 100644 --- a/framework/configstore/tables/modelpricing.go +++ b/framework/configstore/tables/modelpricing.go @@ -9,8 +9,8 @@ type TableModelPricing struct { Mode string `gorm:"type:varchar(50);not null;uniqueIndex:idx_model_provider_mode" json:"mode"` // Costs - Text - InputCostPerToken float64 `gorm:"not null" json:"input_cost_per_token"` - OutputCostPerToken float64 `gorm:"not null" json:"output_cost_per_token"` + InputCostPerToken *float64 `gorm:"default:null" json:"input_cost_per_token,omitempty"` + OutputCostPerToken *float64 `gorm:"default:null" json:"output_cost_per_token,omitempty"` InputCostPerTokenBatches *float64 `gorm:"default:null;column:input_cost_per_token_batches" json:"input_cost_per_token_batches,omitempty"` OutputCostPerTokenBatches *float64 `gorm:"default:null;column:output_cost_per_token_batches" json:"output_cost_per_token_batches,omitempty"` InputCostPerTokenPriority *float64 `gorm:"default:null;column:input_cost_per_token_priority" json:"input_cost_per_token_priority,omitempty"` diff --git a/framework/configstore/tables/pricingoverride.go b/framework/configstore/tables/pricingoverride.go new file mode 100644 index 0000000000..e4b23e3069 --- /dev/null +++ b/framework/configstore/tables/pricingoverride.go @@ -0,0 +1,55 @@ +package tables + +import ( + "encoding/json" + "time" + + "github.com/maximhq/bifrost/core/schemas" + "gorm.io/gorm" +) + +// TablePricingOverride is the persistence model for governance pricing overrides. 
+type TablePricingOverride struct { + ID string `gorm:"primaryKey;type:varchar(255)" json:"id"` + Name string `gorm:"type:varchar(255);not null" json:"name"` + ScopeKind string `gorm:"type:varchar(50);index:idx_pricing_override_scope;not null" json:"scope_kind"` + VirtualKeyID *string `gorm:"type:varchar(255);index:idx_pricing_override_scope" json:"virtual_key_id,omitempty"` + ProviderID *string `gorm:"type:varchar(255);index:idx_pricing_override_scope" json:"provider_id,omitempty"` + ProviderKeyID *string `gorm:"type:varchar(255);index:idx_pricing_override_scope" json:"provider_key_id,omitempty"` + MatchType string `gorm:"type:varchar(20);index:idx_pricing_override_match;not null" json:"match_type"` + Pattern string `gorm:"type:varchar(255);not null" json:"pattern"` + RequestTypesJSON string `gorm:"type:text" json:"-"` + PricingPatchJSON string `gorm:"type:text" json:"pricing_patch,omitempty"` + ConfigHash string `gorm:"type:varchar(255);null" json:"config_hash,omitempty"` + CreatedAt time.Time `gorm:"index;not null" json:"created_at"` + UpdatedAt time.Time `gorm:"index;not null" json:"updated_at"` + + RequestTypes []schemas.RequestType `gorm:"-" json:"request_types,omitempty"` +} + +// TableName returns the backing table name for governance pricing overrides. +func (TablePricingOverride) TableName() string { return "governance_pricing_overrides" } + +// BeforeSave serializes virtual fields into their JSON columns before persistence. +func (p *TablePricingOverride) BeforeSave(tx *gorm.DB) error { + if len(p.RequestTypes) > 0 { + b, err := json.Marshal(p.RequestTypes) + if err != nil { + return err + } + p.RequestTypesJSON = string(b) + } else { + p.RequestTypesJSON = "[]" + } + return nil +} + +// AfterFind restores virtual fields from their persisted JSON columns. 
+func (p *TablePricingOverride) AfterFind(tx *gorm.DB) error { + if p.RequestTypesJSON != "" { + if err := json.Unmarshal([]byte(p.RequestTypesJSON), &p.RequestTypes); err != nil { + return err + } + } + return nil +} diff --git a/framework/configstore/tables/provider.go b/framework/configstore/tables/provider.go index 5042ca82f1..c76e0db0b2 100644 --- a/framework/configstore/tables/provider.go +++ b/framework/configstore/tables/provider.go @@ -21,7 +21,6 @@ type TableProvider struct { ConcurrencyBufferJSON string `gorm:"type:text" json:"-"` // JSON serialized schemas.ConcurrencyAndBufferSize ProxyConfigJSON string `gorm:"type:text" json:"-"` // JSON serialized schemas.ProxyConfig CustomProviderConfigJSON string `gorm:"type:text" json:"-"` // JSON serialized schemas.CustomProviderConfig - PricingOverridesJSON string `gorm:"type:text" json:"-"` // JSON serialized []schemas.ProviderPricingOverride SendBackRawRequest bool `json:"send_back_raw_request"` SendBackRawResponse bool `json:"send_back_raw_response"` StoreRawRequestResponse bool `json:"store_raw_request_response"` @@ -37,8 +36,7 @@ type TableProvider struct { ProxyConfig *schemas.ProxyConfig `gorm:"-" json:"proxy_config,omitempty"` // Custom provider fields - CustomProviderConfig *schemas.CustomProviderConfig `gorm:"-" json:"custom_provider_config,omitempty"` - PricingOverrides []schemas.ProviderPricingOverride `gorm:"-" json:"pricing_overrides,omitempty"` + CustomProviderConfig *schemas.CustomProviderConfig `gorm:"-" json:"custom_provider_config,omitempty"` // Foreign keys Models []TableModel `gorm:"foreignKey:ProviderID;constraint:OnDelete:CASCADE" json:"models"` @@ -100,16 +98,6 @@ func (p *TableProvider) BeforeSave(tx *gorm.DB) error { } p.CustomProviderConfigJSON = string(data) } - if p.PricingOverrides != nil { - data, err := json.Marshal(p.PricingOverrides) - if err != nil { - return err - } - p.PricingOverridesJSON = string(data) - } else { - p.PricingOverridesJSON = "" - } - // Validate governance 
fields if p.BudgetID != nil && strings.TrimSpace(*p.BudgetID) == "" { return fmt.Errorf("budget_id cannot be an empty string") @@ -173,13 +161,5 @@ func (p *TableProvider) AfterFind(tx *gorm.DB) error { p.CustomProviderConfig = &customConfig } - if p.PricingOverridesJSON != "" { - var overrides []schemas.ProviderPricingOverride - if err := json.Unmarshal([]byte(p.PricingOverridesJSON), &overrides); err != nil { - return err - } - p.PricingOverrides = overrides - } - return nil } diff --git a/framework/logstore/tables.go b/framework/logstore/tables.go index dae5502078..c408a8f408 100644 --- a/framework/logstore/tables.go +++ b/framework/logstore/tables.go @@ -29,22 +29,22 @@ const ( // SearchFilters represents the available filters for log searches type SearchFilters struct { - Providers []string `json:"providers,omitempty"` - Models []string `json:"models,omitempty"` - Status []string `json:"status,omitempty"` - Objects []string `json:"objects,omitempty"` // For filtering by request type (chat.completion, text.completion, embedding) - SelectedKeyIDs []string `json:"selected_key_ids,omitempty"` - VirtualKeyIDs []string `json:"virtual_key_ids,omitempty"` - RoutingRuleIDs []string `json:"routing_rule_ids,omitempty"` - RoutingEngineUsed []string `json:"routing_engine_used,omitempty"` // For filtering by routing engine (routing-rule, governance, loadbalancing) - StartTime *time.Time `json:"start_time,omitempty"` - EndTime *time.Time `json:"end_time,omitempty"` - MinLatency *float64 `json:"min_latency,omitempty"` - MaxLatency *float64 `json:"max_latency,omitempty"` - MinTokens *int `json:"min_tokens,omitempty"` - MaxTokens *int `json:"max_tokens,omitempty"` - MinCost *float64 `json:"min_cost,omitempty"` - MaxCost *float64 `json:"max_cost,omitempty"` + Providers []string `json:"providers,omitempty"` + Models []string `json:"models,omitempty"` + Status []string `json:"status,omitempty"` + Objects []string `json:"objects,omitempty"` // For filtering by request type 
(chat.completion, text.completion, embedding) + SelectedKeyIDs []string `json:"selected_key_ids,omitempty"` + VirtualKeyIDs []string `json:"virtual_key_ids,omitempty"` + RoutingRuleIDs []string `json:"routing_rule_ids,omitempty"` + RoutingEngineUsed []string `json:"routing_engine_used,omitempty"` // For filtering by routing engine (routing-rule, governance, loadbalancing) + StartTime *time.Time `json:"start_time,omitempty"` + EndTime *time.Time `json:"end_time,omitempty"` + MinLatency *float64 `json:"min_latency,omitempty"` + MaxLatency *float64 `json:"max_latency,omitempty"` + MinTokens *int `json:"min_tokens,omitempty"` + MaxTokens *int `json:"max_tokens,omitempty"` + MinCost *float64 `json:"min_cost,omitempty"` + MaxCost *float64 `json:"max_cost,omitempty"` MissingCostOnly bool `json:"missing_cost_only,omitempty"` ContentSearch string `json:"content_search,omitempty"` MetadataFilters map[string]string `json:"metadata_filters,omitempty"` // key=metadataKey, value=metadataValue for filtering by metadata @@ -78,59 +78,59 @@ type SearchStats struct { // Log represents a complete log entry for a request/response cycle // This is the GORM model with appropriate tags type Log struct { - ID string `gorm:"primaryKey;type:varchar(255)" json:"id"` - ParentRequestID *string `gorm:"type:varchar(255)" json:"parent_request_id"` - Timestamp time.Time `gorm:"index;index:idx_logs_ts_provider_status,priority:1;not null" json:"timestamp"` - Object string `gorm:"type:varchar(255);index;not null;column:object_type" json:"object"` // text.completion, chat.completion, or embedding - Provider string `gorm:"type:varchar(255);index;index:idx_logs_ts_provider_status,priority:2;not null" json:"provider"` - Model string `gorm:"type:varchar(255);index;not null" json:"model"` - NumberOfRetries int `gorm:"default:0" json:"number_of_retries"` - FallbackIndex int `gorm:"default:0" json:"fallback_index"` - SelectedKeyID string `gorm:"type:varchar(255);index:idx_logs_selected_key_id" 
json:"selected_key_id"` - SelectedKeyName string `gorm:"type:varchar(255)" json:"selected_key_name"` - VirtualKeyID *string `gorm:"type:varchar(255);index:idx_logs_virtual_key_id" json:"virtual_key_id"` - VirtualKeyName *string `gorm:"type:varchar(255)" json:"virtual_key_name"` - RoutingEnginesUsedStr *string `gorm:"type:varchar(255);column:routing_engines_used" json:"-"` // Comma-separated routing engines - RoutingRuleID *string `gorm:"type:varchar(255);index:idx_logs_routing_rule_id" json:"routing_rule_id"` - RoutingRuleName *string `gorm:"type:varchar(255)" json:"routing_rule_name"` - InputHistory string `gorm:"type:text" json:"-"` // JSON serialized []schemas.ChatMessage - ResponsesInputHistory string `gorm:"type:text" json:"-"` // JSON serialized []schemas.ResponsesMessage - OutputMessage string `gorm:"type:text" json:"-"` // JSON serialized *schemas.ChatMessage - ResponsesOutput string `gorm:"type:text" json:"-"` // JSON serialized *schemas.ResponsesMessage - EmbeddingOutput string `gorm:"type:text" json:"-"` // JSON serialized [][]float32 - RerankOutput string `gorm:"type:text" json:"-"` // JSON serialized []schemas.RerankResult - Params string `gorm:"type:text" json:"-"` // JSON serialized *schemas.ModelParameters - Tools string `gorm:"type:text" json:"-"` // JSON serialized []schemas.Tool - ToolCalls string `gorm:"type:text" json:"-"` // JSON serialized []schemas.ToolCall (For backward compatibility, tool calls are now in the content) - SpeechInput string `gorm:"type:text" json:"-"` // JSON serialized *schemas.SpeechInput - TranscriptionInput string `gorm:"type:text" json:"-"` // JSON serialized *schemas.TranscriptionInput - ImageGenerationInput string `gorm:"type:text" json:"-"` // JSON serialized *schemas.ImageGenerationInput - VideoGenerationInput string `gorm:"type:text" json:"-"` // JSON serialized *schemas.VideoGenerationInput - SpeechOutput string `gorm:"type:text" json:"-"` // JSON serialized *schemas.BifrostSpeech - TranscriptionOutput string 
`gorm:"type:text" json:"-"` // JSON serialized *schemas.BifrostTranscribe - ImageGenerationOutput string `gorm:"type:text" json:"-"` // JSON serialized *schemas.BifrostImageGenerationResponse - ListModelsOutput string `gorm:"type:text" json:"-"` // JSON serialized []schemas.Model - VideoGenerationOutput string `gorm:"type:text" json:"-"` // JSON serialized *schemas.BifrostVideoGenerationResponse - VideoRetrieveOutput string `gorm:"type:text" json:"-"` // JSON serialized *schemas.BifrostVideoRetrieveResponse - VideoDownloadOutput string `gorm:"type:text" json:"-"` // JSON serialized *schemas.BifrostVideoDownloadResponse - VideoListOutput string `gorm:"type:text" json:"-"` // JSON serialized *schemas.BifrostVideoListResponse - VideoDeleteOutput string `gorm:"type:text" json:"-"` // JSON serialized *schemas.BifrostVideoDeleteResponse - CacheDebug string `gorm:"type:text" json:"-"` // JSON serialized *schemas.BifrostCacheDebug - Latency *float64 `gorm:"index:idx_logs_latency" json:"latency,omitempty"` - TokenUsage string `gorm:"type:text" json:"-"` // JSON serialized *schemas.LLMUsage - Cost *float64 `gorm:"index" json:"cost,omitempty"` // Cost in dollars (total cost of the request - includes cache lookup cost) - Status string `gorm:"type:varchar(50);index;index:idx_logs_ts_provider_status,priority:3;not null" json:"status"` // "processing", "success", or "error" - ErrorDetails string `gorm:"type:text" json:"-"` // JSON serialized *schemas.BifrostError - Stream bool `gorm:"default:false" json:"stream"` // true if this was a streaming response - ContentSummary string `gorm:"type:text" json:"-"` - RawRequest string `gorm:"type:text" json:"raw_request"` // Populated when `send-back-raw-request` is on - RawResponse string `gorm:"type:text" json:"raw_response"` // Populated when `send-back-raw-response` is on + ID string `gorm:"primaryKey;type:varchar(255)" json:"id"` + ParentRequestID *string `gorm:"type:varchar(255)" json:"parent_request_id"` + Timestamp time.Time 
`gorm:"index;index:idx_logs_ts_provider_status,priority:1;not null" json:"timestamp"` + Object string `gorm:"type:varchar(255);index;not null;column:object_type" json:"object"` // text.completion, chat.completion, or embedding + Provider string `gorm:"type:varchar(255);index;index:idx_logs_ts_provider_status,priority:2;not null" json:"provider"` + Model string `gorm:"type:varchar(255);index;not null" json:"model"` + NumberOfRetries int `gorm:"default:0" json:"number_of_retries"` + FallbackIndex int `gorm:"default:0" json:"fallback_index"` + SelectedKeyID string `gorm:"type:varchar(255);index:idx_logs_selected_key_id" json:"selected_key_id"` + SelectedKeyName string `gorm:"type:varchar(255)" json:"selected_key_name"` + VirtualKeyID *string `gorm:"type:varchar(255);index:idx_logs_virtual_key_id" json:"virtual_key_id"` + VirtualKeyName *string `gorm:"type:varchar(255)" json:"virtual_key_name"` + RoutingEnginesUsedStr *string `gorm:"type:varchar(255);column:routing_engines_used" json:"-"` // Comma-separated routing engines + RoutingRuleID *string `gorm:"type:varchar(255);index:idx_logs_routing_rule_id" json:"routing_rule_id"` + RoutingRuleName *string `gorm:"type:varchar(255)" json:"routing_rule_name"` + InputHistory string `gorm:"type:text" json:"-"` // JSON serialized []schemas.ChatMessage + ResponsesInputHistory string `gorm:"type:text" json:"-"` // JSON serialized []schemas.ResponsesMessage + OutputMessage string `gorm:"type:text" json:"-"` // JSON serialized *schemas.ChatMessage + ResponsesOutput string `gorm:"type:text" json:"-"` // JSON serialized *schemas.ResponsesMessage + EmbeddingOutput string `gorm:"type:text" json:"-"` // JSON serialized [][]float32 + RerankOutput string `gorm:"type:text" json:"-"` // JSON serialized []schemas.RerankResult + Params string `gorm:"type:text" json:"-"` // JSON serialized *schemas.ModelParameters + Tools string `gorm:"type:text" json:"-"` // JSON serialized []schemas.Tool + ToolCalls string `gorm:"type:text" json:"-"` // JSON 
serialized []schemas.ToolCall (For backward compatibility, tool calls are now in the content) + SpeechInput string `gorm:"type:text" json:"-"` // JSON serialized *schemas.SpeechInput + TranscriptionInput string `gorm:"type:text" json:"-"` // JSON serialized *schemas.TranscriptionInput + ImageGenerationInput string `gorm:"type:text" json:"-"` // JSON serialized *schemas.ImageGenerationInput + VideoGenerationInput string `gorm:"type:text" json:"-"` // JSON serialized *schemas.VideoGenerationInput + SpeechOutput string `gorm:"type:text" json:"-"` // JSON serialized *schemas.BifrostSpeech + TranscriptionOutput string `gorm:"type:text" json:"-"` // JSON serialized *schemas.BifrostTranscribe + ImageGenerationOutput string `gorm:"type:text" json:"-"` // JSON serialized *schemas.BifrostImageGenerationResponse + ListModelsOutput string `gorm:"type:text" json:"-"` // JSON serialized []schemas.Model + VideoGenerationOutput string `gorm:"type:text" json:"-"` // JSON serialized *schemas.BifrostVideoGenerationResponse + VideoRetrieveOutput string `gorm:"type:text" json:"-"` // JSON serialized *schemas.BifrostVideoRetrieveResponse + VideoDownloadOutput string `gorm:"type:text" json:"-"` // JSON serialized *schemas.BifrostVideoDownloadResponse + VideoListOutput string `gorm:"type:text" json:"-"` // JSON serialized *schemas.BifrostVideoListResponse + VideoDeleteOutput string `gorm:"type:text" json:"-"` // JSON serialized *schemas.BifrostVideoDeleteResponse + CacheDebug string `gorm:"type:text" json:"-"` // JSON serialized *schemas.BifrostCacheDebug + Latency *float64 `gorm:"index:idx_logs_latency" json:"latency,omitempty"` + TokenUsage string `gorm:"type:text" json:"-"` // JSON serialized *schemas.LLMUsage + Cost *float64 `gorm:"index" json:"cost,omitempty"` // Cost in dollars (total cost of the request - includes cache lookup cost) + Status string `gorm:"type:varchar(50);index;index:idx_logs_ts_provider_status,priority:3;not null" json:"status"` // "processing", "success", or 
"error" + ErrorDetails string `gorm:"type:text" json:"-"` // JSON serialized *schemas.BifrostError + Stream bool `gorm:"default:false" json:"stream"` // true if this was a streaming response + ContentSummary string `gorm:"type:text" json:"-"` + RawRequest string `gorm:"type:text" json:"raw_request"` // Populated when `send-back-raw-request` is on + RawResponse string `gorm:"type:text" json:"raw_response"` // Populated when `send-back-raw-response` is on PassthroughRequestBody string `gorm:"type:text" json:"passthrough_request_body,omitempty"` // Raw body for passthrough requests (UTF-8) PassthroughResponseBody string `gorm:"type:text" json:"passthrough_response_body,omitempty"` // Raw body for passthrough responses (UTF-8) - RoutingEngineLogs string `gorm:"type:text" json:"routing_engine_logs,omitempty"` // Formatted routing engine decision logs - Metadata *string `gorm:"type:text" json:"-"` // JSON serialized map[string]interface{} - IsLargePayloadRequest bool `gorm:"default:false" json:"is_large_payload_request"` - IsLargePayloadResponse bool `gorm:"default:false" json:"is_large_payload_response"` + RoutingEngineLogs string `gorm:"type:text" json:"routing_engine_logs,omitempty"` // Formatted routing engine decision logs + Metadata *string `gorm:"type:text" json:"-"` // JSON serialized map[string]interface{} + IsLargePayloadRequest bool `gorm:"default:false" json:"is_large_payload_request"` + IsLargePayloadResponse bool `gorm:"default:false" json:"is_large_payload_response"` // Denormalized token fields for easier querying PromptTokens int `gorm:"default:0" json:"-"` diff --git a/framework/modelcatalog/main.go b/framework/modelcatalog/main.go index be25cb69c8..3409074220 100644 --- a/framework/modelcatalog/main.go +++ b/framework/modelcatalog/main.go @@ -38,10 +38,13 @@ type ModelCatalog struct { pricingData map[string]configstoreTables.TableModelPricing mu sync.RWMutex - // Provider-level pricing overrides are maintained separately to avoid contention - // with 
pricing cache rebuilds. - compiledOverrides map[schemas.ModelProvider][]compiledProviderPricingOverride - overridesMu sync.RWMutex + // rawOverrides is the canonical list of all active overrides. It exists solely + // to support incremental mutations: UpsertPricingOverrides and DeletePricingOverride + // iterate over it to rebuild the list, then derive customPricing from it. + // customPricing is the actual lookup structure used at query time. + rawOverrides []PricingOverride + customPricing *customPricingData + overridesMu sync.RWMutex modelPool map[schemas.ModelProvider][]string unfilteredModelPool map[schemas.ModelProvider][]string // model pool without allowed models filtering @@ -61,10 +64,13 @@ type PricingEntry struct { BaseModel string `json:"base_model,omitempty"` Provider string `json:"provider"` Mode string `json:"mode"` + PricingOptions +} +type PricingOptions struct { // Costs - Text - InputCostPerToken float64 `json:"input_cost_per_token"` - OutputCostPerToken float64 `json:"output_cost_per_token"` + InputCostPerToken *float64 `json:"input_cost_per_token,omitempty"` + OutputCostPerToken *float64 `json:"output_cost_per_token,omitempty"` InputCostPerTokenBatches *float64 `json:"input_cost_per_token_batches,omitempty"` OutputCostPerTokenBatches *float64 `json:"output_cost_per_token_batches,omitempty"` InputCostPerTokenPriority *float64 `json:"input_cost_per_token_priority,omitempty"` @@ -194,7 +200,6 @@ func Init(ctx context.Context, config *Config, configStore configstore.ConfigSto configStore: configStore, logger: logger, pricingData: make(map[string]configstoreTables.TableModelPricing), - compiledOverrides: make(map[schemas.ModelProvider][]compiledProviderPricingOverride), modelPool: make(map[schemas.ModelProvider][]string), unfilteredModelPool: make(map[schemas.ModelProvider][]string), baseModelIndex: make(map[string]string), @@ -251,6 +256,10 @@ func Init(ctx context.Context, config *Config, configStore configstore.ConfigSto // Populate model pool 
with normalized providers from pricing data mc.populateModelPoolFromPricingData() + if err := mc.loadPricingOverridesFromStore(ctx); err != nil { + return nil, fmt.Errorf("failed to load pricing overrides: %w", err) + } + // Start background sync worker mc.syncCtx, mc.syncCancel = context.WithCancel(ctx) mc.startSyncWorker(mc.syncCtx) @@ -321,6 +330,10 @@ func (mc *ModelCatalog) ForceReloadPricing(ctx context.Context) error { // Rebuild model pool from updated pricing data mc.populateModelPoolFromPricingData() + if err := mc.loadPricingOverridesFromStore(ctx); err != nil { + return fmt.Errorf("failed to load pricing overrides: %w", err) + } + // Also sync model parameters if err := mc.syncModelParameters(ctx); err != nil { mc.logger.Warn("failed to sync model parameters during force reload: %v", err) @@ -784,6 +797,79 @@ func (mc *ModelCatalog) RefineModelForProvider(provider schemas.ModelProvider, m return model, nil } +// SetPricingOverrides replaces the full in-memory pricing override set. +func (mc *ModelCatalog) SetPricingOverrides(rows []configstoreTables.TablePricingOverride) error { + seen := make(map[string]int, len(rows)) + overrides := make([]PricingOverride, 0, len(rows)) + for i := range rows { + o, err := convertTablePricingOverrideToPricingOverride(&rows[i]) + if err != nil { + return err + } + if idx, exists := seen[o.ID]; exists { + overrides[idx] = o // last entry wins for duplicate IDs + } else { + seen[o.ID] = len(overrides) + overrides = append(overrides, o) + } + } + mc.overridesMu.Lock() + mc.rawOverrides = overrides + mc.customPricing = buildCustomPricingData(overrides) + mc.overridesMu.Unlock() + return nil +} + +// UpsertPricingOverrides inserts or replaces one or more pricing overrides in a single +// operation, rebuilding the lookup map only once at the end. 
+func (mc *ModelCatalog) UpsertPricingOverrides(rows ...*configstoreTables.TablePricingOverride) error { + // Deduplicate the input batch by ID (last entry wins) and build the + // incoming set for O(1) lookup when filtering existing rawOverrides. + seenIncoming := make(map[string]int, len(rows)) + overrides := make([]PricingOverride, 0, len(rows)) + for _, row := range rows { + o, err := convertTablePricingOverrideToPricingOverride(row) + if err != nil { + return err + } + if idx, exists := seenIncoming[o.ID]; exists { + overrides[idx] = o // last entry wins for duplicate IDs + } else { + seenIncoming[o.ID] = len(overrides) + overrides = append(overrides, o) + } + } + + mc.overridesMu.Lock() + defer mc.overridesMu.Unlock() + + updated := make([]PricingOverride, 0, len(mc.rawOverrides)+len(overrides)) + for _, o := range mc.rawOverrides { + if _, replacing := seenIncoming[o.ID]; !replacing { + updated = append(updated, o) + } + } + updated = append(updated, overrides...) + mc.rawOverrides = updated + mc.customPricing = buildCustomPricingData(updated) + return nil +} + +// DeletePricingOverride removes a pricing override by ID. +func (mc *ModelCatalog) DeletePricingOverride(id string) { + mc.overridesMu.Lock() + defer mc.overridesMu.Unlock() + + updated := make([]PricingOverride, 0, len(mc.rawOverrides)) + for _, o := range mc.rawOverrides { + if o.ID != id { + updated = append(updated, o) + } + } + mc.rawOverrides = updated + mc.customPricing = buildCustomPricingData(updated) +} + // IsTextCompletionSupported checks if a model supports text completion for the given provider. // Returns true if the model has pricing data for text completion ("text_completion"), // false otherwise. 
This is used by the litellmcompat plugin to determine whether to @@ -878,7 +964,6 @@ func NewTestCatalog(baseModelIndex map[string]string) *ModelCatalog { unfilteredModelPool: make(map[schemas.ModelProvider][]string), baseModelIndex: baseModelIndex, pricingData: make(map[string]configstoreTables.TableModelPricing), - compiledOverrides: make(map[schemas.ModelProvider][]compiledProviderPricingOverride), done: make(chan struct{}), } } diff --git a/framework/modelcatalog/main_test.go b/framework/modelcatalog/main_test.go index 6715989743..c406a951f1 100644 --- a/framework/modelcatalog/main_test.go +++ b/framework/modelcatalog/main_test.go @@ -17,10 +17,9 @@ func newTestCatalog(modelPool map[schemas.ModelProvider][]string, baseModelIndex baseModelIndex = make(map[string]string) } return &ModelCatalog{ - modelPool: modelPool, - baseModelIndex: baseModelIndex, - pricingData: make(map[string]configstoreTables.TableModelPricing), - compiledOverrides: make(map[schemas.ModelProvider][]compiledProviderPricingOverride), + modelPool: modelPool, + baseModelIndex: baseModelIndex, + pricingData: make(map[string]configstoreTables.TableModelPricing), } } diff --git a/framework/modelcatalog/overrides.go b/framework/modelcatalog/overrides.go index 6eef025a48..f284a80a8e 100644 --- a/framework/modelcatalog/overrides.go +++ b/framework/modelcatalog/overrides.go @@ -1,279 +1,456 @@ package modelcatalog import ( + "context" "fmt" - "regexp" + "sort" "strings" "github.com/maximhq/bifrost/core/schemas" + "github.com/maximhq/bifrost/framework/configstore" configstoreTables "github.com/maximhq/bifrost/framework/configstore/tables" ) -type compiledProviderPricingOverride struct { - override schemas.ProviderPricingOverride - regex *regexp.Regexp - requestModes map[string]struct{} - hasRequestFilter bool - literalChars int - order int +// PricingLookupScopes carries the runtime identifiers used to resolve scoped +// pricing overrides during cost calculation. 
+type PricingLookupScopes struct { + VirtualKeyID string + SelectedKeyID string + Provider string } -func (mc *ModelCatalog) SetProviderPricingOverrides(provider schemas.ModelProvider, overrides []schemas.ProviderPricingOverride) error { - compiled := make([]compiledProviderPricingOverride, 0, len(overrides)) - for i := range overrides { - item, err := compileProviderPricingOverride(i, overrides[i]) - if err != nil { - return fmt.Errorf("invalid pricing override for provider %s at index %d: %w", provider, i, err) - } - compiled = append(compiled, item) - } - - mc.overridesMu.Lock() - defer mc.overridesMu.Unlock() - if len(compiled) == 0 { - delete(mc.compiledOverrides, provider) +// PricingLookupScopesFromContext builds a PricingLookupScopes from a BifrostContext. +// It reads the governance virtual key ID (not the raw VK token) and the selected key ID. +// provider should be the provider name string (e.g. "openai"), pass "" if unavailable. +// Returns nil only when ctx is nil. An empty scopes value is still returned when all fields +// are empty so that global-scope overrides are always evaluated. +// DO NOT USE THIS FUNCTION IN A GO ROUTINE. This is because it reads from ctx which is cancelled when the request ends. +// Better to call it in PostHooks synchronously and then pass the scopes object to the pricing manager. +// Only use this in go routines when you know for sure that the request will not end before the go routine completes. 
+func PricingLookupScopesFromContext(ctx *schemas.BifrostContext, provider string) *PricingLookupScopes { + if ctx == nil { return nil } - mc.compiledOverrides[provider] = compiled - return nil + virtualKeyID, _ := ctx.Value(schemas.BifrostContextKeyGovernanceVirtualKeyID).(string) + selectedKeyID, _ := ctx.Value(schemas.BifrostContextKeySelectedKeyID).(string) + return &PricingLookupScopes{ + VirtualKeyID: virtualKeyID, + SelectedKeyID: selectedKeyID, + Provider: provider, + } } -func (mc *ModelCatalog) DeleteProviderPricingOverrides(provider schemas.ModelProvider) { - mc.overridesMu.Lock() - defer mc.overridesMu.Unlock() - delete(mc.compiledOverrides, provider) -} +// ScopeKind identifies which governance scope an override applies to. +type ScopeKind string -func (mc *ModelCatalog) applyPricingOverrides(provider schemas.ModelProvider, model string, requestType schemas.RequestType, pricing configstoreTables.TableModelPricing) configstoreTables.TableModelPricing { - mc.overridesMu.RLock() - overrides := mc.compiledOverrides[provider] - mc.overridesMu.RUnlock() - if len(overrides) == 0 { - return pricing - } +const ( + ScopeKindGlobal ScopeKind = "global" + ScopeKindProvider ScopeKind = "provider" + ScopeKindProviderKey ScopeKind = "provider_key" + ScopeKindVirtualKey ScopeKind = "virtual_key" + ScopeKindVirtualKeyProvider ScopeKind = "virtual_key_provider" + ScopeKindVirtualKeyProviderKey ScopeKind = "virtual_key_provider_key" +) - modelCandidates := []string{model} - mode := normalizeRequestType(requestType) - best := selectBestOverride(overrides, modelCandidates, mode) - if best == nil { - return pricing - } +// MatchType controls how an override pattern is matched against model names. 
+type MatchType string - return patchPricing(pricing, best.override) +const ( + MatchTypeExact MatchType = "exact" + MatchTypeWildcard MatchType = "wildcard" +) + +// PricingOverride describes a scoped pricing override shared across config storage, +// model catalog compilation, and governance APIs. +type PricingOverride struct { + ID string `json:"id"` + Name string `json:"name"` + ScopeKind ScopeKind `json:"scope_kind"` + VirtualKeyID *string `json:"virtual_key_id,omitempty"` + ProviderID *string `json:"provider_id,omitempty"` + ProviderKeyID *string `json:"provider_key_id,omitempty"` + MatchType MatchType `json:"match_type"` + Pattern string `json:"pattern"` + RequestTypes []schemas.RequestType `json:"request_types,omitempty"` + Options PricingOptions `json:"options"` } -func compileProviderPricingOverride(order int, override schemas.ProviderPricingOverride) (compiledProviderPricingOverride, error) { - pattern := strings.TrimSpace(override.ModelPattern) - if pattern == "" { - return compiledProviderPricingOverride{}, fmt.Errorf("model_pattern cannot be empty") - } +// customPricingEntry is a single flattened override ready for lookup. +type customPricingEntry struct { + id string + scopeKind ScopeKind + virtualKeyID string + providerID string + providerKeyID string + pattern string // exact model name, or wildcard prefix (trailing * stripped) + wildcard bool + requestModes map[string]struct{} // always non-nil for valid overrides + options PricingOptions +} - result := compiledProviderPricingOverride{ - override: override, - requestModes: make(map[string]struct{}), - order: order, - } - result.override.ModelPattern = pattern +// customPricingData is the in-memory lookup structure for pricing overrides. +// Exact matches are indexed by model name; wildcards are a flat slice. 
+type customPricingData struct { + exact map[string][]customPricingEntry + wildcard []customPricingEntry +} - switch override.MatchType { - case schemas.PricingOverrideMatchExact: - result.literalChars = len(pattern) - case schemas.PricingOverrideMatchWildcard: - if !strings.Contains(pattern, "*") { - return compiledProviderPricingOverride{}, fmt.Errorf("wildcard model_pattern must contain '*'") +// IsValid validates the shared pricing override contract before persistence or runtime use. +// +// Input: override — the PricingOverride to validate (receiver). +// Output: error — non-nil if any scope, pattern, or request-type constraint is violated. +func (override *PricingOverride) IsValid() error { + if err := override.validateScopeKind(); err != nil { + return err + } + if err := override.validatePattern(); err != nil { + return err + } + return override.validateRequestTypes() +} + +// validateScopeKind validates the scope identifiers required by override.ScopeKind. +// +// Input: override — receiver; ScopeKind and the three optional ID fields are inspected. +// Output: error — non-nil when required identifiers are absent or forbidden ones are present. 
+func (override *PricingOverride) validateScopeKind() error { + switch override.ScopeKind { + case ScopeKindGlobal: + if override.VirtualKeyID != nil || override.ProviderID != nil || override.ProviderKeyID != nil { + return fmt.Errorf("global scope_kind must not include scope identifiers") } - result.literalChars = len(strings.ReplaceAll(pattern, "*", "")) - case schemas.PricingOverrideMatchRegex: - re, err := regexp.Compile(pattern) - if err != nil { - return compiledProviderPricingOverride{}, fmt.Errorf("invalid regex model_pattern: %w", err) + case ScopeKindProvider: + if override.ProviderID == nil { + return fmt.Errorf("provider_id is required for provider scope_kind") } - result.regex = re - result.literalChars = len(pattern) - default: - return compiledProviderPricingOverride{}, fmt.Errorf("unsupported match_type: %s", override.MatchType) - } - - if len(override.RequestTypes) > 0 { - result.hasRequestFilter = true - for _, requestType := range override.RequestTypes { - mode := normalizeRequestType(requestType) - if mode == "unknown" { - return compiledProviderPricingOverride{}, fmt.Errorf("unsupported request_type: %s", requestType) - } - result.requestModes[mode] = struct{}{} + if override.VirtualKeyID != nil || override.ProviderKeyID != nil { + return fmt.Errorf("provider scope_kind only supports provider_id") + } + case ScopeKindProviderKey: + if override.ProviderKeyID == nil { + return fmt.Errorf("provider_key_id is required for provider_key scope_kind") + } + if override.VirtualKeyID != nil || override.ProviderID != nil { + return fmt.Errorf("provider_key scope_kind only supports provider_key_id") + } + case ScopeKindVirtualKey: + if override.VirtualKeyID == nil { + return fmt.Errorf("virtual_key_id is required for virtual_key scope_kind") + } + if override.ProviderID != nil || override.ProviderKeyID != nil { + return fmt.Errorf("virtual_key scope_kind only supports virtual_key_id") + } + case ScopeKindVirtualKeyProvider: + if override.VirtualKeyID == 
nil || override.ProviderID == nil { + return fmt.Errorf("virtual_key_id and provider_id are required for virtual_key_provider scope_kind") } + if override.ProviderKeyID != nil { + return fmt.Errorf("virtual_key_provider scope_kind does not support provider_key_id") + } + case ScopeKindVirtualKeyProviderKey: + if override.VirtualKeyID == nil || override.ProviderID == nil || override.ProviderKeyID == nil { + return fmt.Errorf("virtual_key_id, provider_id, and provider_key_id are required for virtual_key_provider_key scope_kind") + } + default: + return fmt.Errorf("unsupported scope_kind %q", override.ScopeKind) } - - return result, nil + return nil } -func selectBestOverride(overrides []compiledProviderPricingOverride, modelCandidates []string, mode string) *compiledProviderPricingOverride { - var best *compiledProviderPricingOverride - for i := range overrides { - candidate := &overrides[i] - if candidate.hasRequestFilter { - if _, ok := candidate.requestModes[mode]; !ok { - continue - } +// validatePattern checks that Pattern is non-empty and consistent with MatchType. +// +// Input: override — receiver; Pattern and MatchType are inspected. +// Output: error — non-nil when the pattern is empty, contains a wildcard for exact mode, +// +// or does not end with a single trailing "*" for wildcard mode. 
+func (override *PricingOverride) validatePattern() error { + pattern := strings.TrimSpace(override.Pattern) + if pattern == "" { + return fmt.Errorf("pattern is required") + } + switch override.MatchType { + case MatchTypeExact: + if strings.Contains(pattern, "*") { + return fmt.Errorf("exact match pattern must not contain wildcards") } - if !matchesAnyModel(candidate, modelCandidates) { - continue + case MatchTypeWildcard: + if !strings.HasSuffix(pattern, "*") { + return fmt.Errorf("wildcard pattern must end with *") } - if isBetterOverride(candidate, best) { - best = candidate + if strings.Count(pattern, "*") != 1 { + return fmt.Errorf("wildcard pattern must contain exactly one trailing *") } + default: + return fmt.Errorf("unsupported match_type %q", override.MatchType) } - return best + return nil } -func matchesAnyModel(override *compiledProviderPricingOverride, modelCandidates []string) bool { - for _, model := range modelCandidates { - if matchesModel(override, model) { - return true +// validateRequestTypes checks that RequestTypes is non-empty and that every entry is a +// supported base request type. Stream variants (e.g. chat_completion_stream) are rejected — +// the base type (chat_completion) already covers both streaming and non-streaming requests. +// +// Input: override — receiver; RequestTypes slice is inspected. +// Output: error — non-nil if RequestTypes is empty, or contains an unsupported or stream variant. +func (override *PricingOverride) validateRequestTypes() error { + if len(override.RequestTypes) == 0 { + return fmt.Errorf("request_types is required and must contain at least one value") + } + for _, rt := range override.RequestTypes { + if normalizeStreamRequestType(rt) != rt { + return fmt.Errorf("unsupported request_type %q: use the base type (e.g. 
%q covers both streaming and non-streaming)", rt, normalizeStreamRequestType(rt)) + } + if normalizeRequestType(rt) == "unknown" { + return fmt.Errorf("unsupported request_type %q", rt) } } - return false + return nil } -func matchesModel(override *compiledProviderPricingOverride, model string) bool { - switch override.override.MatchType { - case schemas.PricingOverrideMatchExact: - return model == override.override.ModelPattern - case schemas.PricingOverrideMatchWildcard: - return wildcardMatch(override.override.ModelPattern, model) - case schemas.PricingOverrideMatchRegex: - return override.regex != nil && override.regex.MatchString(model) - default: - return false +// matchesScope reports whether the entry's governance scope matches the runtime identifiers. +// +// Input: scopes — runtime VirtualKeyID, SelectedKeyID, and Provider to match against. +// Output: bool — true when the entry's scope kind and stored IDs align with scopes. +func (e *customPricingEntry) matchesScope(scopes PricingLookupScopes) bool { + switch e.scopeKind { + case ScopeKindGlobal: + return true + case ScopeKindProvider: + return e.providerID == scopes.Provider + case ScopeKindProviderKey: + return e.providerKeyID == scopes.SelectedKeyID + case ScopeKindVirtualKey: + return e.virtualKeyID == scopes.VirtualKeyID + case ScopeKindVirtualKeyProvider: + return e.virtualKeyID == scopes.VirtualKeyID && e.providerID == scopes.Provider + case ScopeKindVirtualKeyProviderKey: + return e.virtualKeyID == scopes.VirtualKeyID && e.providerID == scopes.Provider && e.providerKeyID == scopes.SelectedKeyID } + return false } -func overridePriority(matchType schemas.PricingOverrideMatchType) int { - switch matchType { - case schemas.PricingOverrideMatchExact: - return 0 - case schemas.PricingOverrideMatchWildcard: - return 1 - case schemas.PricingOverrideMatchRegex: - return 2 - default: - return 3 - } +// matchesMode reports whether the entry applies to the given normalized request mode. 
+// +// Input: mode — normalized request type string (e.g. "chat", "embedding"). +// Output: bool — true when requestModes contains mode. +func (e *customPricingEntry) matchesMode(mode string) bool { + _, ok := e.requestModes[mode] + return ok } -func isBetterOverride(candidate, best *compiledProviderPricingOverride) bool { - if best == nil { - return true - } - - candidatePriority := overridePriority(candidate.override.MatchType) - bestPriority := overridePriority(best.override.MatchType) - if candidatePriority != bestPriority { - return candidatePriority < bestPriority +// resolve walks the 6-scope priority hierarchy and returns the first matching +// pricing patch for the given model, request mode, and runtime scopes. +// +// Input: model — exact model name being priced. +// +// mode — normalized request type string (e.g. "chat", "embedding"). +// scopes — runtime governance identifiers used to narrow the scope search. +// +// Output: *PricingOptions — pointer to the first matching override's options, or nil if none match. +func (c *customPricingData) resolve(model, mode string, scopes PricingLookupScopes) *PricingOptions { + for _, scopeKind := range scopePriorityOrder(scopes) { + for i := range c.exact[model] { + e := &c.exact[model][i] + if e.scopeKind == scopeKind && e.matchesScope(scopes) && e.matchesMode(mode) { + return &e.options + } + } + for i := range c.wildcard { + e := &c.wildcard[i] + if e.scopeKind == scopeKind && e.matchesScope(scopes) && strings.HasPrefix(model, e.pattern) && e.matchesMode(mode) { + return &e.options + } + } } + return nil +} - if candidate.hasRequestFilter != best.hasRequestFilter { - return candidate.hasRequestFilter - } +// scopePriorityOrder returns scope kinds in most-specific-first order, +// skipping scopes that can't match given the available runtime identifiers. +// +// Input: scopes — runtime governance identifiers; empty fields cause the corresponding scope kinds to be omitted. 
+// Output: []ScopeKind — ordered list from most-specific (VirtualKeyProviderKey) to least-specific (Global). +func scopePriorityOrder(scopes PricingLookupScopes) []ScopeKind { + order := make([]ScopeKind, 0, 6) + if scopes.VirtualKeyID != "" && scopes.Provider != "" && scopes.SelectedKeyID != "" { + order = append(order, ScopeKindVirtualKeyProviderKey) + } + if scopes.VirtualKeyID != "" && scopes.Provider != "" { + order = append(order, ScopeKindVirtualKeyProvider) + } + if scopes.VirtualKeyID != "" { + order = append(order, ScopeKindVirtualKey) + } + if scopes.SelectedKeyID != "" { + order = append(order, ScopeKindProviderKey) + } + if scopes.Provider != "" { + order = append(order, ScopeKindProvider) + } + order = append(order, ScopeKindGlobal) + return order +} - if candidate.literalChars != best.literalChars { - return candidate.literalChars > best.literalChars +// buildCustomPricingData constructs a customPricingData lookup structure from a raw override slice. +// +// Input: overrides — slice of validated PricingOverride records loaded from the config store. +// Output: *customPricingData — ready-to-query structure with exact and wildcard indexes populated. 
+func buildCustomPricingData(overrides []PricingOverride) *customPricingData { + data := &customPricingData{ + exact: make(map[string][]customPricingEntry, len(overrides)), + } + for _, o := range overrides { + entry := customPricingEntry{ + id: o.ID, + scopeKind: o.ScopeKind, + options: o.Options, + } + if o.VirtualKeyID != nil { + entry.virtualKeyID = *o.VirtualKeyID + } + if o.ProviderID != nil { + entry.providerID = *o.ProviderID + } + if o.ProviderKeyID != nil { + entry.providerKeyID = *o.ProviderKeyID + } + entry.requestModes = make(map[string]struct{}, len(o.RequestTypes)) + for _, rt := range o.RequestTypes { + entry.requestModes[normalizeRequestType(rt)] = struct{}{} + } + pattern := strings.TrimSpace(o.Pattern) + switch o.MatchType { + case MatchTypeExact: + entry.pattern = pattern + data.exact[pattern] = append(data.exact[pattern], entry) + case MatchTypeWildcard: + entry.pattern = strings.TrimSuffix(pattern, "*") + entry.wildcard = true + data.wildcard = append(data.wildcard, entry) + } } - - return candidate.order < best.order + // Sort wildcards by descending prefix length so more-specific patterns (e.g. "gpt-4*") + // are checked before broader ones (e.g. "gpt-*"), making precedence deterministic. + sort.Slice(data.wildcard, func(i, j int) bool { + return len(data.wildcard[i].pattern) > len(data.wildcard[j].pattern) + }) + return data } -func wildcardMatch(pattern, model string) bool { - parts := strings.Split(pattern, "*") - if len(parts) == 1 { - return model == pattern - } +// applyPricingOverrides resolves any active scoped pricing override for the given model +// and request type, then patches the catalog base pricing with the override values. +// It returns the original pricing unchanged when no custom pricing tree is loaded or +// when the request type cannot be mapped to a known pricing mode. +// +// Input: model — exact model name being priced. +// +// requestType — the request type used to derive the pricing mode. 
+// pricing — base pricing row from the catalog to patch. +// scopes — runtime governance identifiers used to narrow the override scope. +// +// Output: TableModelPricing — patched pricing row, or pricing unchanged if no override matches. +// bool — true when an override was applied, false otherwise. +func (mc *ModelCatalog) applyPricingOverrides(model string, requestType schemas.RequestType, pricing configstoreTables.TableModelPricing, scopes PricingLookupScopes) (configstoreTables.TableModelPricing, bool) { + mc.overridesMu.RLock() + custom := mc.customPricing + mc.overridesMu.RUnlock() - remaining := model - if parts[0] != "" { - if !strings.HasPrefix(remaining, parts[0]) { - return false - } - remaining = remaining[len(parts[0]):] + if custom == nil { + return pricing, false } - for i := 1; i < len(parts)-1; i++ { - part := parts[i] - if part == "" { - continue - } - index := strings.Index(remaining, part) - if index < 0 { - return false - } - remaining = remaining[index+len(part):] + mode := normalizeRequestType(requestType) + if mode == "unknown" { + return pricing, false } - last := parts[len(parts)-1] - if last == "" { - return true + if patch := custom.resolve(model, mode, scopes); patch != nil { + return patchPricing(pricing, *patch), true } - return strings.HasSuffix(remaining, last) + return pricing, false } -func patchPricing(pricing configstoreTables.TableModelPricing, override schemas.ProviderPricingOverride) configstoreTables.TableModelPricing { +// patchPricing applies override values onto a copy of the base pricing row. +// For all fields, a non-nil override pointer replaces the corresponding destination value; +// a nil override leaves the base value intact. +// The original pricing row is never modified; a patched copy is always returned. +// +// Input: pricing — base pricing row from the catalog. +// +// override — pricing options sourced from the matched override entry. 
+// +// Output: TableModelPricing — shallow copy of pricing with override fields applied. +func patchPricing(pricing configstoreTables.TableModelPricing, override PricingOptions) configstoreTables.TableModelPricing { patched := pricing - if override.InputCostPerToken != nil { - patched.InputCostPerToken = *override.InputCostPerToken - } - if override.OutputCostPerToken != nil { - patched.OutputCostPerToken = *override.OutputCostPerToken - } - if override.InputCostPerVideoPerSecond != nil { - patched.InputCostPerVideoPerSecond = override.InputCostPerVideoPerSecond - } - if override.InputCostPerAudioPerSecond != nil { - patched.InputCostPerAudioPerSecond = override.InputCostPerAudioPerSecond - } - if override.InputCostPerTokenAbove200kTokens != nil { - patched.InputCostPerTokenAbove200kTokens = override.InputCostPerTokenAbove200kTokens - } - if override.OutputCostPerTokenAbove200kTokens != nil { - patched.OutputCostPerTokenAbove200kTokens = override.OutputCostPerTokenAbove200kTokens - } - if override.CacheCreationInputTokenCostAbove200kTokens != nil { - patched.CacheCreationInputTokenCostAbove200kTokens = override.CacheCreationInputTokenCostAbove200kTokens - } - if override.CacheReadInputTokenCostAbove200kTokens != nil { - patched.CacheReadInputTokenCostAbove200kTokens = override.CacheReadInputTokenCostAbove200kTokens - } - if override.CacheReadInputTokenCost != nil { - patched.CacheReadInputTokenCost = override.CacheReadInputTokenCost - } - if override.CacheCreationInputTokenCost != nil { - patched.CacheCreationInputTokenCost = override.CacheCreationInputTokenCost - } - if override.InputCostPerTokenBatches != nil { - patched.InputCostPerTokenBatches = override.InputCostPerTokenBatches - } - if override.OutputCostPerTokenBatches != nil { - patched.OutputCostPerTokenBatches = override.OutputCostPerTokenBatches - } - if override.InputCostPerImage != nil { - patched.InputCostPerImage = override.InputCostPerImage - } - if override.OutputCostPerImage != nil { - 
patched.OutputCostPerImage = override.OutputCostPerImage - } - if override.OutputCostPerImageLowQuality != nil { - patched.OutputCostPerImageLowQuality = override.OutputCostPerImageLowQuality - } - if override.OutputCostPerImageMediumQuality != nil { - patched.OutputCostPerImageMediumQuality = override.OutputCostPerImageMediumQuality + for _, field := range []struct { + dst **float64 + src *float64 + }{ + {dst: &patched.InputCostPerToken, src: override.InputCostPerToken}, + {dst: &patched.OutputCostPerToken, src: override.OutputCostPerToken}, + {dst: &patched.InputCostPerTokenPriority, src: override.InputCostPerTokenPriority}, + {dst: &patched.OutputCostPerTokenPriority, src: override.OutputCostPerTokenPriority}, + {dst: &patched.InputCostPerVideoPerSecond, src: override.InputCostPerVideoPerSecond}, + {dst: &patched.OutputCostPerVideoPerSecond, src: override.OutputCostPerVideoPerSecond}, + {dst: &patched.OutputCostPerSecond, src: override.OutputCostPerSecond}, + {dst: &patched.InputCostPerAudioPerSecond, src: override.InputCostPerAudioPerSecond}, + {dst: &patched.InputCostPerSecond, src: override.InputCostPerSecond}, + {dst: &patched.InputCostPerAudioToken, src: override.InputCostPerAudioToken}, + {dst: &patched.OutputCostPerAudioToken, src: override.OutputCostPerAudioToken}, + {dst: &patched.InputCostPerCharacter, src: override.InputCostPerCharacter}, + {dst: &patched.InputCostPerTokenAbove128kTokens, src: override.InputCostPerTokenAbove128kTokens}, + {dst: &patched.InputCostPerImageAbove128kTokens, src: override.InputCostPerImageAbove128kTokens}, + {dst: &patched.InputCostPerVideoPerSecondAbove128kTokens, src: override.InputCostPerVideoPerSecondAbove128kTokens}, + {dst: &patched.InputCostPerAudioPerSecondAbove128kTokens, src: override.InputCostPerAudioPerSecondAbove128kTokens}, + {dst: &patched.OutputCostPerTokenAbove128kTokens, src: override.OutputCostPerTokenAbove128kTokens}, + {dst: &patched.InputCostPerTokenAbove200kTokens, src: 
override.InputCostPerTokenAbove200kTokens}, + {dst: &patched.OutputCostPerTokenAbove200kTokens, src: override.OutputCostPerTokenAbove200kTokens}, + {dst: &patched.CacheCreationInputTokenCostAbove200kTokens, src: override.CacheCreationInputTokenCostAbove200kTokens}, + {dst: &patched.CacheReadInputTokenCostAbove200kTokens, src: override.CacheReadInputTokenCostAbove200kTokens}, + {dst: &patched.CacheReadInputTokenCost, src: override.CacheReadInputTokenCost}, + {dst: &patched.CacheCreationInputTokenCost, src: override.CacheCreationInputTokenCost}, + {dst: &patched.CacheCreationInputTokenCostAbove1hr, src: override.CacheCreationInputTokenCostAbove1hr}, + {dst: &patched.CacheCreationInputTokenCostAbove1hrAbove200kTokens, src: override.CacheCreationInputTokenCostAbove1hrAbove200kTokens}, + {dst: &patched.CacheCreationInputAudioTokenCost, src: override.CacheCreationInputAudioTokenCost}, + {dst: &patched.CacheReadInputTokenCostPriority, src: override.CacheReadInputTokenCostPriority}, + {dst: &patched.InputCostPerTokenBatches, src: override.InputCostPerTokenBatches}, + {dst: &patched.OutputCostPerTokenBatches, src: override.OutputCostPerTokenBatches}, + {dst: &patched.InputCostPerImageToken, src: override.InputCostPerImageToken}, + {dst: &patched.OutputCostPerImageToken, src: override.OutputCostPerImageToken}, + {dst: &patched.InputCostPerImage, src: override.InputCostPerImage}, + {dst: &patched.OutputCostPerImage, src: override.OutputCostPerImage}, + {dst: &patched.InputCostPerPixel, src: override.InputCostPerPixel}, + {dst: &patched.OutputCostPerPixel, src: override.OutputCostPerPixel}, + {dst: &patched.OutputCostPerImagePremiumImage, src: override.OutputCostPerImagePremiumImage}, + {dst: &patched.OutputCostPerImageAbove512x512Pixels, src: override.OutputCostPerImageAbove512x512Pixels}, + {dst: &patched.OutputCostPerImageAbove512x512PixelsPremium, src: override.OutputCostPerImageAbove512x512PixelsPremium}, + {dst: &patched.OutputCostPerImageAbove1024x1024Pixels, src: 
override.OutputCostPerImageAbove1024x1024Pixels}, + {dst: &patched.OutputCostPerImageAbove1024x1024PixelsPremium, src: override.OutputCostPerImageAbove1024x1024PixelsPremium}, + {dst: &patched.OutputCostPerImageAbove2048x2048Pixels, src: override.OutputCostPerImageAbove2048x2048Pixels}, + {dst: &patched.OutputCostPerImageAbove4096x4096Pixels, src: override.OutputCostPerImageAbove4096x4096Pixels}, + {dst: &patched.CacheReadInputImageTokenCost, src: override.CacheReadInputImageTokenCost}, + {dst: &patched.SearchContextCostPerQuery, src: override.SearchContextCostPerQuery}, + {dst: &patched.CodeInterpreterCostPerSession, src: override.CodeInterpreterCostPerSession}, + {dst: &patched.OutputCostPerImageLowQuality, src: override.OutputCostPerImageLowQuality}, + {dst: &patched.OutputCostPerImageMediumQuality, src: override.OutputCostPerImageMediumQuality}, + {dst: &patched.OutputCostPerImageHighQuality, src: override.OutputCostPerImageHighQuality}, + {dst: &patched.OutputCostPerImageAutoQuality, src: override.OutputCostPerImageAutoQuality}, + } { + if field.src != nil { + *field.dst = field.src + } } - if override.OutputCostPerImageHighQuality != nil { - patched.OutputCostPerImageHighQuality = override.OutputCostPerImageHighQuality + return patched +} + +func (mc *ModelCatalog) loadPricingOverridesFromStore(ctx context.Context) error { + if mc.configStore == nil { + return nil } - if override.OutputCostPerImageAutoQuality != nil { - patched.OutputCostPerImageAutoQuality = override.OutputCostPerImageAutoQuality + rows, err := mc.configStore.GetPricingOverrides(ctx, configstore.PricingOverrideFilters{}) + if err != nil { + return err } - - return patched + return mc.SetPricingOverrides(rows) } diff --git a/framework/modelcatalog/overrides_test.go b/framework/modelcatalog/overrides_test.go index 5f2ae1df49..f073dd65c8 100644 --- a/framework/modelcatalog/overrides_test.go +++ b/framework/modelcatalog/overrides_test.go @@ -3,6 +3,7 @@ package modelcatalog import ( "testing" + 
bifrost "github.com/maximhq/bifrost/core" "github.com/maximhq/bifrost/core/schemas" configstoreTables "github.com/maximhq/bifrost/framework/configstore/tables" "github.com/stretchr/testify/assert" @@ -22,150 +23,180 @@ func (noOpLogger) LogHTTPRequest(schemas.LogLevel, string) schemas.LogEventBuild return schemas.NoopLogEvent } -func TestSetProviderPricingOverrides_InvalidRegex(t *testing.T) { - t.Skip() - mc := newTestCatalog(nil, nil) - err := mc.SetProviderPricingOverrides(schemas.OpenAI, []schemas.ProviderPricingOverride{ - { - ModelPattern: "[", - MatchType: schemas.PricingOverrideMatchRegex, - }, - }) - require.Error(t, err) -} - -func TestGetPricing_OverridePrecedenceExactWildcardRegex(t *testing.T) { - t.Skip() +func TestGetPricing_OverridePrecedenceExactWildcard(t *testing.T) { mc := newTestCatalog(nil, nil) mc.logger = noOpLogger{} mc.pricingData[makeKey("gpt-4o", "openai", "chat")] = configstoreTables.TableModelPricing{ Model: "gpt-4o", Provider: "openai", Mode: "chat", - InputCostPerToken: 1, - OutputCostPerToken: 2, + InputCostPerToken: bifrost.Ptr(1.0), + OutputCostPerToken: bifrost.Ptr(2.0), } - exact := 20.0 - wildcard := 10.0 - regex := 30.0 - require.NoError(t, mc.SetProviderPricingOverrides(schemas.OpenAI, []schemas.ProviderPricingOverride{ - { - ModelPattern: "gpt-*", - MatchType: schemas.PricingOverrideMatchWildcard, - InputCostPerToken: &wildcard, - }, + providerID := "openai" + require.NoError(t, mc.SetPricingOverrides([]configstoreTables.TablePricingOverride{ { - ModelPattern: "^gpt-.*$", - MatchType: schemas.PricingOverrideMatchRegex, - InputCostPerToken: &regex, + ID: "openai-override-0", + ScopeKind: string(ScopeKindProvider), + ProviderID: &providerID, + MatchType: string(MatchTypeWildcard), + Pattern: "gpt-*", + RequestTypes: []schemas.RequestType{schemas.ChatCompletionRequest}, + PricingPatchJSON: `{"input_cost_per_token":10}`, }, { - ModelPattern: "gpt-4o", - MatchType: schemas.PricingOverrideMatchExact, - InputCostPerToken: &exact, +
ID: "openai-override-1", + ScopeKind: string(ScopeKindProvider), + ProviderID: &providerID, + MatchType: string(MatchTypeExact), + Pattern: "gpt-4o", + RequestTypes: []schemas.RequestType{schemas.ChatCompletionRequest}, + PricingPatchJSON: `{"input_cost_per_token":20}`, }, })) - pricing, ok := mc.getPricing("gpt-4o", "openai", schemas.ChatCompletionRequest) - require.True(t, ok) + pricing := mc.resolvePricing("openai", "gpt-4o", "", schemas.ChatCompletionRequest, PricingLookupScopes{Provider: "openai"}) require.NotNil(t, pricing) - assert.Equal(t, 20.0, pricing.InputCostPerToken) - assert.Equal(t, 2.0, pricing.OutputCostPerToken) + require.NotNil(t, pricing.InputCostPerToken) + assert.Equal(t, 20.0, *pricing.InputCostPerToken) } -func TestGetPricing_WildcardBeatsRegex(t *testing.T) { +func TestGetPricing_RequestTypeSpecificOverrideBeatsGeneric(t *testing.T) { t.Skip() mc := newTestCatalog(nil, nil) mc.logger = noOpLogger{} - mc.pricingData[makeKey("gpt-4o-mini", "openai", "chat")] = configstoreTables.TableModelPricing{ - Model: "gpt-4o-mini", + mc.pricingData[makeKey("gpt-4o", "openai", "responses")] = configstoreTables.TableModelPricing{ + Model: "gpt-4o", Provider: "openai", - Mode: "chat", - InputCostPerToken: 1, - OutputCostPerToken: 2, + Mode: "responses", + InputCostPerToken: bifrost.Ptr(1.0), + OutputCostPerToken: bifrost.Ptr(2.0), } - wildcard := 11.0 - regex := 12.0 - require.NoError(t, mc.SetProviderPricingOverrides(schemas.OpenAI, []schemas.ProviderPricingOverride{ + providerID := "openai" + require.NoError(t, mc.SetPricingOverrides([]configstoreTables.TablePricingOverride{ { - ModelPattern: "^gpt-4o.*$", - MatchType: schemas.PricingOverrideMatchRegex, - InputCostPerToken: &regex, + ID: "openai-generic", + ScopeKind: string(ScopeKindProvider), + ProviderID: &providerID, + MatchType: string(MatchTypeExact), + Pattern: "gpt-4o", + PricingPatchJSON: `{"input_cost_per_token":9}`, }, { - ModelPattern: "gpt-4o*", - MatchType: schemas.PricingOverrideMatchWildcard,
- InputCostPerToken: &wildcard, + ID: "openai-specific", + ScopeKind: string(ScopeKindProvider), + ProviderID: &providerID, + MatchType: string(MatchTypeExact), + Pattern: "gpt-4o", + RequestTypes: []schemas.RequestType{schemas.ResponsesRequest}, + PricingPatchJSON: `{"input_cost_per_token":15}`, }, })) - pricing, ok := mc.getPricing("gpt-4o-mini", "openai", schemas.ChatCompletionRequest) - require.True(t, ok) + pricing := mc.resolvePricing("openai", "gpt-4o", "", schemas.ResponsesRequest, PricingLookupScopes{Provider: "openai"}) require.NotNil(t, pricing) - assert.Equal(t, 11.0, pricing.InputCostPerToken) + assert.Equal(t, 15.0, pricing.InputCostPerToken) } -func TestGetPricing_RequestTypeSpecificOverrideBeatsGeneric(t *testing.T) { +func TestGetPricing_AppliesOverrideAfterFallbackResolution(t *testing.T) { t.Skip() mc := newTestCatalog(nil, nil) mc.logger = noOpLogger{} - mc.pricingData[makeKey("gpt-4o", "openai", "responses")] = configstoreTables.TableModelPricing{ + mc.pricingData[makeKey("gpt-4o", "vertex", "chat")] = configstoreTables.TableModelPricing{ Model: "gpt-4o", - Provider: "openai", - Mode: "responses", - InputCostPerToken: 1, - OutputCostPerToken: 2, + Provider: "vertex", + Mode: "chat", + InputCostPerToken: bifrost.Ptr(1.0), + OutputCostPerToken: bifrost.Ptr(2.0), } - specific := 15.0 - generic := 9.0 - require.NoError(t, mc.SetProviderPricingOverrides(schemas.OpenAI, []schemas.ProviderPricingOverride{ + geminiProviderID := "gemini" + require.NoError(t, mc.SetPricingOverrides([]configstoreTables.TablePricingOverride{ { - ModelPattern: "gpt-4o", - MatchType: schemas.PricingOverrideMatchExact, - InputCostPerToken: &generic, + ID: "gemini-override", + ScopeKind: string(ScopeKindProvider), + ProviderID: &geminiProviderID, + MatchType: string(MatchTypeExact), + Pattern: "gpt-4o", + PricingPatchJSON: `{"input_cost_per_token":7}`, }, + })) + + pricing := mc.resolvePricing("gemini", "gpt-4o", "", schemas.ChatCompletionRequest, PricingLookupScopes{Provider: 
"gemini"}) + require.NotNil(t, pricing) + assert.Equal(t, 7.0, pricing.InputCostPerToken) +} + +func TestGetPricing_DeploymentLookupUsesRequestedModelForOverrideMatching(t *testing.T) { + mc := newTestCatalog(nil, nil) + mc.logger = noOpLogger{} + mc.pricingData[makeKey("dep-gpt4o", "openai", "chat")] = configstoreTables.TableModelPricing{ + Model: "dep-gpt4o", + Provider: "openai", + Mode: "chat", + InputCostPerToken: bifrost.Ptr(1.0), + OutputCostPerToken: bifrost.Ptr(2.0), + } + + providerID := "openai" + require.NoError(t, mc.SetPricingOverrides([]configstoreTables.TablePricingOverride{ { - ModelPattern: "gpt-4o", - MatchType: schemas.PricingOverrideMatchExact, - RequestTypes: []schemas.RequestType{schemas.ResponsesRequest}, - InputCostPerToken: &specific, + ID: "requested-model-override", + ScopeKind: string(ScopeKindProvider), + ProviderID: &providerID, + MatchType: string(MatchTypeExact), + Pattern: "gpt-4o", + RequestTypes: []schemas.RequestType{schemas.ChatCompletionRequest}, + PricingPatchJSON: `{"input_cost_per_token":7}`, }, })) - pricing, ok := mc.getPricing("gpt-4o", "openai", schemas.ResponsesRequest) - require.True(t, ok) + pricing := mc.resolvePricing("openai", "gpt-4o", "dep-gpt4o", schemas.ChatCompletionRequest, PricingLookupScopes{Provider: "openai"}) require.NotNil(t, pricing) - assert.Equal(t, 15.0, pricing.InputCostPerToken) + require.NotNil(t, pricing.InputCostPerToken) + assert.Equal(t, 7.0, *pricing.InputCostPerToken) } -func TestGetPricing_AppliesOverrideAfterFallbackResolution(t *testing.T) { - t.Skip() +func TestGetPricing_FallbackUsesRequestedProviderForScopeMatching(t *testing.T) { mc := newTestCatalog(nil, nil) mc.logger = noOpLogger{} mc.pricingData[makeKey("gpt-4o", "vertex", "chat")] = configstoreTables.TableModelPricing{ Model: "gpt-4o", Provider: "vertex", Mode: "chat", - InputCostPerToken: 1, - OutputCostPerToken: 2, + InputCostPerToken: bifrost.Ptr(1.0), + OutputCostPerToken: bifrost.Ptr(2.0), } - override := 7.0 - 
require.NoError(t, mc.SetProviderPricingOverrides(schemas.Gemini, []schemas.ProviderPricingOverride{ + geminiProviderID := "gemini" + vertexProviderID := "vertex" + require.NoError(t, mc.SetPricingOverrides([]configstoreTables.TablePricingOverride{ + { + ID: "gemini-provider-override", + ScopeKind: string(ScopeKindProvider), + ProviderID: &geminiProviderID, + MatchType: string(MatchTypeExact), + Pattern: "gpt-4o", + RequestTypes: []schemas.RequestType{schemas.ChatCompletionRequest}, + PricingPatchJSON: `{"input_cost_per_token":5}`, + }, { - ModelPattern: "gpt-4o", - MatchType: schemas.PricingOverrideMatchExact, - InputCostPerToken: &override, + ID: "vertex-provider-override", + ScopeKind: string(ScopeKindProvider), + ProviderID: &vertexProviderID, + MatchType: string(MatchTypeExact), + Pattern: "gpt-4o", + RequestTypes: []schemas.RequestType{schemas.ChatCompletionRequest}, + PricingPatchJSON: `{"input_cost_per_token":9}`, }, })) - pricing, ok := mc.getPricing("gpt-4o", "gemini", schemas.ChatCompletionRequest) - require.True(t, ok) + pricing := mc.resolvePricing("gemini", "gpt-4o", "", schemas.ChatCompletionRequest, PricingLookupScopes{Provider: "gemini"}) require.NotNil(t, pricing) - assert.Equal(t, 7.0, pricing.InputCostPerToken) + require.NotNil(t, pricing.InputCostPerToken) + assert.Equal(t, 5.0, *pricing.InputCostPerToken) } func TestGetPricing_ExactOverrideDoesNotMatchProviderPrefixedModel(t *testing.T) { @@ -176,21 +207,23 @@ func TestGetPricing_ExactOverrideDoesNotMatchProviderPrefixedModel(t *testing.T) Model: "openai/gpt-4o", Provider: "openai", Mode: "chat", - InputCostPerToken: 1, - OutputCostPerToken: 2, + InputCostPerToken: bifrost.Ptr(1.0), + OutputCostPerToken: bifrost.Ptr(2.0), } - override := 19.0 - require.NoError(t, mc.SetProviderPricingOverrides(schemas.OpenAI, []schemas.ProviderPricingOverride{ + providerID := "openai" + require.NoError(t, mc.SetPricingOverrides([]configstoreTables.TablePricingOverride{ { - ModelPattern: "gpt-4o", - MatchType: 
schemas.PricingOverrideMatchExact, - InputCostPerToken: &override, + ID: "openai-override-0", + ScopeKind: string(ScopeKindProvider), + ProviderID: &providerID, + MatchType: string(MatchTypeExact), + Pattern: "gpt-4o", + PricingPatchJSON: `{"input_cost_per_token":19}`, }, })) - pricing, ok := mc.getPricing("openai/gpt-4o", "openai", schemas.ChatCompletionRequest) - require.True(t, ok) + pricing := mc.resolvePricing("openai", "openai/gpt-4o", "", schemas.ChatCompletionRequest, PricingLookupScopes{Provider: "openai"}) require.NotNil(t, pricing) assert.Equal(t, 1.0, pricing.InputCostPerToken) } @@ -204,22 +237,24 @@ func TestGetPricing_NoMatchingOverrideLeavesPricingUnchanged(t *testing.T) { Model: "gpt-4o", Provider: "openai", Mode: "chat", - InputCostPerToken: 1, - OutputCostPerToken: 2, + InputCostPerToken: bifrost.Ptr(1.0), + OutputCostPerToken: bifrost.Ptr(2.0), CacheReadInputTokenCost: &baseCacheRead, } - override := 9.0 - require.NoError(t, mc.SetProviderPricingOverrides(schemas.OpenAI, []schemas.ProviderPricingOverride{ + providerID := "openai" + require.NoError(t, mc.SetPricingOverrides([]configstoreTables.TablePricingOverride{ { - ModelPattern: "claude-*", - MatchType: schemas.PricingOverrideMatchWildcard, - InputCostPerToken: &override, + ID: "openai-override-0", + ScopeKind: string(ScopeKindProvider), + ProviderID: &providerID, + MatchType: string(MatchTypeWildcard), + Pattern: "claude-*", + PricingPatchJSON: `{"input_cost_per_token":9}`, }, })) - pricing, ok := mc.getPricing("gpt-4o", "openai", schemas.ChatCompletionRequest) - require.True(t, ok) + pricing := mc.resolvePricing("openai", "gpt-4o", "", schemas.ChatCompletionRequest, PricingLookupScopes{Provider: "openai"}) require.NotNil(t, pricing) assert.Equal(t, 1.0, pricing.InputCostPerToken) assert.Equal(t, 2.0, pricing.OutputCostPerToken) @@ -235,28 +270,29 @@ func TestDeleteProviderPricingOverrides_StopsApplying(t *testing.T) { Model: "gpt-4o", Provider: "openai", Mode: "chat", - InputCostPerToken: 
1, - OutputCostPerToken: 2, + InputCostPerToken: bifrost.Ptr(1.0), + OutputCostPerToken: bifrost.Ptr(2.0), } - override := 11.0 - require.NoError(t, mc.SetProviderPricingOverrides(schemas.OpenAI, []schemas.ProviderPricingOverride{ + providerID := "openai" + require.NoError(t, mc.SetPricingOverrides([]configstoreTables.TablePricingOverride{ { - ModelPattern: "gpt-4o", - MatchType: schemas.PricingOverrideMatchExact, - InputCostPerToken: &override, + ID: "openai-override-0", + ScopeKind: string(ScopeKindProvider), + ProviderID: &providerID, + MatchType: string(MatchTypeExact), + Pattern: "gpt-4o", + PricingPatchJSON: `{"input_cost_per_token":11}`, }, })) - pricing, ok := mc.getPricing("gpt-4o", "openai", schemas.ChatCompletionRequest) - require.True(t, ok) + pricing := mc.resolvePricing("openai", "gpt-4o", "", schemas.ChatCompletionRequest, PricingLookupScopes{Provider: "openai"}) require.NotNil(t, pricing) assert.Equal(t, 11.0, pricing.InputCostPerToken) - mc.DeleteProviderPricingOverrides(schemas.OpenAI) + require.NoError(t, mc.SetPricingOverrides(nil)) - pricing, ok = mc.getPricing("gpt-4o", "openai", schemas.ChatCompletionRequest) - require.True(t, ok) + pricing = mc.resolvePricing("openai", "gpt-4o", "", schemas.ChatCompletionRequest, PricingLookupScopes{Provider: "openai"}) require.NotNil(t, pricing) assert.Equal(t, 1.0, pricing.InputCostPerToken) } @@ -269,62 +305,74 @@ func TestGetPricing_WildcardSpecificityLongerLiteralWins(t *testing.T) { Model: "gpt-4o-mini", Provider: "openai", Mode: "chat", - InputCostPerToken: 1, - OutputCostPerToken: 2, + InputCostPerToken: bifrost.Ptr(1.0), + OutputCostPerToken: bifrost.Ptr(2.0), } - generic := 5.0 - specific := 6.0 - require.NoError(t, mc.SetProviderPricingOverrides(schemas.OpenAI, []schemas.ProviderPricingOverride{ + providerID := "openai" + require.NoError(t, mc.SetPricingOverrides([]configstoreTables.TablePricingOverride{ { - ModelPattern: "gpt-*", - MatchType: schemas.PricingOverrideMatchWildcard, - 
InputCostPerToken: &generic, + ID: "openai-override-0", + ScopeKind: string(ScopeKindProvider), + ProviderID: &providerID, + MatchType: string(MatchTypeWildcard), + Pattern: "gpt-*", + PricingPatchJSON: `{"input_cost_per_token":5}`, }, { - ModelPattern: "gpt-4o*", - MatchType: schemas.PricingOverrideMatchWildcard, - InputCostPerToken: &specific, + ID: "openai-override-1", + ScopeKind: string(ScopeKindProvider), + ProviderID: &providerID, + MatchType: string(MatchTypeWildcard), + Pattern: "gpt-4o*", + PricingPatchJSON: `{"input_cost_per_token":6}`, }, })) - pricing, ok := mc.getPricing("gpt-4o-mini", "openai", schemas.ChatCompletionRequest) - require.True(t, ok) + pricing := mc.resolvePricing("openai", "gpt-4o-mini", "", schemas.ChatCompletionRequest, PricingLookupScopes{Provider: "openai"}) require.NotNil(t, pricing) assert.Equal(t, 6.0, pricing.InputCostPerToken) } -func TestGetPricing_ConfigOrderTiebreakFirstWinsWhenEqual(t *testing.T) { - t.Skip() +// TestGetPricing_FirstInsertionWinsOnTie verifies that when multiple wildcard overrides +// match the same model and scope, the first one inserted takes precedence. 
+func TestGetPricing_FirstInsertionWinsOnTie(t *testing.T) { mc := newTestCatalog(nil, nil) mc.logger = noOpLogger{} mc.pricingData[makeKey("gpt-4o-mini", "openai", "chat")] = configstoreTables.TableModelPricing{ Model: "gpt-4o-mini", Provider: "openai", Mode: "chat", - InputCostPerToken: 1, - OutputCostPerToken: 2, + InputCostPerToken: bifrost.Ptr(1.0), + OutputCostPerToken: bifrost.Ptr(2.0), } - first := 8.0 - second := 9.0 - require.NoError(t, mc.SetProviderPricingOverrides(schemas.OpenAI, []schemas.ProviderPricingOverride{ + providerID := "openai" + require.NoError(t, mc.SetPricingOverrides([]configstoreTables.TablePricingOverride{ { - ModelPattern: "gpt-4o*", - MatchType: schemas.PricingOverrideMatchWildcard, - InputCostPerToken: &first, + ID: "a-override", + ScopeKind: string(ScopeKindProvider), + ProviderID: &providerID, + MatchType: string(MatchTypeWildcard), + Pattern: "gpt-4o*", + RequestTypes: []schemas.RequestType{schemas.ChatCompletionRequest}, + PricingPatchJSON: `{"input_cost_per_token":8}`, }, { - ModelPattern: "gpt-4o*", - MatchType: schemas.PricingOverrideMatchWildcard, - InputCostPerToken: &second, + ID: "b-override", + ScopeKind: string(ScopeKindProvider), + ProviderID: &providerID, + MatchType: string(MatchTypeWildcard), + Pattern: "gpt-4o*", + RequestTypes: []schemas.RequestType{schemas.ChatCompletionRequest}, + PricingPatchJSON: `{"input_cost_per_token":9}`, }, })) - pricing, ok := mc.getPricing("gpt-4o-mini", "openai", schemas.ChatCompletionRequest) - require.True(t, ok) + pricing := mc.resolvePricing("openai", "gpt-4o-mini", "", schemas.ChatCompletionRequest, PricingLookupScopes{Provider: "openai"}) require.NotNil(t, pricing) - assert.Equal(t, 8.0, pricing.InputCostPerToken) + require.NotNil(t, pricing.InputCostPerToken) + assert.Equal(t, 8.0, *pricing.InputCostPerToken) } func TestPatchPricing_PartialPatchOnlyChangesSpecifiedFields(t *testing.T) { @@ -335,26 +383,122 @@ func TestPatchPricing_PartialPatchOnlyChangesSpecifiedFields(t 
*testing.T) { Model: "gpt-4o", Provider: "openai", Mode: "chat", - InputCostPerToken: 1, - OutputCostPerToken: 2, + InputCostPerToken: bifrost.Ptr(1.0), + OutputCostPerToken: bifrost.Ptr(2.0), CacheReadInputTokenCost: &baseCacheRead, InputCostPerImage: &baseInputImage, } - patched := patchPricing(base, schemas.ProviderPricingOverride{ - ModelPattern: "gpt-4o", - MatchType: schemas.PricingOverrideMatchExact, - InputCostPerToken: schemas.Ptr(3.0), - CacheReadInputTokenCost: schemas.Ptr(0.9), + cacheRead := 0.9 + patched := patchPricing(base, PricingOptions{ + InputCostPerToken: bifrost.Ptr(3.0), + CacheReadInputTokenCost: &cacheRead, }) - // Changed fields assert.Equal(t, 3.0, patched.InputCostPerToken) require.NotNil(t, patched.CacheReadInputTokenCost) assert.Equal(t, 0.9, *patched.CacheReadInputTokenCost) - // Unchanged fields assert.Equal(t, 2.0, patched.OutputCostPerToken) require.NotNil(t, patched.InputCostPerImage) assert.Equal(t, 0.7, *patched.InputCostPerImage) } + +func TestApplyScopedPricingOverrides_ScopePrecedence(t *testing.T) { + mc := newTestCatalog(nil, nil) + mc.logger = noOpLogger{} + + providerScopeID := "openai" + providerKeyScopeID := "provider-key-1" + virtualKeyScopeID := "virtual-key-1" + + require.NoError(t, mc.SetPricingOverrides([]configstoreTables.TablePricingOverride{ + { + ID: "global", + ScopeKind: string(ScopeKindGlobal), + MatchType: string(MatchTypeExact), + Pattern: "gpt-5-nano", + RequestTypes: []schemas.RequestType{schemas.ChatCompletionRequest}, + PricingPatchJSON: `{"input_cost_per_token":2}`, + }, + { + ID: "provider", + ScopeKind: string(ScopeKindProvider), + ProviderID: &providerScopeID, + MatchType: string(MatchTypeExact), + Pattern: "gpt-5-nano", + RequestTypes: []schemas.RequestType{schemas.ChatCompletionRequest}, + PricingPatchJSON: `{"input_cost_per_token":3}`, + }, + { + ID: "provider-key", + ScopeKind: string(ScopeKindProviderKey), + ProviderKeyID: &providerKeyScopeID, + MatchType: string(MatchTypeExact), + Pattern: 
"gpt-5-nano", + RequestTypes: []schemas.RequestType{schemas.ChatCompletionRequest}, + PricingPatchJSON: `{"input_cost_per_token":4}`, + }, + { + ID: "virtual-key", + ScopeKind: string(ScopeKindVirtualKey), + VirtualKeyID: &virtualKeyScopeID, + MatchType: string(MatchTypeExact), + Pattern: "gpt-5-nano", + RequestTypes: []schemas.RequestType{schemas.ChatCompletionRequest}, + PricingPatchJSON: `{"input_cost_per_token":5}`, + }, + })) + + base := configstoreTables.TableModelPricing{ + Model: "gpt-5-nano", + Provider: "openai", + Mode: "chat", + InputCostPerToken: bifrost.Ptr(1.0), + OutputCostPerToken: bifrost.Ptr(2.0), + } + + tests := []struct { + name string + scopes PricingLookupScopes + expected float64 + }{ + { + name: "virtual key wins over provider key, provider and global", + scopes: PricingLookupScopes{ + VirtualKeyID: virtualKeyScopeID, + SelectedKeyID: providerKeyScopeID, + Provider: providerScopeID, + }, + expected: 5.0, + }, + { + name: "provider key wins over provider and global", + scopes: PricingLookupScopes{ + SelectedKeyID: providerKeyScopeID, + Provider: providerScopeID, + }, + expected: 4.0, + }, + { + name: "provider wins over global", + scopes: PricingLookupScopes{ + Provider: providerScopeID, + }, + expected: 3.0, + }, + { + name: "global applies when no narrower scope is provided", + scopes: PricingLookupScopes{}, + expected: 2.0, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + patched, applied := mc.applyPricingOverrides("gpt-5-nano", schemas.ChatCompletionRequest, base, tc.scopes) + require.True(t, applied) + assert.Equal(t, tc.expected, patched.InputCostPerToken) + }) + } +} diff --git a/framework/modelcatalog/pricing.go b/framework/modelcatalog/pricing.go index decb3e78ea..b9d7525f2f 100644 --- a/framework/modelcatalog/pricing.go +++ b/framework/modelcatalog/pricing.go @@ -23,22 +23,29 @@ type costInput struct { // CalculateCost calculates the cost of a Bifrost response. 
// It handles all request types, cache debug billing, and tiered pricing. -func (mc *ModelCatalog) CalculateCost(result *schemas.BifrostResponse) float64 { +// If scopes is nil, an empty PricingLookupScopes is used; global and provider-scoped +// overrides may still apply since the provider is derived from the response. +func (mc *ModelCatalog) CalculateCost(result *schemas.BifrostResponse, scopes *PricingLookupScopes) float64 { if result == nil { return 0 } + var s PricingLookupScopes + if scopes != nil { + s = *scopes + } + // Handle semantic cache billing cacheDebug := result.GetExtraFields().CacheDebug if cacheDebug != nil { - return mc.calculateCostWithCache(result, cacheDebug) + return mc.calculateCostWithCache(result, cacheDebug, s) } - return mc.calculateBaseCost(result) + return mc.calculateBaseCost(result, s) } // calculateCostWithCache handles cost calculation when semantic cache debug info is present. -func (mc *ModelCatalog) calculateCostWithCache(result *schemas.BifrostResponse, cacheDebug *schemas.BifrostCacheDebug) float64 { +func (mc *ModelCatalog) calculateCostWithCache(result *schemas.BifrostResponse, cacheDebug *schemas.BifrostCacheDebug, scopes PricingLookupScopes) float64 { if cacheDebug.CacheHit { // Direct cache hit — no LLM call, no cost if cacheDebug.HitType != nil && *cacheDebug.HitType == "direct" { @@ -46,31 +53,34 @@ func (mc *ModelCatalog) calculateCostWithCache(result *schemas.BifrostResponse, } // Semantic cache hit — only the embedding lookup cost if cacheDebug.ProviderUsed != nil && cacheDebug.ModelUsed != nil && cacheDebug.InputTokens != nil { - return mc.computeCacheEmbeddingCost(cacheDebug) + return mc.computeCacheEmbeddingCost(cacheDebug, scopes) } return 0 } // Cache miss — full LLM cost + embedding lookup cost - baseCost := mc.calculateBaseCost(result) - embeddingCost := mc.computeCacheEmbeddingCost(cacheDebug) + baseCost := mc.calculateBaseCost(result, scopes) + embeddingCost := mc.computeCacheEmbeddingCost(cacheDebug, 
scopes) return baseCost + embeddingCost } // computeCacheEmbeddingCost calculates the embedding cost for a semantic cache lookup. -func (mc *ModelCatalog) computeCacheEmbeddingCost(cacheDebug *schemas.BifrostCacheDebug) float64 { +func (mc *ModelCatalog) computeCacheEmbeddingCost(cacheDebug *schemas.BifrostCacheDebug, scopes PricingLookupScopes) float64 { if cacheDebug == nil || cacheDebug.ProviderUsed == nil || cacheDebug.ModelUsed == nil || cacheDebug.InputTokens == nil { return 0 } - pricing, exists := mc.getPricing(*cacheDebug.ModelUsed, *cacheDebug.ProviderUsed, schemas.EmbeddingRequest) - if !exists { + if scopes.Provider == "" { + scopes.Provider = *cacheDebug.ProviderUsed + } + pricing := mc.resolvePricing(*cacheDebug.ProviderUsed, *cacheDebug.ModelUsed, "", schemas.EmbeddingRequest, scopes) + if pricing == nil { return 0 } return float64(*cacheDebug.InputTokens) * tieredInputRate(pricing, *cacheDebug.InputTokens) } // calculateBaseCost extracts usage from the response and routes to the appropriate compute function. 
-func (mc *ModelCatalog) calculateBaseCost(result *schemas.BifrostResponse) float64 { +func (mc *ModelCatalog) calculateBaseCost(result *schemas.BifrostResponse, scopes PricingLookupScopes) float64 { extraFields := result.GetExtraFields() if extraFields == nil { return 0 @@ -98,7 +108,7 @@ func (mc *ModelCatalog) calculateBaseCost(result *schemas.BifrostResponse) float requestType = normalizeStreamRequestType(requestType) // Resolve pricing entry with deployment fallback - pricing := mc.resolvePricing(provider, model, deployment, requestType) + pricing := mc.resolvePricing(provider, model, deployment, requestType, scopes) if pricing == nil { return 0 } @@ -598,7 +608,10 @@ func tieredInputRate(pricing *configstoreTables.TableModelPricing, totalTokens i if totalTokens > TokenTierAbove128K && pricing.InputCostPerTokenAbove128kTokens != nil { return *pricing.InputCostPerTokenAbove128kTokens } - return pricing.InputCostPerToken + if pricing.InputCostPerToken != nil { + return *pricing.InputCostPerToken + } + return 0 } // tieredOutputRate returns the effective per-token output rate based on total token count. @@ -609,7 +622,10 @@ func tieredOutputRate(pricing *configstoreTables.TableModelPricing, totalTokens if totalTokens > TokenTierAbove128K && pricing.OutputCostPerTokenAbove128kTokens != nil { return *pricing.OutputCostPerTokenAbove128kTokens } - return pricing.OutputCostPerToken + if pricing.OutputCostPerToken != nil { + return *pricing.OutputCostPerToken + } + return 0 } // tieredImageInputRate returns the effective rate for image tokens on the input side. @@ -743,28 +759,60 @@ func populateOutputImageCount(imageUsage *schemas.ImageUsage, dataLen int) { // --------------------------------------------------------------------------- // resolvePricing resolves the pricing entry for a model, trying deployment as fallback. 
-func (mc *ModelCatalog) resolvePricing(provider, model, deployment string, requestType schemas.RequestType) *configstoreTables.TableModelPricing { +func (mc *ModelCatalog) resolvePricing(provider, model, deployment string, requestType schemas.RequestType, scopes PricingLookupScopes) *configstoreTables.TableModelPricing { mc.logger.Debug("looking up pricing for model %s and provider %s of request type %s", model, provider, normalizeRequestType(requestType)) - pricing, exists := mc.getPricing(model, provider, requestType) - if exists { - return pricing + if scopes.Provider == "" { + scopes.Provider = provider + } + + base, exists := mc.getBasePricing(model, provider, requestType) + if exists && base != nil { + result, _ := mc.applyPricingOverrides(model, requestType, *base, scopes) + return &result } if deployment != "" { mc.logger.Debug("pricing not found for model %s, trying deployment %s", model, deployment) - pricing, exists = mc.getPricing(deployment, provider, requestType) - if exists { - return pricing + base, exists = mc.getBasePricing(deployment, provider, requestType) + if exists && base != nil { + // Apply overrides using the requested model name, not the deployment name + result, _ := mc.applyPricingOverrides(model, requestType, *base, scopes) + return &result } } - mc.logger.Debug("pricing not found for model %s and provider %s, skipping cost calculation", model, provider) + // No base catalog entry found; still try overrides in case the user defined + // override-only pricing for a model not in the built-in catalog. 
+ mc.logger.Debug("pricing not found for model %s and provider %s, trying override-only pricing", model, provider) + result, applied := mc.applyPricingOverrides(model, requestType, configstoreTables.TableModelPricing{}, scopes) + if applied { + return &result + } + mc.logger.Debug("no pricing found for model %s and provider %s, skipping cost calculation", model, provider) return nil } -// getPricing returns pricing information for a model (thread-safe) -func (mc *ModelCatalog) getPricing(model, provider string, requestType schemas.RequestType) (*configstoreTables.TableModelPricing, bool) { +// getBasePricing looks up catalog pricing for the given model, provider, and request type. +// It applies a provider-specific fallback chain when an exact match is not found: +// +// - Gemini: retries under the "vertex" provider, then falls back to chat mode for Responses requests. +// - Vertex: strips the "provider/model" prefix and retries, then falls back to chat mode for Responses requests. +// - Bedrock: prepends the "anthropic." namespace for Claude models, then falls back to chat mode for Responses requests. +// - All providers: for Responses/ResponsesStream requests, retries the lookup in chat mode. +// - All providers: for ImageEdit/ImageVariation requests, retries the lookup in image-generation mode. +// +// The method acquires a read lock for the duration of the lookup. +// +// Input: model — exact model name to look up. +// +// provider — provider identifier (e.g. "openai", "anthropic"). +// requestType — the request type used to derive the pricing mode. +// +// Output: TableModelPricing — the matched pricing row (zero value when not found). +// +// bool — true when a pricing entry was found, false otherwise. 
+func (mc *ModelCatalog) getBasePricing(model, provider string, requestType schemas.RequestType) (*configstoreTables.TableModelPricing, bool) { mc.mu.RLock() defer mc.mu.RUnlock() diff --git a/framework/modelcatalog/pricing_test.go b/framework/modelcatalog/pricing_test.go index 1433e0035f..d69301b4a7 100644 --- a/framework/modelcatalog/pricing_test.go +++ b/framework/modelcatalog/pricing_test.go @@ -3,6 +3,7 @@ package modelcatalog import ( "testing" + bifrost "github.com/maximhq/bifrost/core" "github.com/maximhq/bifrost/core/schemas" configstoreTables "github.com/maximhq/bifrost/framework/configstore/tables" "github.com/stretchr/testify/assert" @@ -13,17 +14,14 @@ import ( // helpers // --------------------------------------------------------------------------- -func ptr(v float64) *float64 { return &v } -func intPtr(v int) *int { return &v } - // chatPricing returns a TableModelPricing with the given per-token rates. func chatPricing(input, output float64) configstoreTables.TableModelPricing { return configstoreTables.TableModelPricing{ Model: "test-model", Provider: "test-provider", Mode: "chat", - InputCostPerToken: input, - OutputCostPerToken: output, + InputCostPerToken: bifrost.Ptr(input), + OutputCostPerToken: bifrost.Ptr(output), } } @@ -93,6 +91,13 @@ func makeImageResponse(provider schemas.ModelProvider, model string, usage *sche } } +func derefF(f *float64) float64 { + if f == nil { + return 0 + } + return *f +} + // ========================================================================= // 1. 
computeTextCost — unit tests (pure function, no catalog) // ========================================================================= @@ -124,8 +129,8 @@ func TestComputeTextCost_ZeroTokens(t *testing.T) { func TestComputeTextCost_WithCachedPromptTokens(t *testing.T) { // Claude 3.5 Sonnet (Bedrock): input=$3/M, output=$15/M, cache_read=$0.3/M, cache_creation=$3.75/M p := chatPricing(0.000003, 0.000015) - p.CacheReadInputTokenCost = ptr(0.0000003) - p.CacheCreationInputTokenCost = ptr(0.00000375) + p.CacheReadInputTokenCost = bifrost.Ptr(0.0000003) + p.CacheCreationInputTokenCost = bifrost.Ptr(0.00000375) usage := &schemas.BifrostLLMUsage{ PromptTokens: 2000, @@ -149,8 +154,8 @@ func TestComputeTextCost_WithCachedPromptTokens(t *testing.T) { func TestComputeTextCost_Tiered200k(t *testing.T) { // Claude 3.5 Sonnet Bedrock 200k tier: input=$6/M, output=$30/M p := chatPricing(0.000003, 0.000015) - p.InputCostPerTokenAbove200kTokens = ptr(0.000006) - p.OutputCostPerTokenAbove200kTokens = ptr(0.00003) + p.InputCostPerTokenAbove200kTokens = bifrost.Ptr(0.000006) + p.OutputCostPerTokenAbove200kTokens = bifrost.Ptr(0.00003) usage := &schemas.BifrostLLMUsage{ PromptTokens: 180000, @@ -167,8 +172,8 @@ func TestComputeTextCost_Tiered200k(t *testing.T) { func TestComputeTextCost_Below200kUsesBaseRate(t *testing.T) { p := chatPricing(0.000003, 0.000015) - p.InputCostPerTokenAbove200kTokens = ptr(0.000006) - p.OutputCostPerTokenAbove200kTokens = ptr(0.00003) + p.InputCostPerTokenAbove200kTokens = bifrost.Ptr(0.000006) + p.OutputCostPerTokenAbove200kTokens = bifrost.Ptr(0.00003) usage := &schemas.BifrostLLMUsage{ PromptTokens: 1000, @@ -185,7 +190,7 @@ func TestComputeTextCost_Below200kUsesBaseRate(t *testing.T) { func TestComputeTextCost_SearchQueryCost(t *testing.T) { p := chatPricing(0.000003, 0.000015) - p.SearchContextCostPerQuery = ptr(0.01) // $0.01 per search query + p.SearchContextCostPerQuery = bifrost.Ptr(0.01) // $0.01 per search query numQueries := 3 usage := 
&schemas.BifrostLLMUsage{ @@ -232,8 +237,8 @@ func TestComputeTextCost_NoCacheRateFallsBackToBaseInputRate(t *testing.T) { func TestComputeEmbeddingCost_Basic(t *testing.T) { // Titan Embed Text v1: $0.1/M input p := configstoreTables.TableModelPricing{ - InputCostPerToken: 0.0000001, - OutputCostPerToken: 0, + InputCostPerToken: bifrost.Ptr(0.0000001), + OutputCostPerToken: bifrost.Ptr(0.0), } usage := &schemas.BifrostLLMUsage{ PromptTokens: 5000, @@ -245,7 +250,7 @@ func TestComputeEmbeddingCost_Basic(t *testing.T) { } func TestComputeEmbeddingCost_NilUsage(t *testing.T) { - p := configstoreTables.TableModelPricing{InputCostPerToken: 0.0000001} + p := configstoreTables.TableModelPricing{InputCostPerToken: bifrost.Ptr(0.0000001)} assert.Equal(t, 0.0, computeEmbeddingCost(&p, nil)) } @@ -255,8 +260,8 @@ func TestComputeEmbeddingCost_NilUsage(t *testing.T) { func TestComputeRerankCost_Basic(t *testing.T) { p := configstoreTables.TableModelPricing{ - InputCostPerToken: 0.000001, - OutputCostPerToken: 0.000002, + InputCostPerToken: bifrost.Ptr(0.000001), + OutputCostPerToken: bifrost.Ptr(0.000002), } usage := &schemas.BifrostLLMUsage{ PromptTokens: 2000, @@ -270,9 +275,9 @@ func TestComputeRerankCost_Basic(t *testing.T) { func TestComputeRerankCost_WithSearchCost(t *testing.T) { p := configstoreTables.TableModelPricing{ - InputCostPerToken: 0, - OutputCostPerToken: 0, - SearchContextCostPerQuery: ptr(0.001), + InputCostPerToken: bifrost.Ptr(0.0), + OutputCostPerToken: bifrost.Ptr(0.0), + SearchContextCostPerQuery: bifrost.Ptr(0.001), } numQueries := 5 usage := &schemas.BifrostLLMUsage{ @@ -285,7 +290,7 @@ func TestComputeRerankCost_WithSearchCost(t *testing.T) { } func TestComputeRerankCost_NilUsage(t *testing.T) { - p := configstoreTables.TableModelPricing{InputCostPerToken: 0.001} + p := configstoreTables.TableModelPricing{InputCostPerToken: bifrost.Ptr(0.001)} assert.Equal(t, 0.0, computeRerankCost(&p, nil)) } @@ -296,9 +301,9 @@ func 
TestComputeRerankCost_NilUsage(t *testing.T) { func TestComputeSpeechCost_TokensPreferredOverDuration(t *testing.T) { // TTS: input=text tokens, output=audio tokens (preferred over per-second) p := configstoreTables.TableModelPricing{ - InputCostPerToken: 0.0000025, - OutputCostPerToken: 0.00001, - OutputCostPerSecond: ptr(0.00025), + InputCostPerToken: bifrost.Ptr(0.0000025), + OutputCostPerToken: bifrost.Ptr(0.00001), + OutputCostPerSecond: bifrost.Ptr(0.00025), } seconds := 60 usage := &schemas.BifrostLLMUsage{ @@ -317,9 +322,9 @@ func TestComputeSpeechCost_TokensPreferredOverDuration(t *testing.T) { func TestComputeSpeechCost_OutputFallsBackToPerSecond(t *testing.T) { // TTS: no output tokens → falls back to per-second output pricing p := configstoreTables.TableModelPricing{ - InputCostPerToken: 0.000001, - OutputCostPerToken: 0.000002, - OutputCostPerSecond: ptr(0.0001), + InputCostPerToken: bifrost.Ptr(0.000001), + OutputCostPerToken: bifrost.Ptr(0.000002), + OutputCostPerSecond: bifrost.Ptr(0.0001), } seconds := 120 usage := &schemas.BifrostLLMUsage{PromptTokens: 500} @@ -333,9 +338,9 @@ func TestComputeSpeechCost_OutputFallsBackToPerSecond(t *testing.T) { func TestComputeSpeechCost_OutputAudioTokenRate(t *testing.T) { // TTS: output uses OutputCostPerAudioToken when available p := configstoreTables.TableModelPricing{ - InputCostPerToken: 0.000001, - OutputCostPerToken: 0.000002, - OutputCostPerAudioToken: ptr(0.00005), + InputCostPerToken: bifrost.Ptr(0.000001), + OutputCostPerToken: bifrost.Ptr(0.000002), + OutputCostPerAudioToken: bifrost.Ptr(0.00005), } usage := &schemas.BifrostLLMUsage{ PromptTokens: 200, @@ -373,9 +378,9 @@ func TestComputeSpeechCost_NilUsageNilSeconds(t *testing.T) { func TestComputeTranscriptionCost_DurationBased(t *testing.T) { // assemblyai/nano: input_cost_per_second=0.00010278 p := configstoreTables.TableModelPricing{ - InputCostPerToken: 0, - OutputCostPerToken: 0, - InputCostPerSecond: ptr(0.00010278), + InputCostPerToken: 
bifrost.Ptr(0.0), + OutputCostPerToken: bifrost.Ptr(0.0), + InputCostPerSecond: bifrost.Ptr(0.00010278), } seconds := 300 // 5 minutes cost := computeTranscriptionCost(&p, nil, &seconds, nil) @@ -385,9 +390,9 @@ func TestComputeTranscriptionCost_DurationBased(t *testing.T) { func TestComputeTranscriptionCost_AudioTokenDetails(t *testing.T) { p := configstoreTables.TableModelPricing{ - InputCostPerToken: 0.000005, - OutputCostPerToken: 0.000015, - InputCostPerAudioToken: ptr(0.00001), + InputCostPerToken: bifrost.Ptr(0.000005), + OutputCostPerToken: bifrost.Ptr(0.000015), + InputCostPerAudioToken: bifrost.Ptr(0.00001), } usage := &schemas.BifrostLLMUsage{ PromptTokens: 2000, @@ -421,10 +426,10 @@ func TestComputeTranscriptionCost_TokenFallback(t *testing.T) { func TestComputeTranscriptionCost_TokenDetailsPreferredOverDuration(t *testing.T) { // STT: audio token details present → uses tokens, not per-second p := configstoreTables.TableModelPricing{ - InputCostPerToken: 0.000005, - OutputCostPerToken: 0, - InputCostPerAudioPerSecond: ptr(0.0001), - InputCostPerAudioToken: ptr(0.00001), + InputCostPerToken: bifrost.Ptr(0.000005), + OutputCostPerToken: bifrost.Ptr(0.0), + InputCostPerAudioPerSecond: bifrost.Ptr(0.0001), + InputCostPerAudioToken: bifrost.Ptr(0.00001), } seconds := 60 audioDetails := &schemas.TranscriptionUsageInputTokenDetails{ @@ -443,9 +448,9 @@ func TestComputeTranscriptionCost_TokenDetailsPreferredOverDuration(t *testing.T func TestComputeTranscriptionCost_DurationFallbackWhenNoTokens(t *testing.T) { // STT: no audio token details, no prompt tokens → falls back to per-second p := configstoreTables.TableModelPricing{ - InputCostPerToken: 0.000005, - OutputCostPerToken: 0.000015, - InputCostPerAudioPerSecond: ptr(0.0001), + InputCostPerToken: bifrost.Ptr(0.000005), + OutputCostPerToken: bifrost.Ptr(0.000015), + InputCostPerAudioPerSecond: bifrost.Ptr(0.0001), } seconds := 60 usage := &schemas.BifrostLLMUsage{ @@ -466,9 +471,9 @@ func 
TestComputeTranscriptionCost_DurationFallbackWhenNoTokens(t *testing.T) { func TestComputeImageCost_PerImage(t *testing.T) { // dall-e-3 (aiml): output_cost_per_image=$0.052 p := configstoreTables.TableModelPricing{ - InputCostPerToken: 0, - OutputCostPerToken: 0, - OutputCostPerImage: ptr(0.052), + InputCostPerToken: bifrost.Ptr(0.0), + OutputCostPerToken: bifrost.Ptr(0.0), + OutputCostPerImage: bifrost.Ptr(0.052), } usage := &schemas.ImageUsage{ OutputTokensDetails: &schemas.ImageTokenDetails{ @@ -482,7 +487,7 @@ func TestComputeImageCost_PerImage(t *testing.T) { func TestComputeImageCost_PerImageDefaultsToOne(t *testing.T) { p := configstoreTables.TableModelPricing{ - OutputCostPerImage: ptr(0.052), + OutputCostPerImage: bifrost.Ptr(0.052), } usage := &schemas.ImageUsage{} // No token details → defaults to 1 image cost := computeImageCost(&p, usage, "", "") @@ -491,8 +496,8 @@ func TestComputeImageCost_PerImageDefaultsToOne(t *testing.T) { func TestComputeImageCost_TokenBased(t *testing.T) { p := configstoreTables.TableModelPricing{ - InputCostPerToken: 0.000005, - OutputCostPerToken: 0.000015, + InputCostPerToken: bifrost.Ptr(0.000005), + OutputCostPerToken: bifrost.Ptr(0.000015), } usage := &schemas.ImageUsage{ InputTokens: 1000, @@ -506,8 +511,8 @@ func TestComputeImageCost_TokenBased(t *testing.T) { func TestComputeImageCost_TokenBasedWithDetails(t *testing.T) { p := configstoreTables.TableModelPricing{ - InputCostPerToken: 0.000005, - OutputCostPerToken: 0.000015, + InputCostPerToken: bifrost.Ptr(0.000005), + OutputCostPerToken: bifrost.Ptr(0.000015), } usage := &schemas.ImageUsage{ InputTokens: 2000, @@ -530,14 +535,14 @@ func TestComputeImageCost_TokenBasedWithDetails(t *testing.T) { } func TestComputeImageCost_NilUsage(t *testing.T) { - p := configstoreTables.TableModelPricing{OutputCostPerImage: ptr(0.05)} + p := configstoreTables.TableModelPricing{OutputCostPerImage: bifrost.Ptr(0.05)} assert.Equal(t, 0.0, computeImageCost(&p, nil, "", "")) } func 
TestComputeImageCost_InputAndOutputPerImage(t *testing.T) { p := configstoreTables.TableModelPricing{ - InputCostPerImage: ptr(0.01), - OutputCostPerImage: ptr(0.05), + InputCostPerImage: bifrost.Ptr(0.01), + OutputCostPerImage: bifrost.Ptr(0.05), } usage := &schemas.ImageUsage{ NumInputImages: 3, @@ -550,7 +555,7 @@ func TestComputeImageCost_InputAndOutputPerImage(t *testing.T) { func TestComputeImageCost_PerPixelOutput(t *testing.T) { p := configstoreTables.TableModelPricing{ - OutputCostPerPixel: ptr(0.000000019), // ~$0.02 for 1024x1024 + OutputCostPerPixel: bifrost.Ptr(0.000000019), // ~$0.02 for 1024x1024 } usage := &schemas.ImageUsage{ OutputTokensDetails: &schemas.ImageTokenDetails{NImages: 1}, @@ -562,8 +567,8 @@ func TestComputeImageCost_PerPixelOutput(t *testing.T) { func TestComputeImageCost_PerPixelInputAndOutput(t *testing.T) { p := configstoreTables.TableModelPricing{ - InputCostPerPixel: ptr(0.00000001), - OutputCostPerPixel: ptr(0.00000002), + InputCostPerPixel: bifrost.Ptr(0.00000001), + OutputCostPerPixel: bifrost.Ptr(0.00000002), } usage := &schemas.ImageUsage{ NumInputImages: 2, @@ -579,10 +584,10 @@ func TestComputeImageCost_PerPixelInputAndOutput(t *testing.T) { func TestComputeImageCost_TokensPreferredOverPixels(t *testing.T) { p := configstoreTables.TableModelPricing{ - InputCostPerToken: 0.000005, - OutputCostPerToken: 0.000015, - InputCostPerPixel: ptr(0.00000001), - OutputCostPerPixel: ptr(0.00000002), + InputCostPerToken: bifrost.Ptr(0.000005), + OutputCostPerToken: bifrost.Ptr(0.000015), + InputCostPerPixel: bifrost.Ptr(0.00000001), + OutputCostPerPixel: bifrost.Ptr(0.00000002), } usage := &schemas.ImageUsage{ InputTokens: 1000, @@ -596,8 +601,8 @@ func TestComputeImageCost_TokensPreferredOverPixels(t *testing.T) { func TestComputeImageCost_PixelsPreferredOverPerImage(t *testing.T) { p := configstoreTables.TableModelPricing{ - OutputCostPerPixel: ptr(0.00000002), - OutputCostPerImage: ptr(999.0), // should not be used + 
OutputCostPerPixel: bifrost.Ptr(0.00000002), + OutputCostPerImage: bifrost.Ptr(999.0), // should not be used } usage := &schemas.ImageUsage{ OutputTokensDetails: &schemas.ImageTokenDetails{NImages: 1}, @@ -609,8 +614,8 @@ func TestComputeImageCost_PixelsPreferredOverPerImage(t *testing.T) { func TestComputeImageCost_PerPixelFallsBackToPerImage_WhenNoSize(t *testing.T) { p := configstoreTables.TableModelPricing{ - OutputCostPerPixel: ptr(0.00000002), - OutputCostPerImage: ptr(0.05), + OutputCostPerPixel: bifrost.Ptr(0.00000002), + OutputCostPerImage: bifrost.Ptr(0.05), } usage := &schemas.ImageUsage{ OutputTokensDetails: &schemas.ImageTokenDetails{NImages: 2}, @@ -626,11 +631,11 @@ func TestComputeImageCost_QualityBasedRates(t *testing.T) { } // Quality-specific rates take precedence over base/size-tier p := configstoreTables.TableModelPricing{ - OutputCostPerImage: ptr(0.01), - OutputCostPerImageLowQuality: ptr(0.02), - OutputCostPerImageMediumQuality: ptr(0.03), - OutputCostPerImageHighQuality: ptr(0.04), - OutputCostPerImageAutoQuality: ptr(0.05), + OutputCostPerImage: bifrost.Ptr(0.01), + OutputCostPerImageLowQuality: bifrost.Ptr(0.02), + OutputCostPerImageMediumQuality: bifrost.Ptr(0.03), + OutputCostPerImageHighQuality: bifrost.Ptr(0.04), + OutputCostPerImageAutoQuality: bifrost.Ptr(0.05), } assert.InDelta(t, 0.02, computeImageCost(&p, usage, "", "low"), 1e-12) assert.InDelta(t, 0.03, computeImageCost(&p, usage, "", "medium"), 1e-12) @@ -659,9 +664,9 @@ func TestParseImagePixels(t *testing.T) { func TestComputeVideoCost_DurationBased(t *testing.T) { p := configstoreTables.TableModelPricing{ - InputCostPerToken: 0.000001, - OutputCostPerToken: 0, - OutputCostPerVideoPerSecond: ptr(0.001), + InputCostPerToken: bifrost.Ptr(0.000001), + OutputCostPerToken: bifrost.Ptr(0.0), + OutputCostPerVideoPerSecond: bifrost.Ptr(0.001), } seconds := 30 usage := &schemas.BifrostLLMUsage{PromptTokens: 500, TotalTokens: 500} @@ -674,9 +679,9 @@ func 
TestComputeVideoCost_DurationBased(t *testing.T) { func TestComputeVideoCost_OutputCostPerSecondFallback(t *testing.T) { p := configstoreTables.TableModelPricing{ - InputCostPerToken: 0, - OutputCostPerToken: 0, - OutputCostPerSecond: ptr(0.002), + InputCostPerToken: bifrost.Ptr(0.0), + OutputCostPerToken: bifrost.Ptr(0.0), + OutputCostPerSecond: bifrost.Ptr(0.002), } seconds := 10 cost := computeVideoCost(&p, nil, &seconds) @@ -685,8 +690,8 @@ func TestComputeVideoCost_OutputCostPerSecondFallback(t *testing.T) { func TestComputeVideoCost_NilSeconds(t *testing.T) { p := configstoreTables.TableModelPricing{ - InputCostPerToken: 0.000001, - OutputCostPerVideoPerSecond: ptr(0.001), + InputCostPerToken: bifrost.Ptr(0.000001), + OutputCostPerVideoPerSecond: bifrost.Ptr(0.001), } usage := &schemas.BifrostLLMUsage{PromptTokens: 1000} cost := computeVideoCost(&p, usage, nil) @@ -700,23 +705,23 @@ func TestComputeVideoCost_NilSeconds(t *testing.T) { func TestTieredInputRate_BelowThreshold(t *testing.T) { p := configstoreTables.TableModelPricing{ - InputCostPerToken: 0.000003, - InputCostPerTokenAbove200kTokens: ptr(0.000006), + InputCostPerToken: bifrost.Ptr(0.000003), + InputCostPerTokenAbove200kTokens: bifrost.Ptr(0.000006), } assert.Equal(t, 0.000003, tieredInputRate(&p, 100000)) } func TestTieredInputRate_AboveThreshold(t *testing.T) { p := configstoreTables.TableModelPricing{ - InputCostPerToken: 0.000003, - InputCostPerTokenAbove200kTokens: ptr(0.000006), + InputCostPerToken: bifrost.Ptr(0.000003), + InputCostPerTokenAbove200kTokens: bifrost.Ptr(0.000006), } assert.Equal(t, 0.000006, tieredInputRate(&p, 210000)) } func TestTieredInputRate_AboveThresholdNoTieredRate(t *testing.T) { p := configstoreTables.TableModelPricing{ - InputCostPerToken: 0.000003, + InputCostPerToken: bifrost.Ptr(0.000003), } // Falls back to base rate when tiered field is nil assert.Equal(t, 0.000003, tieredInputRate(&p, 300000)) @@ -724,8 +729,8 @@ func 
TestTieredInputRate_AboveThresholdNoTieredRate(t *testing.T) { func TestTieredOutputRate_AboveThreshold(t *testing.T) { p := configstoreTables.TableModelPricing{ - OutputCostPerToken: 0.000015, - OutputCostPerTokenAbove200kTokens: ptr(0.00003), + OutputCostPerToken: bifrost.Ptr(0.000015), + OutputCostPerTokenAbove200kTokens: bifrost.Ptr(0.00003), } assert.Equal(t, 0.00003, tieredOutputRate(&p, 250000)) } @@ -772,9 +777,9 @@ func TestExtractCostInput_TranscriptionWithSeconds(t *testing.T) { TranscriptionResponse: &schemas.BifrostTranscriptionResponse{ Usage: &schemas.TranscriptionUsage{ Seconds: &sec, - InputTokens: intPtr(1000), - OutputTokens: intPtr(200), - TotalTokens: intPtr(1200), + InputTokens: bifrost.Ptr(1000), + OutputTokens: bifrost.Ptr(200), + TotalTokens: bifrost.Ptr(1200), }, }, } @@ -833,7 +838,7 @@ func TestCalculateCost_SemanticCacheDirectHit(t *testing.T) { mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{ makeKey("gpt-4o", "openai", "chat"): { Model: "gpt-4o", Provider: "openai", Mode: "chat", - InputCostPerToken: 0.000005, OutputCostPerToken: 0.000015, + InputCostPerToken: bifrost.Ptr(0.000005), OutputCostPerToken: bifrost.Ptr(0.000015), }, }) @@ -853,7 +858,7 @@ func TestCalculateCost_SemanticCacheDirectHit(t *testing.T) { }, } - cost := mc.CalculateCost(resp) + cost := mc.CalculateCost(resp, nil) assert.Equal(t, 0.0, cost) } @@ -865,11 +870,11 @@ func TestCalculateCost_SemanticCacheSemanticHit(t *testing.T) { mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{ makeKey("gpt-4o", "openai", "chat"): { Model: "gpt-4o", Provider: "openai", Mode: "chat", - InputCostPerToken: 0.000005, OutputCostPerToken: 0.000015, + InputCostPerToken: bifrost.Ptr(0.000005), OutputCostPerToken: bifrost.Ptr(0.000015), }, makeKey("text-embedding-3-small", "openai", "embedding"): { Model: "text-embedding-3-small", Provider: "openai", Mode: "embedding", - InputCostPerToken: 0.00000002, + InputCostPerToken: 
bifrost.Ptr(0.00000002), }, }) @@ -892,7 +897,7 @@ func TestCalculateCost_SemanticCacheSemanticHit(t *testing.T) { }, } - cost := mc.CalculateCost(resp) + cost := mc.CalculateCost(resp, nil) // Only embedding cost: 500 * 0.00000002 = 0.00001 assert.InDelta(t, 0.00001, cost, 1e-12) } @@ -905,11 +910,11 @@ func TestCalculateCost_SemanticCacheMiss(t *testing.T) { mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{ makeKey("gpt-4o", "openai", "chat"): { Model: "gpt-4o", Provider: "openai", Mode: "chat", - InputCostPerToken: 0.000005, OutputCostPerToken: 0.000015, + InputCostPerToken: bifrost.Ptr(0.000005), OutputCostPerToken: bifrost.Ptr(0.000015), }, makeKey("text-embedding-3-small", "openai", "embedding"): { Model: "text-embedding-3-small", Provider: "openai", Mode: "embedding", - InputCostPerToken: 0.00000002, + InputCostPerToken: bifrost.Ptr(0.00000002), }, }) @@ -930,7 +935,7 @@ func TestCalculateCost_SemanticCacheMiss(t *testing.T) { }, } - cost := mc.CalculateCost(resp) + cost := mc.CalculateCost(resp, nil) // Base cost: 1000*0.000005 + 500*0.000015 = 0.005 + 0.0075 = 0.0125 // Embedding cost: 500 * 0.00000002 = 0.00001 // Total: 0.01251 @@ -951,7 +956,7 @@ func TestCalculateCost_SemanticCacheHitNoEmbeddingInfo(t *testing.T) { }, } - cost := mc.CalculateCost(resp) + cost := mc.CalculateCost(resp, nil) assert.Equal(t, 0.0, cost) } @@ -961,7 +966,7 @@ func TestCalculateCost_SemanticCacheHitNoEmbeddingInfo(t *testing.T) { func TestCalculateCost_NilResponse(t *testing.T) { mc := testCatalogWithPricing(nil) - assert.Equal(t, 0.0, mc.CalculateCost(nil)) + assert.Equal(t, 0.0, mc.CalculateCost(nil, nil)) } func TestCalculateCost_ProviderComputedCostPassthrough(t *testing.T) { @@ -978,7 +983,7 @@ func TestCalculateCost_ProviderComputedCostPassthrough(t *testing.T) { }, }) - cost := mc.CalculateCost(resp) + cost := mc.CalculateCost(resp, nil) assert.Equal(t, 0.99, cost) } @@ -988,7 +993,7 @@ func TestCalculateCost_NoUsageData(t *testing.T) { }) 
resp := makeChatResponse(schemas.OpenAI, "gpt-4o", nil) - cost := mc.CalculateCost(resp) + cost := mc.CalculateCost(resp, nil) assert.Equal(t, 0.0, cost) } @@ -997,9 +1002,9 @@ func TestCalculateCost_ChatCompletion_GPT4o(t *testing.T) { mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{ makeKey("gpt-4o", "openai", "chat"): { Model: "gpt-4o", Provider: "openai", Mode: "chat", - InputCostPerToken: 0.000005, - OutputCostPerToken: 0.000015, - CacheReadInputTokenCost: ptr(0.0000005), + InputCostPerToken: bifrost.Ptr(0.000005), + OutputCostPerToken: bifrost.Ptr(0.000015), + CacheReadInputTokenCost: bifrost.Ptr(0.0000005), }, }) @@ -1009,7 +1014,7 @@ func TestCalculateCost_ChatCompletion_GPT4o(t *testing.T) { TotalTokens: 12000, }) - cost := mc.CalculateCost(resp) + cost := mc.CalculateCost(resp, nil) // 10000*0.000005 + 2000*0.000015 = 0.05 + 0.03 = 0.08 assert.InDelta(t, 0.08, cost, 1e-12) } @@ -1019,12 +1024,12 @@ func TestCalculateCost_ChatCompletion_Claude35Sonnet_WithCache(t *testing.T) { mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{ makeKey("anthropic.claude-3-5-sonnet-20241022-v2:0", "bedrock", "chat"): { Model: "anthropic.claude-3-5-sonnet-20241022-v2:0", Provider: "bedrock", Mode: "chat", - InputCostPerToken: 0.000003, - OutputCostPerToken: 0.000015, - CacheReadInputTokenCost: ptr(0.0000003), - CacheCreationInputTokenCost: ptr(0.00000375), - InputCostPerTokenAbove200kTokens: ptr(0.000006), - OutputCostPerTokenAbove200kTokens: ptr(0.00003), + InputCostPerToken: bifrost.Ptr(0.000003), + OutputCostPerToken: bifrost.Ptr(0.000015), + CacheReadInputTokenCost: bifrost.Ptr(0.0000003), + CacheCreationInputTokenCost: bifrost.Ptr(0.00000375), + InputCostPerTokenAbove200kTokens: bifrost.Ptr(0.000006), + OutputCostPerTokenAbove200kTokens: bifrost.Ptr(0.00003), }, }) @@ -1038,7 +1043,7 @@ func TestCalculateCost_ChatCompletion_Claude35Sonnet_WithCache(t *testing.T) { }, }) - cost := mc.CalculateCost(resp) + cost := 
mc.CalculateCost(resp, nil) // Both cached read and write tokens are input-side deductions from promptTokens. // Input: (5000-3000-500)*0.000003 + 3000*0.0000003 + 500*0.00000375 = 0.0045 + 0.0009 + 0.001875 = 0.007275 // Output: 1000*0.000015 = 0.015 @@ -1051,8 +1056,8 @@ func TestCalculateCost_Embedding(t *testing.T) { mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{ makeKey("amazon.titan-embed-text-v1", "bedrock", "embedding"): { Model: "amazon.titan-embed-text-v1", Provider: "bedrock", Mode: "embedding", - InputCostPerToken: 0.0000001, - OutputCostPerToken: 0, + InputCostPerToken: bifrost.Ptr(0.0000001), + OutputCostPerToken: bifrost.Ptr(0.0), }, }) @@ -1061,7 +1066,7 @@ func TestCalculateCost_Embedding(t *testing.T) { TotalTokens: 10000, }) - cost := mc.CalculateCost(resp) + cost := mc.CalculateCost(resp, nil) // 10000 * 0.0000001 = 0.001 assert.InDelta(t, 0.001, cost, 1e-12) } @@ -1070,8 +1075,8 @@ func TestCalculateCost_Rerank(t *testing.T) { mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{ makeKey("amazon.rerank-v1:0", "bedrock", "rerank"): { Model: "amazon.rerank-v1:0", Provider: "bedrock", Mode: "rerank", - InputCostPerToken: 0, - OutputCostPerToken: 0, + InputCostPerToken: bifrost.Ptr(0.0), + OutputCostPerToken: bifrost.Ptr(0.0), }, }) @@ -1080,7 +1085,7 @@ func TestCalculateCost_Rerank(t *testing.T) { TotalTokens: 500, }) - cost := mc.CalculateCost(resp) + cost := mc.CalculateCost(resp, nil) assert.Equal(t, 0.0, cost) } @@ -1089,7 +1094,7 @@ func TestCalculateCost_ImageGeneration(t *testing.T) { mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{ makeKey("dall-e-3", "aiml", "image_generation"): { Model: "dall-e-3", Provider: "aiml", Mode: "image_generation", - OutputCostPerImage: ptr(0.052), + OutputCostPerImage: bifrost.Ptr(0.052), }, }) @@ -1097,7 +1102,7 @@ func TestCalculateCost_ImageGeneration(t *testing.T) { OutputTokensDetails: &schemas.ImageTokenDetails{NImages: 3}, 
}) - cost := mc.CalculateCost(resp) + cost := mc.CalculateCost(resp, nil) // 3 * 0.052 = 0.156 assert.InDelta(t, 0.156, cost, 1e-12) } @@ -1119,7 +1124,7 @@ func TestCalculateCost_StreamRequestTypeNormalized(t *testing.T) { }, } - cost := mc.CalculateCost(resp) + cost := mc.CalculateCost(resp, nil) assert.InDelta(t, 0.0125, cost, 1e-12) } @@ -1128,7 +1133,7 @@ func TestCalculateCost_NoPricingData(t *testing.T) { resp := makeChatResponse(schemas.OpenAI, "unknown-model", &schemas.BifrostLLMUsage{ PromptTokens: 1000, CompletionTokens: 500, TotalTokens: 1500, }) - cost := mc.CalculateCost(resp) + cost := mc.CalculateCost(resp, nil) assert.Equal(t, 0.0, cost) } @@ -1140,57 +1145,51 @@ func TestGetPricing_DirectLookup(t *testing.T) { mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{ makeKey("gpt-4o", "openai", "chat"): chatPricing(0.000005, 0.000015), }) - p, ok := mc.getPricing("gpt-4o", "openai", schemas.ChatCompletionRequest) - require.True(t, ok) - assert.Equal(t, 0.000005, p.InputCostPerToken) + p := mc.resolvePricing("openai", "gpt-4o", "", schemas.ChatCompletionRequest, PricingLookupScopes{Provider: "openai"}) + assert.Equal(t, 0.000005, derefF(p.InputCostPerToken)) } func TestGetPricing_GeminiFallsBackToVertex(t *testing.T) { mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{ makeKey("gemini-2.0-flash", "vertex", "chat"): { Model: "gemini-2.0-flash", Provider: "vertex", Mode: "chat", - InputCostPerToken: 0.0000001, OutputCostPerToken: 0.0000004, + InputCostPerToken: bifrost.Ptr(0.0000001), OutputCostPerToken: bifrost.Ptr(0.0000004), }, }) - p, ok := mc.getPricing("gemini-2.0-flash", "gemini", schemas.ChatCompletionRequest) - require.True(t, ok) - assert.Equal(t, 0.0000001, p.InputCostPerToken) + p := mc.resolvePricing("gemini", "gemini-2.0-flash", "", schemas.ChatCompletionRequest, PricingLookupScopes{Provider: "gemini"}) + assert.Equal(t, 0.0000001, derefF(p.InputCostPerToken)) } func 
TestGetPricing_VertexStripsProviderPrefix(t *testing.T) { mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{ makeKey("gemini-2.0-flash", "vertex", "chat"): chatPricing(0.0000001, 0.0000004), }) - p, ok := mc.getPricing("google/gemini-2.0-flash", "vertex", schemas.ChatCompletionRequest) - require.True(t, ok) - assert.Equal(t, 0.0000001, p.InputCostPerToken) + p := mc.resolvePricing("vertex", "google/gemini-2.0-flash", "", schemas.ChatCompletionRequest, PricingLookupScopes{Provider: "vertex"}) + assert.Equal(t, 0.0000001, derefF(p.InputCostPerToken)) } func TestGetPricing_BedrockAddsAnthropicPrefix(t *testing.T) { mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{ makeKey("anthropic.claude-3-5-sonnet-20241022-v2:0", "bedrock", "chat"): chatPricing(0.000003, 0.000015), }) - p, ok := mc.getPricing("claude-3-5-sonnet-20241022-v2:0", "bedrock", schemas.ChatCompletionRequest) - require.True(t, ok) - assert.Equal(t, 0.000003, p.InputCostPerToken) + p := mc.resolvePricing("bedrock", "claude-3-5-sonnet-20241022-v2:0", "", schemas.ChatCompletionRequest, PricingLookupScopes{Provider: "bedrock"}) + assert.Equal(t, 0.000003, derefF(p.InputCostPerToken)) } func TestGetPricing_ResponsesFallsBackToChat(t *testing.T) { mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{ makeKey("gpt-4o", "openai", "chat"): chatPricing(0.000005, 0.000015), }) - p, ok := mc.getPricing("gpt-4o", "openai", schemas.ResponsesRequest) - require.True(t, ok) - assert.Equal(t, 0.000005, p.InputCostPerToken) + p := mc.resolvePricing("openai", "gpt-4o", "", schemas.ResponsesRequest, PricingLookupScopes{Provider: "openai"}) + assert.Equal(t, 0.000005, derefF(p.InputCostPerToken)) } func TestGetPricing_ResponsesStreamFallsBackToChat(t *testing.T) { mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{ makeKey("gpt-4o", "openai", "chat"): chatPricing(0.000005, 0.000015), }) - p, ok := mc.getPricing("gpt-4o", 
"openai", schemas.ResponsesStreamRequest) - require.True(t, ok) - assert.Equal(t, 0.000005, p.InputCostPerToken) + p := mc.resolvePricing("openai", "gpt-4o", "", schemas.ResponsesStreamRequest, PricingLookupScopes{Provider: "openai"}) + assert.Equal(t, 0.000005, derefF(p.InputCostPerToken)) } func TestGetPricing_GeminiResponsesFallsBackToVertexChat(t *testing.T) { @@ -1198,15 +1197,14 @@ func TestGetPricing_GeminiResponsesFallsBackToVertexChat(t *testing.T) { makeKey("gemini-2.0-flash", "vertex", "chat"): chatPricing(0.0000001, 0.0000004), }) // gemini provider + responses request → try vertex + responses → try vertex + chat - p, ok := mc.getPricing("gemini-2.0-flash", "gemini", schemas.ResponsesRequest) - require.True(t, ok) - assert.Equal(t, 0.0000001, p.InputCostPerToken) + p := mc.resolvePricing("gemini", "gemini-2.0-flash", "", schemas.ResponsesRequest, PricingLookupScopes{Provider: "gemini"}) + assert.Equal(t, 0.0000001, derefF(p.InputCostPerToken)) } func TestGetPricing_NotFound(t *testing.T) { mc := testCatalogWithPricing(nil) - _, ok := mc.getPricing("nonexistent", "openai", schemas.ChatCompletionRequest) - assert.False(t, ok) + p := mc.resolvePricing("openai", "nonexistent", "", schemas.ChatCompletionRequest, PricingLookupScopes{Provider: "openai"}) + assert.Nil(t, p) } // ========================================================================= @@ -1219,9 +1217,9 @@ func TestResolvePricing_DeploymentFallback(t *testing.T) { }) // Model not found directly, but deployment matches - p := mc.resolvePricing("openai", "gpt-4o-custom", "my-deployment", schemas.ChatCompletionRequest) + p := mc.resolvePricing("openai", "gpt-4o-custom", "my-deployment", schemas.ChatCompletionRequest, PricingLookupScopes{}) require.NotNil(t, p) - assert.Equal(t, 0.000005, p.InputCostPerToken) + assert.Equal(t, 0.000005, derefF(p.InputCostPerToken)) } func TestResolvePricing_ModelFoundDirectly(t *testing.T) { @@ -1231,14 +1229,14 @@ func TestResolvePricing_ModelFoundDirectly(t 
*testing.T) { }) // Model found directly — doesn't fall back to deployment - p := mc.resolvePricing("openai", "gpt-4o", "my-deployment", schemas.ChatCompletionRequest) + p := mc.resolvePricing("openai", "gpt-4o", "my-deployment", schemas.ChatCompletionRequest, PricingLookupScopes{}) require.NotNil(t, p) - assert.Equal(t, 0.000005, p.InputCostPerToken) + assert.Equal(t, 0.000005, derefF(p.InputCostPerToken)) } func TestResolvePricing_NothingFound(t *testing.T) { mc := testCatalogWithPricing(nil) - p := mc.resolvePricing("openai", "unknown", "", schemas.ChatCompletionRequest) + p := mc.resolvePricing("openai", "unknown", "", schemas.ChatCompletionRequest, PricingLookupScopes{}) assert.Nil(t, p) } @@ -1327,14 +1325,14 @@ func TestCalculateCost_200kTier_EndToEnd(t *testing.T) { mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{ makeKey("anthropic.claude-3-5-sonnet-20240620-v1:0", "bedrock", "chat"): { Model: "anthropic.claude-3-5-sonnet-20240620-v1:0", Provider: "bedrock", Mode: "chat", - InputCostPerToken: 0.000003, - OutputCostPerToken: 0.000015, - InputCostPerTokenAbove200kTokens: ptr(0.000006), - OutputCostPerTokenAbove200kTokens: ptr(0.00003), - CacheReadInputTokenCost: ptr(0.0000003), - CacheCreationInputTokenCost: ptr(0.00000375), - CacheReadInputTokenCostAbove200kTokens: ptr(0.0000006), - CacheCreationInputTokenCostAbove200kTokens: ptr(0.0000075), + InputCostPerToken: bifrost.Ptr(0.000003), + OutputCostPerToken: bifrost.Ptr(0.000015), + InputCostPerTokenAbove200kTokens: bifrost.Ptr(0.000006), + OutputCostPerTokenAbove200kTokens: bifrost.Ptr(0.00003), + CacheReadInputTokenCost: bifrost.Ptr(0.0000003), + CacheCreationInputTokenCost: bifrost.Ptr(0.00000375), + CacheReadInputTokenCostAbove200kTokens: bifrost.Ptr(0.0000006), + CacheCreationInputTokenCostAbove200kTokens: bifrost.Ptr(0.0000075), }, }) @@ -1344,7 +1342,7 @@ func TestCalculateCost_200kTier_EndToEnd(t *testing.T) { TotalTokens: 210000, // Above 200k }) - cost := 
mc.CalculateCost(resp) + cost := mc.CalculateCost(resp, nil) // Tiered rate: input=0.000006, output=0.00003 // 190000*0.000006 + 20000*0.00003 = 1.14 + 0.6 = 1.74 assert.InDelta(t, 1.74, cost, 1e-9) @@ -1365,14 +1363,14 @@ func TestCalculateCost_ProviderCostZeroTotalStillCalculates(t *testing.T) { }, }) - cost := mc.CalculateCost(resp) + cost := mc.CalculateCost(resp, nil) assert.InDelta(t, 0.0125, cost, 1e-12) } func TestCalculateCost_AllCachedTokens(t *testing.T) { // All prompt tokens are from cache p := chatPricing(0.000005, 0.000015) - p.CacheReadInputTokenCost = ptr(0.0000005) + p.CacheReadInputTokenCost = bifrost.Ptr(0.0000005) usage := &schemas.BifrostLLMUsage{ PromptTokens: 1000, @@ -1398,8 +1396,8 @@ func TestCalculateCost_ImageGeneration_NilUsage_PerImagePricing(t *testing.T) { Model: "dall-e-3", Provider: "openai", Mode: "image_generation", - InputCostPerToken: 0, - OutputCostPerImage: ptr(0.04), + InputCostPerToken: bifrost.Ptr(0.0), + OutputCostPerImage: bifrost.Ptr(0.04), } mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{ @@ -1407,7 +1405,7 @@ func TestCalculateCost_ImageGeneration_NilUsage_PerImagePricing(t *testing.T) { }) resp := makeImageResponse("openai", "dall-e-3", nil) - cost := mc.CalculateCost(resp) + cost := mc.CalculateCost(resp, nil) // 1 image * $0.04 = $0.04 assert.InDelta(t, 0.04, cost, 1e-12) } @@ -1418,8 +1416,8 @@ func TestCalculateCost_ImageGeneration_NilUsage_InputAndOutputPerImage(t *testin Model: "test-image-model", Provider: "test", Mode: "image_generation", - InputCostPerImage: ptr(0.01), - OutputCostPerImage: ptr(0.04), + InputCostPerImage: bifrost.Ptr(0.01), + OutputCostPerImage: bifrost.Ptr(0.04), } mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{ @@ -1427,7 +1425,7 @@ func TestCalculateCost_ImageGeneration_NilUsage_InputAndOutputPerImage(t *testin }) resp := makeImageResponse("test", "test-image-model", nil) - cost := mc.CalculateCost(resp) + cost := 
mc.CalculateCost(resp, nil) // NumInputImages is 0 (not populated from request), so only output pricing applies // 1 output image * $0.04 = $0.04 assert.InDelta(t, 0.04, cost, 1e-12) @@ -1439,8 +1437,8 @@ func TestCalculateCost_ImageGeneration_WithInputImages(t *testing.T) { Model: "gpt-image-1", Provider: "openai", Mode: "image_generation", - InputCostPerImage: ptr(0.01), - OutputCostPerImage: ptr(0.04), + InputCostPerImage: bifrost.Ptr(0.01), + OutputCostPerImage: bifrost.Ptr(0.04), } mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{ @@ -1450,7 +1448,7 @@ func TestCalculateCost_ImageGeneration_WithInputImages(t *testing.T) { resp := makeImageResponse("openai", "gpt-image-1", &schemas.ImageUsage{ NumInputImages: 2, }) - cost := mc.CalculateCost(resp) + cost := mc.CalculateCost(resp, nil) // 2 input images * $0.01 + 1 output image * $0.04 = $0.06 assert.InDelta(t, 0.06, cost, 1e-12) } @@ -1461,7 +1459,7 @@ func TestCalculateCost_ImageGeneration_OutputCountFromData(t *testing.T) { Model: "dall-e-3", Provider: "openai", Mode: "image_generation", - OutputCostPerImage: ptr(0.04), + OutputCostPerImage: bifrost.Ptr(0.04), } mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{ @@ -1482,7 +1480,7 @@ func TestCalculateCost_ImageGeneration_OutputCountFromData(t *testing.T) { }, }, } - cost := mc.CalculateCost(resp) + cost := mc.CalculateCost(resp, nil) // 3 output images * $0.04 = $0.12 assert.InDelta(t, 0.12, cost, 1e-12) } @@ -1493,8 +1491,8 @@ func TestCalculateCost_ImageGeneration_NilUsage_NoPerImagePricing(t *testing.T) Model: "token-only-model", Provider: "test", Mode: "image_generation", - InputCostPerToken: 0.000001, - OutputCostPerToken: 0.000002, + InputCostPerToken: bifrost.Ptr(0.000001), + OutputCostPerToken: bifrost.Ptr(0.000002), } mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{ @@ -1502,7 +1500,7 @@ func TestCalculateCost_ImageGeneration_NilUsage_NoPerImagePricing(t *testing.T) }) 
resp := makeImageResponse("test", "token-only-model", nil) - cost := mc.CalculateCost(resp) + cost := mc.CalculateCost(resp, nil) // No per-image pricing and all tokens are zero → 0 assert.InDelta(t, 0.0, cost, 1e-12) } @@ -1513,7 +1511,7 @@ func TestCalculateCost_ImageGeneration_EmptyUsage_PerImagePricing(t *testing.T) Model: "dall-e-3", Provider: "openai", Mode: "image_generation", - OutputCostPerImage: ptr(0.04), + OutputCostPerImage: bifrost.Ptr(0.04), } mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{ @@ -1521,16 +1519,16 @@ func TestCalculateCost_ImageGeneration_EmptyUsage_PerImagePricing(t *testing.T) }) resp := makeImageResponse("openai", "dall-e-3", &schemas.ImageUsage{}) - cost := mc.CalculateCost(resp) + cost := mc.CalculateCost(resp, nil) assert.InDelta(t, 0.04, cost, 1e-12) } func TestComputeImageCost_MixedInputTokensOutputPerImage(t *testing.T) { // Input has tokens (text prompt), output has no tokens but per-image pricing p := configstoreTables.TableModelPricing{ - InputCostPerToken: 0.000005, - OutputCostPerToken: 0.000015, - OutputCostPerImage: ptr(0.04), + InputCostPerToken: bifrost.Ptr(0.000005), + OutputCostPerToken: bifrost.Ptr(0.000015), + OutputCostPerImage: bifrost.Ptr(0.04), } usage := &schemas.ImageUsage{ InputTokens: 500, @@ -1545,9 +1543,9 @@ func TestComputeImageCost_MixedInputTokensOutputPerImage(t *testing.T) { func TestComputeImageCost_MixedInputPerImageOutputTokens(t *testing.T) { // Input has no tokens but per-image count, output has tokens p := configstoreTables.TableModelPricing{ - InputCostPerToken: 0.000005, - OutputCostPerToken: 0.000015, - InputCostPerImage: ptr(0.01), + InputCostPerToken: bifrost.Ptr(0.000005), + OutputCostPerToken: bifrost.Ptr(0.000015), + InputCostPerImage: bifrost.Ptr(0.01), } usage := &schemas.ImageUsage{ NumInputImages: 3, @@ -1562,10 +1560,10 @@ func TestComputeImageCost_MixedInputPerImageOutputTokens(t *testing.T) { func TestComputeImageCost_BothHaveTokens_IgnoresPerImage(t 
*testing.T) { // Both sides have tokens — per-image pricing is ignored p := configstoreTables.TableModelPricing{ - InputCostPerToken: 0.000005, - OutputCostPerToken: 0.000015, - InputCostPerImage: ptr(0.01), - OutputCostPerImage: ptr(0.04), + InputCostPerToken: bifrost.Ptr(0.000005), + OutputCostPerToken: bifrost.Ptr(0.000015), + InputCostPerImage: bifrost.Ptr(0.01), + OutputCostPerImage: bifrost.Ptr(0.04), } usage := &schemas.ImageUsage{ InputTokens: 200, diff --git a/framework/modelcatalog/utils.go b/framework/modelcatalog/utils.go index c477696c6a..4808ee844d 100644 --- a/framework/modelcatalog/utils.go +++ b/framework/modelcatalog/utils.go @@ -3,6 +3,7 @@ package modelcatalog import ( "strings" + "github.com/bytedance/sonic" "github.com/maximhq/bifrost/core/schemas" configstoreTables "github.com/maximhq/bifrost/framework/configstore/tables" ) @@ -163,11 +164,7 @@ func convertPricingDataToTableModelPricing(modelKey string, entry PricingEntry) // convertTableModelPricingToPricingData converts the TableModelPricing struct to a PricingEntry struct func convertTableModelPricingToPricingData(pricing *configstoreTables.TableModelPricing) *PricingEntry { - return &PricingEntry{ - BaseModel: pricing.BaseModel, - Provider: pricing.Provider, - Mode: pricing.Mode, - + options := PricingOptions{ // Costs - Text InputCostPerToken: pricing.InputCostPerToken, OutputCostPerToken: pricing.OutputCostPerToken, @@ -230,4 +227,30 @@ func convertTableModelPricingToPricingData(pricing *configstoreTables.TableModel SearchContextCostPerQuery: pricing.SearchContextCostPerQuery, CodeInterpreterCostPerSession: pricing.CodeInterpreterCostPerSession, } + return &PricingEntry{ + BaseModel: pricing.BaseModel, + Provider: pricing.Provider, + Mode: pricing.Mode, + PricingOptions: options, + } +} + +// convertTablePricingOverrideToPricingOverride converts a TablePricingOverride to a PricingOverride. 
+func convertTablePricingOverrideToPricingOverride(override *configstoreTables.TablePricingOverride) (PricingOverride, error) { + var options PricingOptions + if err := sonic.Unmarshal([]byte(override.PricingPatchJSON), &options); err != nil { + return PricingOverride{}, err + } + return PricingOverride{ + ID: override.ID, + Name: override.Name, + ScopeKind: ScopeKind(override.ScopeKind), + VirtualKeyID: override.VirtualKeyID, + ProviderID: override.ProviderID, + ProviderKeyID: override.ProviderKeyID, + MatchType: MatchType(override.MatchType), + Pattern: override.Pattern, + RequestTypes: override.RequestTypes, + Options: options, + }, nil } diff --git a/framework/streaming/audio.go b/framework/streaming/audio.go index d36fb47d36..9cc2aa6924 100644 --- a/framework/streaming/audio.go +++ b/framework/streaming/audio.go @@ -8,6 +8,7 @@ import ( bifrost "github.com/maximhq/bifrost/core" schemas "github.com/maximhq/bifrost/core/schemas" + "github.com/maximhq/bifrost/framework/modelcatalog" ) // buildCompleteMessageFromAudioStreamChunks builds a complete message from accumulated audio chunks @@ -145,7 +146,7 @@ func (a *Accumulator) processAudioStreamingResponse(ctx *schemas.BifrostContext, chunk.ChunkIndex = result.SpeechStreamResponse.ExtraFields.ChunkIndex if isFinalChunk { if a.pricingManager != nil { - cost := a.pricingManager.CalculateCost(result) + cost := a.pricingManager.CalculateCost(result, modelcatalog.PricingLookupScopesFromContext(ctx, string(result.GetExtraFields().Provider))) chunk.Cost = bifrost.Ptr(cost) } chunk.SemanticCacheDebug = result.GetExtraFields().CacheDebug diff --git a/framework/streaming/chat.go b/framework/streaming/chat.go index dafd170902..1d87106913 100644 --- a/framework/streaming/chat.go +++ b/framework/streaming/chat.go @@ -8,6 +8,7 @@ import ( bifrost "github.com/maximhq/bifrost/core" "github.com/maximhq/bifrost/core/schemas" + "github.com/maximhq/bifrost/framework/modelcatalog" ) // deepCopyChatStreamDelta creates a deep copy of 
ChatStreamResponseChoiceDelta @@ -497,7 +498,7 @@ func (a *Accumulator) processChatStreamingResponse(ctx *schemas.BifrostContext, chunk.ChunkIndex = result.TextCompletionResponse.ExtraFields.ChunkIndex if isFinalChunk { if a.pricingManager != nil { - cost := a.pricingManager.CalculateCost(result) + cost := a.pricingManager.CalculateCost(result, modelcatalog.PricingLookupScopesFromContext(ctx, string(result.GetExtraFields().Provider))) chunk.Cost = bifrost.Ptr(cost) } chunk.SemanticCacheDebug = result.GetExtraFields().CacheDebug @@ -523,7 +524,7 @@ func (a *Accumulator) processChatStreamingResponse(ctx *schemas.BifrostContext, } if isFinalChunk { if a.pricingManager != nil { - cost := a.pricingManager.CalculateCost(result) + cost := a.pricingManager.CalculateCost(result, modelcatalog.PricingLookupScopesFromContext(ctx, string(result.GetExtraFields().Provider))) chunk.Cost = bifrost.Ptr(cost) } chunk.SemanticCacheDebug = result.GetExtraFields().CacheDebug diff --git a/framework/streaming/images.go b/framework/streaming/images.go index 23b2dd8f5c..446d1ca3b3 100644 --- a/framework/streaming/images.go +++ b/framework/streaming/images.go @@ -8,6 +8,7 @@ import ( bifrost "github.com/maximhq/bifrost/core" schemas "github.com/maximhq/bifrost/core/schemas" + "github.com/maximhq/bifrost/framework/modelcatalog" ) // buildCompleteImageFromImageStreamChunks builds a complete image generation response from accumulated chunks @@ -273,7 +274,7 @@ func (a *Accumulator) processImageStreamingResponse(ctx *schemas.BifrostContext, if isFinalChunk { if a.pricingManager != nil { - cost := a.pricingManager.CalculateCost(result) + cost := a.pricingManager.CalculateCost(result, modelcatalog.PricingLookupScopesFromContext(ctx, string(result.GetExtraFields().Provider))) chunk.Cost = bifrost.Ptr(cost) } chunk.SemanticCacheDebug = result.GetExtraFields().CacheDebug diff --git a/framework/streaming/responses.go b/framework/streaming/responses.go index 62a4739c6d..aa38248167 100644 --- 
a/framework/streaming/responses.go +++ b/framework/streaming/responses.go @@ -8,6 +8,7 @@ import ( bifrost "github.com/maximhq/bifrost/core" "github.com/maximhq/bifrost/core/schemas" + "github.com/maximhq/bifrost/framework/modelcatalog" ) // deepCopyResponsesStreamResponse creates a deep copy of BifrostResponsesStreamResponse @@ -917,7 +918,7 @@ func (a *Accumulator) processResponsesStreamingResponse(ctx *schemas.BifrostCont chunk.ChunkIndex = result.ResponsesStreamResponse.ExtraFields.ChunkIndex if isFinalChunk { if a.pricingManager != nil { - cost := a.pricingManager.CalculateCost(result) + cost := a.pricingManager.CalculateCost(result, modelcatalog.PricingLookupScopesFromContext(ctx, string(result.GetExtraFields().Provider))) chunk.Cost = bifrost.Ptr(cost) } chunk.SemanticCacheDebug = result.GetExtraFields().CacheDebug diff --git a/framework/streaming/transcription.go b/framework/streaming/transcription.go index 593c7f80b2..56fb3e477c 100644 --- a/framework/streaming/transcription.go +++ b/framework/streaming/transcription.go @@ -8,6 +8,7 @@ import ( bifrost "github.com/maximhq/bifrost/core" "github.com/maximhq/bifrost/core/schemas" + "github.com/maximhq/bifrost/framework/modelcatalog" ) // buildCompleteMessageFromTranscriptionStreamChunks builds a complete message from accumulated transcription chunks @@ -162,7 +163,7 @@ func (a *Accumulator) processTranscriptionStreamingResponse(ctx *schemas.Bifrost } if isFinalChunk { if a.pricingManager != nil { - cost := a.pricingManager.CalculateCost(result) + cost := a.pricingManager.CalculateCost(result, modelcatalog.PricingLookupScopesFromContext(ctx, string(result.GetExtraFields().Provider))) chunk.Cost = bifrost.Ptr(cost) } chunk.SemanticCacheDebug = result.GetExtraFields().CacheDebug diff --git a/framework/tracing/tracer.go b/framework/tracing/tracer.go index 3d55ca2ff5..c5088b17ed 100644 --- a/framework/tracing/tracer.go +++ b/framework/tracing/tracer.go @@ -164,7 +164,7 @@ func (t *Tracer) 
PopulateLLMRequestAttributes(handle schemas.SpanHandle, req *sc } // PopulateLLMResponseAttributes populates all LLM-specific response attributes on the span. -func (t *Tracer) PopulateLLMResponseAttributes(handle schemas.SpanHandle, resp *schemas.BifrostResponse, err *schemas.BifrostError) { +func (t *Tracer) PopulateLLMResponseAttributes(ctx *schemas.BifrostContext, handle schemas.SpanHandle, resp *schemas.BifrostResponse, err *schemas.BifrostError) { h, ok := handle.(*spanHandle) if !ok || h == nil { return @@ -185,7 +185,7 @@ func (t *Tracer) PopulateLLMResponseAttributes(handle schemas.SpanHandle, resp * } // Populate cost attribute using pricing manager if t.pricingManager != nil && resp != nil { - cost := t.pricingManager.CalculateCost(resp) + cost := t.pricingManager.CalculateCost(resp, modelcatalog.PricingLookupScopesFromContext(ctx, string(resp.GetExtraFields().Provider))) span.SetAttribute(schemas.AttrUsageCost, cost) } } diff --git a/plugins/governance/main.go b/plugins/governance/main.go index 200afe1237..0f6e8dd796 100644 --- a/plugins/governance/main.go +++ b/plugins/governance/main.go @@ -1223,6 +1223,9 @@ func (p *GovernancePlugin) PostLLMHook(ctx *schemas.BifrostContext, result *sche isFinalChunk := bifrost.IsFinalChunk(ctx) + // Build pricing scopes from context using the governance VK ID (not the raw VK token) + pricingScopes := modelcatalog.PricingLookupScopesFromContext(ctx, string(provider)) + // Always process usage tracking (with or without virtual key) // When user auth is present, skip VK usage tracking to avoid double-counting effectiveVK := virtualKey @@ -1235,7 +1238,7 @@ func (p *GovernancePlugin) PostLLMHook(ctx *schemas.BifrostContext, result *sche p.wg.Add(1) go func() { defer p.wg.Done() - p.postHookWorker(result, provider, model, requestType, effectiveVK, requestID, userID, isCacheRead, isBatch, isFinalChunk) + p.postHookWorker(result, provider, model, requestType, effectiveVK, requestID, userID, isCacheRead, isBatch, 
isFinalChunk, pricingScopes) }() } @@ -1419,13 +1422,15 @@ func (p *GovernancePlugin) Cleanup() error { // - provider: The provider of the request // - model: The model of the request // - requestType: The type of the request -// - virtualKey: The virtual key of the request (empty string if not present) +// - virtualKey: The raw virtual key token of the request (empty string if not present) +// - selectedKeyID: The selected provider key ID used for scoped pricing overrides // - requestID: The request ID // - userID: The user ID for enterprise user-level governance (empty string if not present) // - isCacheRead: Whether the request is a cache read // - isBatch: Whether the request is a batch request // - isFinalChunk: Whether the request is the final chunk -func (p *GovernancePlugin) postHookWorker(result *schemas.BifrostResponse, provider schemas.ModelProvider, model string, requestType schemas.RequestType, virtualKey, requestID, userID string, _, _, isFinalChunk bool) { +// - pricingScopes: Prebuilt pricing lookup scopes using governance VK ID (nil if not applicable) +func (p *GovernancePlugin) postHookWorker(result *schemas.BifrostResponse, provider schemas.ModelProvider, model string, requestType schemas.RequestType, virtualKey, requestID, userID string, _, _, isFinalChunk bool, pricingScopes *modelcatalog.PricingLookupScopes) { // Determine if request was successful success := (result != nil) @@ -1435,7 +1440,7 @@ func (p *GovernancePlugin) postHookWorker(result *schemas.BifrostResponse, provi if !isStreaming || (isStreaming && isFinalChunk) { var cost float64 if p.modelCatalog != nil && result != nil { - cost = p.modelCatalog.CalculateCost(result) + cost = p.modelCatalog.CalculateCost(result, pricingScopes) } tokensUsed := 0 if result != nil { diff --git a/plugins/logging/main.go b/plugins/logging/main.go index 3a2e501b69..160b67d064 100644 --- a/plugins/logging/main.go +++ b/plugins/logging/main.go @@ -226,7 +226,7 @@ type LoggerPlugin struct { pendingLogs 
sync.Map // Maps requestID -> *PendingLogData (PreLLMHook input data awaiting PostLLMHook) writeQueue chan *writeQueueEntry // Buffered channel for batch write queue closed atomic.Bool // Set during cleanup to prevent sends on closed writeQueue - deferredUsageSem chan struct{} // Limits concurrent deferred usage DB updates + deferredUsageSem chan struct{} // Limits concurrent deferred usage DB updates } // Init creates new logger plugin with given log store @@ -778,7 +778,8 @@ func (p *LoggerPlugin) PostLLMHook(ctx *schemas.BifrostContext, result *schemas. } entry.CacheDebugParsed = cacheDebug if p.pricingManager != nil { - if cost := p.pricingManager.CalculateCost(result); cost > 0 { + pricingScopes := modelcatalog.PricingLookupScopesFromContext(ctx, string(entry.Provider)) + if cost := p.pricingManager.CalculateCost(result, pricingScopes); cost > 0 { entry.Cost = &cost } } diff --git a/plugins/logging/operations.go b/plugins/logging/operations.go index 59d61987d9..9e41a6322b 100644 --- a/plugins/logging/operations.go +++ b/plugins/logging/operations.go @@ -9,6 +9,7 @@ import ( "github.com/bytedance/sonic" "github.com/maximhq/bifrost/core/schemas" "github.com/maximhq/bifrost/framework/logstore" + "github.com/maximhq/bifrost/framework/modelcatalog" "github.com/maximhq/bifrost/framework/streaming" ) @@ -1019,7 +1020,8 @@ func (p *LoggerPlugin) calculateCostForLog(logEntry *logstore.Log) (float64, err resp.SpeechResponse.Usage = logEntry.SpeechOutputParsed.Usage } - return p.pricingManager.CalculateCost(resp), nil + scopes := pricingScopesForLog(logEntry) + return p.pricingManager.CalculateCost(resp, &scopes), nil } // buildResponseForRequestType wraps BifrostLLMUsage into the correct response @@ -1067,19 +1069,19 @@ func buildResponseForRequestType(requestType schemas.RequestType, usage *schemas CachedWriteTokens: usage.PromptTokensDetails.CachedWriteTokens, } } - if usage.CompletionTokensDetails != nil { - respUsage.OutputTokensDetails = 
&schemas.ResponsesResponseOutputTokens{ - TextTokens: usage.CompletionTokensDetails.TextTokens, - AcceptedPredictionTokens: usage.CompletionTokensDetails.AcceptedPredictionTokens, - AudioTokens: usage.CompletionTokensDetails.AudioTokens, - ImageTokens: usage.CompletionTokensDetails.ImageTokens, - ReasoningTokens: usage.CompletionTokensDetails.ReasoningTokens, - RejectedPredictionTokens: usage.CompletionTokensDetails.RejectedPredictionTokens, - CitationTokens: usage.CompletionTokensDetails.CitationTokens, - NumSearchQueries: usage.CompletionTokensDetails.NumSearchQueries, + if usage.CompletionTokensDetails != nil { + respUsage.OutputTokensDetails = &schemas.ResponsesResponseOutputTokens{ + TextTokens: usage.CompletionTokensDetails.TextTokens, + AcceptedPredictionTokens: usage.CompletionTokensDetails.AcceptedPredictionTokens, + AudioTokens: usage.CompletionTokensDetails.AudioTokens, + ImageTokens: usage.CompletionTokensDetails.ImageTokens, + ReasoningTokens: usage.CompletionTokensDetails.ReasoningTokens, + RejectedPredictionTokens: usage.CompletionTokensDetails.RejectedPredictionTokens, + CitationTokens: usage.CompletionTokensDetails.CitationTokens, + NumSearchQueries: usage.CompletionTokensDetails.NumSearchQueries, + } } } - } return &schemas.BifrostResponse{ ResponsesResponse: &schemas.BifrostResponsesResponse{ Usage: respUsage, @@ -1151,3 +1153,20 @@ func buildResponseForRequestType(requestType schemas.RequestType, usage *schemas } } } + +func pricingScopesForLog(logEntry *logstore.Log) modelcatalog.PricingLookupScopes { + if logEntry == nil { + return modelcatalog.PricingLookupScopes{} + } + + virtualKeyID := "" + if logEntry.VirtualKeyID != nil { + virtualKeyID = *logEntry.VirtualKeyID + } + + return modelcatalog.PricingLookupScopes{ + Provider: logEntry.Provider, + SelectedKeyID: logEntry.SelectedKeyID, + VirtualKeyID: virtualKeyID, + } +} diff --git a/plugins/telemetry/main.go b/plugins/telemetry/main.go index f8c2efe319..58d0bb07ed 100644 --- 
a/plugins/telemetry/main.go +++ b/plugins/telemetry/main.go @@ -425,6 +425,8 @@ func (p *PrometheusPlugin) PostLLMHook(ctx *schemas.BifrostContext, result *sche streamEndIndicatorValue := ctx.Value(schemas.BifrostContextKeyStreamEndIndicator) isFinalChunk, hasFinalChunkIndicator := streamEndIndicatorValue.(bool) + pricingScopes := modelcatalog.PricingLookupScopesFromContext(ctx, string(provider)) + // Calculate cost and record metrics in a separate goroutine to avoid blocking the main thread go func() { // For streaming requests, handle per-token metrics for intermediate chunks @@ -447,7 +449,7 @@ func (p *PrometheusPlugin) PostLLMHook(ctx *schemas.BifrostContext, result *sche cost := 0.0 if p.pricingManager != nil && result != nil { - cost = p.pricingManager.CalculateCost(result) + cost = p.pricingManager.CalculateCost(result, pricingScopes) } p.UpstreamRequestsTotal.WithLabelValues(promLabelValues...).Inc() diff --git a/transports/bifrost-http/handlers/governance.go b/transports/bifrost-http/handlers/governance.go index 731fd3b4b9..a0ee441bbf 100644 --- a/transports/bifrost-http/handlers/governance.go +++ b/transports/bifrost-http/handlers/governance.go @@ -19,6 +19,7 @@ import ( "github.com/maximhq/bifrost/core/schemas" "github.com/maximhq/bifrost/framework/configstore" configstoreTables "github.com/maximhq/bifrost/framework/configstore/tables" + "github.com/maximhq/bifrost/framework/modelcatalog" "github.com/maximhq/bifrost/plugins/governance" "github.com/maximhq/bifrost/transports/bifrost-http/lib" "github.com/valyala/fasthttp" @@ -40,6 +41,8 @@ type GovernanceManager interface { RemoveProvider(ctx context.Context, provider schemas.ModelProvider) error ReloadRoutingRule(ctx context.Context, id string) error RemoveRoutingRule(ctx context.Context, id string) error + UpsertPricingOverride(ctx context.Context, override *configstoreTables.TablePricingOverride) error + DeletePricingOverride(ctx context.Context, id string) error } // GovernanceHandler manages HTTP 
requests for governance operations @@ -296,6 +299,12 @@ func (h *GovernanceHandler) RegisterRoutes(r *router.Router, middlewares ...sche r.GET("/api/governance/providers", lib.ChainMiddlewares(h.getProviderGovernance, middlewares...)) r.PUT("/api/governance/providers/{provider_name}", lib.ChainMiddlewares(h.updateProviderGovernance, middlewares...)) r.DELETE("/api/governance/providers/{provider_name}", lib.ChainMiddlewares(h.deleteProviderGovernance, middlewares...)) + + // Pricing override operations + r.GET("/api/governance/pricing-overrides", lib.ChainMiddlewares(h.getPricingOverrides, middlewares...)) + r.POST("/api/governance/pricing-overrides", lib.ChainMiddlewares(h.createPricingOverride, middlewares...)) + r.PUT("/api/governance/pricing-overrides/{id}", lib.ChainMiddlewares(h.updatePricingOverride, middlewares...)) + r.DELETE("/api/governance/pricing-overrides/{id}", lib.ChainMiddlewares(h.deletePricingOverride, middlewares...)) } // Virtual Key CRUD Operations @@ -3244,6 +3253,376 @@ func (h *GovernanceHandler) deleteRoutingRule(ctx *fasthttp.RequestCtx) { }) } +// --------------------------------------------------------------------------- +// Pricing Override Operations +// --------------------------------------------------------------------------- + +// CreatePricingOverrideRequest is the request payload for creating a governance +// pricing override. 
+type CreatePricingOverrideRequest struct { + Name string `json:"name"` + ScopeKind modelcatalog.ScopeKind `json:"scope_kind"` + VirtualKeyID *string `json:"virtual_key_id,omitempty"` + ProviderID *string `json:"provider_id,omitempty"` + ProviderKeyID *string `json:"provider_key_id,omitempty"` + MatchType modelcatalog.MatchType `json:"match_type"` + Pattern string `json:"pattern"` + RequestTypes []schemas.RequestType `json:"request_types,omitempty"` + Patch modelcatalog.PricingOptions `json:"patch,omitempty"` +} + +// nullableString tracks whether a JSON string field was explicitly present in +// the request body (even as null), so the merge logic can distinguish "omitted" +// (leave existing value) from "set to null" (clear the value). +type nullableString struct { + Value *string + Set bool +} + +func (n *nullableString) UnmarshalJSON(b []byte) error { + n.Set = true + if string(b) == "null" { + n.Value = nil + return nil + } + var s string + if err := json.Unmarshal(b, &s); err != nil { + return err + } + n.Value = &s + return nil +} + +// UpdatePricingOverrideRequest is the request payload for updating a governance +// pricing override. All fields except Patch are optional — omitted fields are +// merged from the existing record. Patch is always replaced in full. 
+type UpdatePricingOverrideRequest struct { + Name *string `json:"name,omitempty"` + ScopeKind *modelcatalog.ScopeKind `json:"scope_kind,omitempty"` + VirtualKeyID nullableString `json:"virtual_key_id"` + ProviderID nullableString `json:"provider_id"` + ProviderKeyID nullableString `json:"provider_key_id"` + MatchType *modelcatalog.MatchType `json:"match_type,omitempty"` + Pattern *string `json:"pattern,omitempty"` + RequestTypes []schemas.RequestType `json:"request_types,omitempty"` + Patch *modelcatalog.PricingOptions `json:"patch,omitempty"` +} + +func (h *GovernanceHandler) getPricingOverrides(ctx *fasthttp.RequestCtx) { + // Parse filter parameters + var scopeKind, virtualKeyID, providerID, providerKeyID *string + if v := strings.TrimSpace(string(ctx.QueryArgs().Peek("scope_kind"))); v != "" { + scopeKind = &v + } + if v := strings.TrimSpace(string(ctx.QueryArgs().Peek("virtual_key_id"))); v != "" { + virtualKeyID = &v + } + if v := strings.TrimSpace(string(ctx.QueryArgs().Peek("provider_id"))); v != "" { + providerID = &v + } + if v := strings.TrimSpace(string(ctx.QueryArgs().Peek("provider_key_id"))); v != "" { + providerKeyID = &v + } + + // Check for pagination parameters + limitStr := string(ctx.QueryArgs().Peek("limit")) + offsetStr := string(ctx.QueryArgs().Peek("offset")) + search := string(ctx.QueryArgs().Peek("search")) + + if limitStr != "" || offsetStr != "" || search != "" { + params := configstore.PricingOverridesQueryParams{ + Search: search, + ScopeKind: scopeKind, + VirtualKeyID: virtualKeyID, + ProviderID: providerID, + ProviderKeyID: providerKeyID, + } + if limitStr != "" { + n, err := strconv.Atoi(limitStr) + if err != nil { + SendError(ctx, 400, "Invalid limit parameter: must be a number") + return + } + if n < 0 { + SendError(ctx, 400, "Invalid limit parameter: must be non-negative") + return + } + params.Limit = n + } + if offsetStr != "" { + n, err := strconv.Atoi(offsetStr) + if err != nil { + SendError(ctx, 400, "Invalid offset 
parameter: must be a number") + return + } + if n < 0 { + SendError(ctx, 400, "Invalid offset parameter: must be non-negative") + return + } + params.Offset = n + } + + params.Limit, params.Offset = ClampPaginationParams(params.Limit, params.Offset) + overrides, totalCount, err := h.configStore.GetPricingOverridesPaginated(ctx, params) + if err != nil { + logger.Error("failed to retrieve pricing overrides: %v", err) + SendError(ctx, fasthttp.StatusInternalServerError, "Failed to retrieve pricing overrides") + return + } + SendJSON(ctx, map[string]interface{}{ + "pricing_overrides": overrides, + "count": len(overrides), + "total_count": totalCount, + "limit": params.Limit, + "offset": params.Offset, + }) + return + } + + // Non-paginated path: return all matching overrides (backward compatible) + filters := configstore.PricingOverrideFilters{ + ScopeKind: scopeKind, + VirtualKeyID: virtualKeyID, + ProviderID: providerID, + ProviderKeyID: providerKeyID, + } + overrides, err := h.configStore.GetPricingOverrides(ctx, filters) + if err != nil { + logger.Error("failed to retrieve pricing overrides: %v", err) + SendError(ctx, fasthttp.StatusInternalServerError, "Failed to retrieve pricing overrides") + return + } + + SendJSON(ctx, map[string]interface{}{ + "pricing_overrides": overrides, + "count": len(overrides), + "total_count": len(overrides), + "limit": len(overrides), + "offset": 0, + }) +} + +func (h *GovernanceHandler) createPricingOverride(ctx *fasthttp.RequestCtx) { + var req CreatePricingOverrideRequest + if err := json.Unmarshal(ctx.PostBody(), &req); err != nil { + SendError(ctx, fasthttp.StatusBadRequest, "Invalid JSON") + return + } + + name, err := normalizeAndValidatePricingOverrideName(req.Name) + if err != nil { + SendError(ctx, fasthttp.StatusBadRequest, err.Error()) + return + } + + shape := modelcatalog.PricingOverride{ + ScopeKind: req.ScopeKind, + VirtualKeyID: req.VirtualKeyID, + ProviderID: req.ProviderID, + ProviderKeyID: req.ProviderKeyID, + 
MatchType: req.MatchType, + Pattern: req.Pattern, + RequestTypes: req.RequestTypes, + } + if err := shape.IsValid(); err != nil { + SendError(ctx, fasthttp.StatusBadRequest, err.Error()) + return + } + + patchJSON, err := sonic.Marshal(req.Patch) + if err != nil { + SendError(ctx, fasthttp.StatusBadRequest, "Invalid patch") + return + } + + now := time.Now() + override := configstoreTables.TablePricingOverride{ + ID: uuid.NewString(), + Name: name, + ScopeKind: string(req.ScopeKind), + VirtualKeyID: normalizeOptionalString(req.VirtualKeyID), + ProviderID: normalizeOptionalString(req.ProviderID), + ProviderKeyID: normalizeOptionalString(req.ProviderKeyID), + MatchType: string(req.MatchType), + Pattern: strings.TrimSpace(req.Pattern), + RequestTypes: req.RequestTypes, + PricingPatchJSON: string(patchJSON), + ConfigHash: "", + CreatedAt: now, + UpdatedAt: now, + } + + if err := h.configStore.CreatePricingOverride(ctx, &override); err != nil { + logger.Error("failed to create pricing override: %v", err) + SendError(ctx, fasthttp.StatusInternalServerError, "Failed to create pricing override") + return + } + + if err := h.governanceManager.UpsertPricingOverride(ctx, &override); err != nil { + logger.Error("failed to upsert pricing override: %v", err) + SendError(ctx, fasthttp.StatusInternalServerError, "Failed to upsert pricing override") + return + } + SendJSONWithStatus(ctx, map[string]interface{}{ + "message": "Pricing override created successfully", + "pricing_override": override, + }, fasthttp.StatusCreated) +} + +func (h *GovernanceHandler) updatePricingOverride(ctx *fasthttp.RequestCtx) { + id := ctx.UserValue("id").(string) + + var req UpdatePricingOverrideRequest + if err := json.Unmarshal(ctx.PostBody(), &req); err != nil { + SendError(ctx, fasthttp.StatusBadRequest, "Invalid JSON") + return + } + + existing, err := h.configStore.GetPricingOverrideByID(ctx, id) + if err != nil { + if errors.Is(err, configstore.ErrNotFound) { + SendError(ctx, 
fasthttp.StatusNotFound, "Pricing override not found") + return + } + SendError(ctx, fasthttp.StatusInternalServerError, fmt.Sprintf("Failed to retrieve pricing override: %v", err)) + return + } + + // Merge request fields onto the existing record; omitted fields keep their current values. + merged := modelcatalog.PricingOverride{ + ScopeKind: modelcatalog.ScopeKind(existing.ScopeKind), + VirtualKeyID: existing.VirtualKeyID, + ProviderID: existing.ProviderID, + ProviderKeyID: existing.ProviderKeyID, + MatchType: modelcatalog.MatchType(existing.MatchType), + Pattern: existing.Pattern, + RequestTypes: existing.RequestTypes, + } + if req.ScopeKind != nil { + merged.ScopeKind = *req.ScopeKind + // Changing scope_kind resets all scope IDs; only what the request + // explicitly provides will be kept. + merged.VirtualKeyID = nil + merged.ProviderID = nil + merged.ProviderKeyID = nil + } + if req.VirtualKeyID.Set { + merged.VirtualKeyID = req.VirtualKeyID.Value + } + if req.ProviderID.Set { + merged.ProviderID = req.ProviderID.Value + } + if req.ProviderKeyID.Set { + merged.ProviderKeyID = req.ProviderKeyID.Value + } + if req.MatchType != nil { + merged.MatchType = *req.MatchType + } + if req.Pattern != nil { + merged.Pattern = *req.Pattern + } + if req.RequestTypes != nil { + merged.RequestTypes = req.RequestTypes + } + + if err := merged.IsValid(); err != nil { + SendError(ctx, fasthttp.StatusBadRequest, err.Error()) + return + } + + // Resolve name: use provided value or fall back to existing. + nameStr := existing.Name + if req.Name != nil { + nameStr, err = normalizeAndValidatePricingOverrideName(*req.Name) + if err != nil { + SendError(ctx, fasthttp.StatusBadRequest, err.Error()) + return + } + } + + // Patch JSON: always replace in full with whatever is provided (or keep existing if omitted). 
+ pricingPatchJSON := existing.PricingPatchJSON + if req.Patch != nil { + b, err := sonic.Marshal(req.Patch) + if err != nil { + SendError(ctx, fasthttp.StatusBadRequest, "Invalid patch") + return + } + pricingPatchJSON = string(b) + } + + override := configstoreTables.TablePricingOverride{ + ID: id, + Name: nameStr, + ScopeKind: string(merged.ScopeKind), + VirtualKeyID: normalizeOptionalString(merged.VirtualKeyID), + ProviderID: normalizeOptionalString(merged.ProviderID), + ProviderKeyID: normalizeOptionalString(merged.ProviderKeyID), + MatchType: string(merged.MatchType), + Pattern: strings.TrimSpace(merged.Pattern), + RequestTypes: merged.RequestTypes, + PricingPatchJSON: pricingPatchJSON, + ConfigHash: existing.ConfigHash, + CreatedAt: existing.CreatedAt, + UpdatedAt: time.Now(), + } + + if err := h.configStore.UpdatePricingOverride(ctx, &override); err != nil { + logger.Error("failed to update pricing override: %v", err) + SendError(ctx, fasthttp.StatusInternalServerError, "Failed to update pricing override") + return + } + + if err := h.governanceManager.UpsertPricingOverride(ctx, &override); err != nil { + logger.Error("failed to upsert pricing override: %v", err) + SendError(ctx, fasthttp.StatusInternalServerError, "Failed to upsert pricing override") + return + } + SendJSON(ctx, map[string]interface{}{ + "message": "Pricing override updated successfully", + "pricing_override": override, + }) +} + +func (h *GovernanceHandler) deletePricingOverride(ctx *fasthttp.RequestCtx) { + id := ctx.UserValue("id").(string) + if err := h.configStore.DeletePricingOverride(ctx, id); err != nil { + if errors.Is(err, configstore.ErrNotFound) { + SendError(ctx, fasthttp.StatusNotFound, "Pricing override not found") + return + } + logger.Error("failed to delete pricing override: %v", err) + SendError(ctx, fasthttp.StatusInternalServerError, "Failed to delete pricing override") + return + } + + if err := h.governanceManager.DeletePricingOverride(ctx, id); err != nil { + 
logger.Warn("failed to delete pricing override from memory: %v", err) + } + SendJSON(ctx, map[string]interface{}{ + "message": "Pricing override deleted successfully", + }) +} + +func normalizeAndValidatePricingOverrideName(name string) (string, error) { + trimmed := strings.TrimSpace(name) + if trimmed == "" { + return "", errors.New("name is required") + } + return trimmed, nil +} + +func normalizeOptionalString(value *string) *string { + if value == nil { + return nil + } + trimmed := strings.TrimSpace(*value) + if trimmed == "" { + return nil + } + return &trimmed +} + // validRoutingScopes contains the allowed scope values for routing rules var validRoutingScopes = map[string]bool{ "global": true, diff --git a/transports/bifrost-http/handlers/inference.go b/transports/bifrost-http/handlers/inference.go index 326ba7d342..e4a9ee9028 100644 --- a/transports/bifrost-http/handlers/inference.go +++ b/transports/bifrost-http/handlers/inference.go @@ -745,9 +745,12 @@ func (h *CompletionHandler) listModels(ctx *fasthttp.RequestCtx) { pricingEntry = h.config.ModelCatalog.GetPricingEntryForModel(*modelEntry.Deployment, provider) } if pricingEntry != nil && modelEntry.Pricing == nil { - pricing := &schemas.Pricing{ - Prompt: bifrost.Ptr(fmt.Sprintf("%.10f", pricingEntry.InputCostPerToken)), - Completion: bifrost.Ptr(fmt.Sprintf("%.10f", pricingEntry.OutputCostPerToken)), + pricing := &schemas.Pricing{} + if pricingEntry.InputCostPerToken != nil { + pricing.Prompt = bifrost.Ptr(fmt.Sprintf("%.10f", *pricingEntry.InputCostPerToken)) + } + if pricingEntry.OutputCostPerToken != nil { + pricing.Completion = bifrost.Ptr(fmt.Sprintf("%.10f", *pricingEntry.OutputCostPerToken)) } if pricingEntry.InputCostPerImage != nil { pricing.Image = bifrost.Ptr(fmt.Sprintf("%.10f", *pricingEntry.InputCostPerImage)) @@ -755,6 +758,9 @@ func (h *CompletionHandler) listModels(ctx *fasthttp.RequestCtx) { if pricingEntry.CacheReadInputTokenCost != nil { pricing.InputCacheRead = 
bifrost.Ptr(fmt.Sprintf("%.10f", *pricingEntry.CacheReadInputTokenCost)) } + if pricingEntry.CacheCreationInputTokenCost != nil { + pricing.InputCacheWrite = bifrost.Ptr(fmt.Sprintf("%.10f", *pricingEntry.CacheCreationInputTokenCost)) + } resp.Data[i].Pricing = pricing } } diff --git a/transports/bifrost-http/handlers/pricing_override_test.go b/transports/bifrost-http/handlers/pricing_override_test.go new file mode 100644 index 0000000000..4d19d0541e --- /dev/null +++ b/transports/bifrost-http/handlers/pricing_override_test.go @@ -0,0 +1,149 @@ +package handlers + +import ( + "context" + "encoding/json" + "net" + "os" + "testing" + "time" + + "github.com/maximhq/bifrost/core/schemas" + "github.com/maximhq/bifrost/framework/configstore" + configstoreTables "github.com/maximhq/bifrost/framework/configstore/tables" + "github.com/maximhq/bifrost/framework/modelcatalog" + "github.com/maximhq/bifrost/plugins/governance" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "github.com/valyala/fasthttp" +) + +type pricingOverrideTestGovernanceManager struct{} + +func (pricingOverrideTestGovernanceManager) GetGovernanceData() *governance.GovernanceData { + return nil +} +func (pricingOverrideTestGovernanceManager) ReloadVirtualKey(context.Context, string) (*configstoreTables.TableVirtualKey, error) { + return nil, nil +} +func (pricingOverrideTestGovernanceManager) RemoveVirtualKey(context.Context, string) error { + return nil +} +func (pricingOverrideTestGovernanceManager) ReloadTeam(context.Context, string) (*configstoreTables.TableTeam, error) { + return nil, nil +} +func (pricingOverrideTestGovernanceManager) RemoveTeam(context.Context, string) error { + return nil +} +func (pricingOverrideTestGovernanceManager) ReloadCustomer(context.Context, string) (*configstoreTables.TableCustomer, error) { + return nil, nil +} +func (pricingOverrideTestGovernanceManager) RemoveCustomer(context.Context, string) error { + return nil +} +func 
(pricingOverrideTestGovernanceManager) ReloadModelConfig(context.Context, string) (*configstoreTables.TableModelConfig, error) { + return nil, nil +} +func (pricingOverrideTestGovernanceManager) RemoveModelConfig(context.Context, string) error { + return nil +} +func (pricingOverrideTestGovernanceManager) ReloadProvider(context.Context, schemas.ModelProvider) (*configstoreTables.TableProvider, error) { + return nil, nil +} +func (pricingOverrideTestGovernanceManager) RemoveProvider(context.Context, schemas.ModelProvider) error { + return nil +} +func (pricingOverrideTestGovernanceManager) ReloadRoutingRule(context.Context, string) error { + return nil +} +func (pricingOverrideTestGovernanceManager) RemoveRoutingRule(context.Context, string) error { + return nil +} +func (pricingOverrideTestGovernanceManager) UpsertPricingOverride(context.Context, *configstoreTables.TablePricingOverride) error { + return nil +} +func (pricingOverrideTestGovernanceManager) DeletePricingOverride(context.Context, string) error { + return nil +} + +func setupPricingOverrideHandlerStore(t *testing.T) configstore.ConfigStore { + t.Helper() + + dbPath := t.TempDir() + "/config.db" + store, err := configstore.NewConfigStore(context.Background(), &configstore.Config{ + Enabled: true, + Type: configstore.ConfigStoreTypeSQLite, + Config: &configstore.SQLiteConfig{ + Path: dbPath, + }, + }, &mockLogger{}) + require.NoError(t, err) + + t.Cleanup(func() { + _ = os.Remove(dbPath) + }) + return store +} + +func newTestRequestCtx(body string) *fasthttp.RequestCtx { + var req fasthttp.Request + req.SetBodyString(body) + + ctx := &fasthttp.RequestCtx{} + ctx.Init(&req, &net.TCPAddr{IP: net.IPv4(127, 0, 0, 1), Port: 12345}, nil) + return ctx +} + +func TestUpdatePricingOverride_ReplacesFullBody(t *testing.T) { + SetLogger(&mockLogger{}) + store := setupPricingOverrideHandlerStore(t) + handler := &GovernanceHandler{ + configStore: store, + governanceManager: pricingOverrideTestGovernanceManager{}, + } + 
+ now := time.Now().UTC() + override := configstoreTables.TablePricingOverride{ + ID: "override-1", + Name: "Original", + ScopeKind: string(modelcatalog.ScopeKindGlobal), + MatchType: string(modelcatalog.MatchTypeExact), + Pattern: "gpt-4.1", + CreatedAt: now, + UpdatedAt: now, + PricingPatchJSON: `{"input_cost_per_token":1,"output_cost_per_token":2}`, + RequestTypes: []schemas.RequestType{schemas.ChatCompletionRequest}, + } + require.NoError(t, store.CreatePricingOverride(context.Background(), &override)) + + // Patch replaces in full: send only input_cost_per_token. + // output_cost_per_token must be absent from the stored patch afterwards, + // confirming full-replace (not merge) semantics. + body := `{ + "name":"Updated", + "scope_kind":"global", + "match_type":"exact", + "pattern":"gpt-4.1", + "request_types":["chat_completion"], + "patch":{"input_cost_per_token":1.5} + }` + ctx := newTestRequestCtx(body) + ctx.SetUserValue("id", override.ID) + + handler.updatePricingOverride(ctx) + + require.Equal(t, fasthttp.StatusOK, ctx.Response.StatusCode(), string(ctx.Response.Body())) + + stored, err := store.GetPricingOverrideByID(context.Background(), override.ID) + require.NoError(t, err) + assert.Equal(t, "Updated", stored.Name) + + var patch modelcatalog.PricingOptions + require.NoError(t, json.Unmarshal([]byte(stored.PricingPatchJSON), &patch)) + // Sent field must reflect the new value. + require.NotNil(t, patch.InputCostPerToken) + assert.Equal(t, 1.5, *patch.InputCostPerToken) + // Omitted field must be cleared — patch is always fully replaced, not merged. 
+ assert.Nil(t, patch.OutputCostPerToken) + assert.Empty(t, stored.ConfigHash) +} diff --git a/transports/bifrost-http/handlers/providers.go b/transports/bifrost-http/handlers/providers.go index a00ee3789a..eb88d04908 100644 --- a/transports/bifrost-http/handlers/providers.go +++ b/transports/bifrost-http/handlers/providers.go @@ -8,7 +8,6 @@ import ( "errors" "fmt" "net/url" - "regexp" "slices" "sort" "strings" @@ -60,19 +59,18 @@ const ( // ProviderResponse represents the response for provider operations type ProviderResponse struct { - Name schemas.ModelProvider `json:"name"` - Keys []schemas.Key `json:"keys"` // API keys for the provider - NetworkConfig schemas.NetworkConfig `json:"network_config"` // Network-related settings - ConcurrencyAndBufferSize schemas.ConcurrencyAndBufferSize `json:"concurrency_and_buffer_size"` // Concurrency settings - ProxyConfig *schemas.ProxyConfig `json:"proxy_config"` // Proxy configuration - SendBackRawRequest bool `json:"send_back_raw_request"` // Include raw request in BifrostResponse - SendBackRawResponse bool `json:"send_back_raw_response"` // Include raw response in BifrostResponse - CustomProviderConfig *schemas.CustomProviderConfig `json:"custom_provider_config,omitempty"` // Custom provider configuration - PricingOverrides []schemas.ProviderPricingOverride `json:"pricing_overrides,omitempty"` // Provider-level pricing overrides - ProviderStatus ProviderStatus `json:"provider_status"` // Health/initialization status of the provider - Status string `json:"status,omitempty"` // Operational status (e.g., list_models_failed) - Description string `json:"description,omitempty"` // Error/status description - ConfigHash string `json:"config_hash,omitempty"` // Hash of config.json version, used for change detection + Name schemas.ModelProvider `json:"name"` + Keys []schemas.Key `json:"keys"` // API keys for the provider + NetworkConfig schemas.NetworkConfig `json:"network_config"` // Network-related settings + 
ConcurrencyAndBufferSize schemas.ConcurrencyAndBufferSize `json:"concurrency_and_buffer_size"` // Concurrency settings + ProxyConfig *schemas.ProxyConfig `json:"proxy_config"` // Proxy configuration + SendBackRawRequest bool `json:"send_back_raw_request"` // Include raw request in BifrostResponse + SendBackRawResponse bool `json:"send_back_raw_response"` // Include raw response in BifrostResponse + CustomProviderConfig *schemas.CustomProviderConfig `json:"custom_provider_config,omitempty"` // Custom provider configuration + ProviderStatus ProviderStatus `json:"provider_status"` // Health/initialization status of the provider + Status string `json:"status,omitempty"` // Operational status (e.g., list_models_failed) + Description string `json:"description,omitempty"` // Error/status description + ConfigHash string `json:"config_hash,omitempty"` // Hash of config.json version, used for change detection } // ListProvidersResponse represents the response for listing all providers @@ -185,7 +183,6 @@ func (h *ProviderHandler) addProvider(ctx *fasthttp.RequestCtx) { SendBackRawRequest *bool `json:"send_back_raw_request,omitempty"` // Include raw request in BifrostResponse SendBackRawResponse *bool `json:"send_back_raw_response,omitempty"` // Include raw response in BifrostResponse CustomProviderConfig *schemas.CustomProviderConfig `json:"custom_provider_config,omitempty"` // Custom provider configuration - PricingOverrides []schemas.ProviderPricingOverride `json:"pricing_overrides,omitempty"` // Provider-level pricing overrides }{} if err := json.Unmarshal(ctx.PostBody(), &payload); err != nil { SendError(ctx, fasthttp.StatusBadRequest, fmt.Sprintf("Invalid JSON: %v", err)) @@ -226,10 +223,6 @@ func (h *ProviderHandler) addProvider(ctx *fasthttp.RequestCtx) { return } } - if err := validatePricingOverrides(payload.PricingOverrides); err != nil { - SendError(ctx, fasthttp.StatusBadRequest, fmt.Sprintf("invalid pricing overrides: %v", err)) - return - } // Validate retry 
backoff values if NetworkConfig is provided if payload.NetworkConfig != nil { if err := validateRetryBackoff(payload.NetworkConfig); err != nil { @@ -257,7 +250,6 @@ func (h *ProviderHandler) addProvider(ctx *fasthttp.RequestCtx) { SendBackRawRequest: payload.SendBackRawRequest != nil && *payload.SendBackRawRequest, SendBackRawResponse: payload.SendBackRawResponse != nil && *payload.SendBackRawResponse, CustomProviderConfig: payload.CustomProviderConfig, - PricingOverrides: payload.PricingOverrides, } // Validate custom provider configuration before persisting if err := lib.ValidateCustomProvider(config, payload.Provider); err != nil { @@ -274,11 +266,6 @@ func (h *ProviderHandler) addProvider(ctx *fasthttp.RequestCtx) { SendError(ctx, fasthttp.StatusInternalServerError, fmt.Sprintf("Failed to add provider: %v", err)) return } - if h.inMemoryStore.ModelCatalog != nil { - if err := h.inMemoryStore.ModelCatalog.SetProviderPricingOverrides(payload.Provider, config.PricingOverrides); err != nil { - logger.Warn("Failed to set pricing overrides for provider %s: %v", payload.Provider, err) - } - } logger.Info("Provider %s added successfully", payload.Provider) // Attempt model discovery @@ -300,7 +287,6 @@ func (h *ProviderHandler) addProvider(ctx *fasthttp.RequestCtx) { SendBackRawRequest: config.SendBackRawRequest, SendBackRawResponse: config.SendBackRawResponse, CustomProviderConfig: config.CustomProviderConfig, - PricingOverrides: config.PricingOverrides, Status: config.Status, Description: config.Description, }, ProviderStatusActive) @@ -327,24 +313,19 @@ func (h *ProviderHandler) updateProvider(ctx *fasthttp.RequestCtx) { } var payload = struct { - Keys []schemas.Key `json:"keys"` // API keys for the provider - NetworkConfig schemas.NetworkConfig `json:"network_config"` // Network-related settings - ConcurrencyAndBufferSize schemas.ConcurrencyAndBufferSize `json:"concurrency_and_buffer_size"` // Concurrency settings - ProxyConfig *schemas.ProxyConfig 
`json:"proxy_config,omitempty"` // Proxy configuration - SendBackRawRequest *bool `json:"send_back_raw_request,omitempty"` // Include raw request in BifrostResponse - SendBackRawResponse *bool `json:"send_back_raw_response,omitempty"` // Include raw response in BifrostResponse - CustomProviderConfig *schemas.CustomProviderConfig `json:"custom_provider_config,omitempty"` // Custom provider configuration - PricingOverrides []schemas.ProviderPricingOverride `json:"pricing_overrides,omitempty"` // Provider-level pricing overrides + Keys []schemas.Key `json:"keys"` // API keys for the provider + NetworkConfig schemas.NetworkConfig `json:"network_config"` // Network-related settings + ConcurrencyAndBufferSize schemas.ConcurrencyAndBufferSize `json:"concurrency_and_buffer_size"` // Concurrency settings + ProxyConfig *schemas.ProxyConfig `json:"proxy_config,omitempty"` // Proxy configuration + SendBackRawRequest *bool `json:"send_back_raw_request,omitempty"` // Include raw request in BifrostResponse + SendBackRawResponse *bool `json:"send_back_raw_response,omitempty"` // Include raw response in BifrostResponse + CustomProviderConfig *schemas.CustomProviderConfig `json:"custom_provider_config,omitempty"` // Custom provider configuration }{} if err := sonic.Unmarshal(ctx.PostBody(), &payload); err != nil { SendError(ctx, fasthttp.StatusBadRequest, fmt.Sprintf("Invalid JSON: %v", err)) return } - if err := validatePricingOverrides(payload.PricingOverrides); err != nil { - SendError(ctx, fasthttp.StatusBadRequest, fmt.Sprintf("invalid pricing overrides: %v", err)) - return - } // Get the raw config to access actual values for merging with redacted request values oldConfigRaw, err := h.inMemoryStore.GetProviderConfigRaw(provider) @@ -380,7 +361,6 @@ func (h *ProviderHandler) updateProvider(ctx *fasthttp.RequestCtx) { ConcurrencyAndBufferSize: oldConfigRaw.ConcurrencyAndBufferSize, ProxyConfig: oldConfigRaw.ProxyConfig, CustomProviderConfig: oldConfigRaw.CustomProviderConfig, - 
PricingOverrides: oldConfigRaw.PricingOverrides, Status: oldConfigRaw.Status, Description: oldConfigRaw.Description, } @@ -466,7 +446,6 @@ func (h *ProviderHandler) updateProvider(ctx *fasthttp.RequestCtx) { config.ProxyConfig = payload.ProxyConfig config.CustomProviderConfig = payload.CustomProviderConfig - config.PricingOverrides = payload.PricingOverrides if payload.SendBackRawRequest != nil { config.SendBackRawRequest = *payload.SendBackRawRequest } @@ -500,12 +479,6 @@ func (h *ProviderHandler) updateProvider(ctx *fasthttp.RequestCtx) { SendError(ctx, fasthttp.StatusInternalServerError, fmt.Sprintf("Failed to update provider: %v", err)) return } - if h.inMemoryStore.ModelCatalog != nil { - if err := h.inMemoryStore.ModelCatalog.SetProviderPricingOverrides(provider, config.PricingOverrides); err != nil { - logger.Warn("Failed to set pricing overrides for provider %s: %v", provider, err) - } - } - // Attempt model discovery err = h.attemptModelDiscovery(ctx, provider, payload.CustomProviderConfig) @@ -525,7 +498,6 @@ func (h *ProviderHandler) updateProvider(ctx *fasthttp.RequestCtx) { SendBackRawRequest: config.SendBackRawRequest, SendBackRawResponse: config.SendBackRawResponse, CustomProviderConfig: config.CustomProviderConfig, - PricingOverrides: config.PricingOverrides, Status: config.Status, Description: config.Description, }, ProviderStatusActive) @@ -1083,7 +1055,6 @@ func (h *ProviderHandler) getProviderResponseFromConfig(provider schemas.ModelPr SendBackRawRequest: config.SendBackRawRequest, SendBackRawResponse: config.SendBackRawResponse, CustomProviderConfig: config.CustomProviderConfig, - PricingOverrides: config.PricingOverrides, ProviderStatus: status, Status: config.Status, Description: config.Description, @@ -1091,101 +1062,6 @@ func (h *ProviderHandler) getProviderResponseFromConfig(provider schemas.ModelPr } } -func validatePricingOverrides(overrides []schemas.ProviderPricingOverride) error { - for i, override := range overrides { - if 
strings.TrimSpace(override.ModelPattern) == "" { - return fmt.Errorf("override[%d]: model_pattern is required", i) - } - - switch override.MatchType { - case schemas.PricingOverrideMatchExact: - if strings.Contains(override.ModelPattern, "*") { - return fmt.Errorf("override[%d]: exact match_type cannot include '*'", i) - } - case schemas.PricingOverrideMatchWildcard: - if !strings.Contains(override.ModelPattern, "*") { - return fmt.Errorf("override[%d]: wildcard match_type requires '*' in model_pattern", i) - } - case schemas.PricingOverrideMatchRegex: - if _, err := regexp.Compile(override.ModelPattern); err != nil { - return fmt.Errorf("override[%d]: invalid regex pattern: %w", i, err) - } - default: - return fmt.Errorf("override[%d]: unsupported match_type %q", i, override.MatchType) - } - - for _, requestType := range override.RequestTypes { - if !isSupportedOverrideRequestType(requestType) { - return fmt.Errorf("override[%d]: unsupported request_type %q", i, requestType) - } - } - - if err := validatePricingOverrideNonNegativeFields(i, override); err != nil { - return err - } - } - - return nil -} - -func isSupportedOverrideRequestType(requestType schemas.RequestType) bool { - switch requestType { - case schemas.TextCompletionRequest, - schemas.TextCompletionStreamRequest, - schemas.ChatCompletionRequest, - schemas.ChatCompletionStreamRequest, - schemas.ResponsesRequest, - schemas.ResponsesStreamRequest, - schemas.EmbeddingRequest, - schemas.RerankRequest, - schemas.SpeechRequest, - schemas.SpeechStreamRequest, - schemas.TranscriptionRequest, - schemas.TranscriptionStreamRequest, - schemas.ImageGenerationRequest, - schemas.ImageGenerationStreamRequest: - return true - default: - return false - } -} - -func validatePricingOverrideNonNegativeFields(index int, override schemas.ProviderPricingOverride) error { - optionalValues := map[string]*float64{ - "input_cost_per_token": override.InputCostPerToken, - "output_cost_per_token": override.OutputCostPerToken, - 
"input_cost_per_video_per_second": override.InputCostPerVideoPerSecond, - "input_cost_per_audio_per_second": override.InputCostPerAudioPerSecond, - "input_cost_per_character": override.InputCostPerCharacter, - "input_cost_per_token_above_128k_tokens": override.InputCostPerTokenAbove128kTokens, - "input_cost_per_image_above_128k_tokens": override.InputCostPerImageAbove128kTokens, - "input_cost_per_video_per_second_above_128k_tokens": override.InputCostPerVideoPerSecondAbove128kTokens, - "input_cost_per_audio_per_second_above_128k_tokens": override.InputCostPerAudioPerSecondAbove128kTokens, - "output_cost_per_token_above_128k_tokens": override.OutputCostPerTokenAbove128kTokens, - "input_cost_per_token_above_200k_tokens": override.InputCostPerTokenAbove200kTokens, - "output_cost_per_token_above_200k_tokens": override.OutputCostPerTokenAbove200kTokens, - "cache_creation_input_token_cost_above_200k_tokens": override.CacheCreationInputTokenCostAbove200kTokens, - "cache_read_input_token_cost_above_200k_tokens": override.CacheReadInputTokenCostAbove200kTokens, - "cache_read_input_token_cost": override.CacheReadInputTokenCost, - "cache_creation_input_token_cost": override.CacheCreationInputTokenCost, - "input_cost_per_token_batches": override.InputCostPerTokenBatches, - "output_cost_per_token_batches": override.OutputCostPerTokenBatches, - "input_cost_per_image_token": override.InputCostPerImageToken, - "output_cost_per_image_token": override.OutputCostPerImageToken, - "input_cost_per_image": override.InputCostPerImage, - "output_cost_per_image": override.OutputCostPerImage, - "cache_read_input_image_token_cost": override.CacheReadInputImageTokenCost, - } - - for fieldName, value := range optionalValues { - if value != nil && *value < 0 { - return fmt.Errorf("override[%d]: %s must be non-negative", index, fieldName) - } - } - - return nil -} - func getProviderFromCtx(ctx *fasthttp.RequestCtx) (schemas.ModelProvider, error) { providerValue := ctx.UserValue("provider") if 
providerValue == nil { diff --git a/transports/bifrost-http/lib/config.go b/transports/bifrost-http/lib/config.go index de53b1e8b0..9c53c17a67 100644 --- a/transports/bifrost-http/lib/config.go +++ b/transports/bifrost-http/lib/config.go @@ -440,13 +440,23 @@ func loadConfigFromFile(ctx context.Context, config *Config, data []byte) (*Conf // Load MCP config loadMCPConfigFromFile(ctx, config, &configData) // Load governance config - loadGovernanceConfigFromFile(ctx, config, &configData) + if err = loadGovernanceConfigFromFile(ctx, config, &configData); err != nil { + return nil, err + } // Load auth config loadAuthConfigFromFile(ctx, config, &configData) // Load plugins loadPluginsFromFile(ctx, config, &configData) // Initialize framework config and pricing manager initFrameworkConfigFromFile(ctx, config, &configData) + // ModelCatalog is now initialized; replay pricing overrides for the no-store path. + // loadGovernanceConfigFromFile ran before ModelCatalog existed, so the in-memory + // load was skipped. Do it here now that ModelCatalog is available. 
+ if config.ConfigStore == nil && config.ModelCatalog != nil && config.GovernanceConfig != nil && len(config.GovernanceConfig.PricingOverrides) > 0 { + if err := config.ModelCatalog.SetPricingOverrides(config.GovernanceConfig.PricingOverrides); err != nil { + return nil, fmt.Errorf("failed to set pricing overrides from config file: %w", err) + } + } // Sync encryption: encrypt any plaintext rows written during config loading syncEncryption(ctx, config) // Load WebSocket config (always enabled, apply defaults for any missing values) @@ -912,7 +922,7 @@ func mergeMCPConfig(ctx context.Context, config *Config, configData *ConfigData, } // loadGovernanceConfigFromFile loads and merges governance config from file -func loadGovernanceConfigFromFile(ctx context.Context, config *Config, configData *ConfigData) { +func loadGovernanceConfigFromFile(ctx context.Context, config *Config, configData *ConfigData) error { var governanceConfig *configstore.GovernanceConfig var err error // Checking from the store @@ -936,10 +946,15 @@ func loadGovernanceConfigFromFile(ctx context.Context, config *Config, configDat // No governance config in store, use config file logger.Debug("no governance config found in store, processing from config file") config.GovernanceConfig = configData.Governance - createGovernanceConfigInStore(ctx, config) + if err := createGovernanceConfigInStore(ctx, config); err != nil { + return err + } + // Pricing overrides are loaded into ModelCatalog after initFrameworkConfigFromFile, + // once ModelCatalog is initialized. 
} else { logger.Debug("no governance config in store or config file") } + return nil } // mergeGovernanceConfig merges governance config from file with store @@ -1175,6 +1190,45 @@ func mergeGovernanceConfig(ctx context.Context, config *Config, configData *Conf routingRulesToAdd = append(routingRulesToAdd, configData.Governance.RoutingRules[i]) } } + // Merge PricingOverrides by ID with hash comparison + pricingOverridesToAdd := make([]configstoreTables.TablePricingOverride, 0) + pricingOverridesToUpdate := make([]configstoreTables.TablePricingOverride, 0) + for i, newOverride := range configData.Governance.PricingOverrides { + if len(newOverride.RequestTypes) > 0 { + b, err := json.Marshal(newOverride.RequestTypes) + if err != nil { + logger.Warn("failed to serialize request_types for pricing override %s: %v", newOverride.ID, err) + continue + } + configData.Governance.PricingOverrides[i].RequestTypesJSON = string(b) + } else { + configData.Governance.PricingOverrides[i].RequestTypesJSON = "[]" + } + fileHash, err := configstore.GeneratePricingOverrideHash(configData.Governance.PricingOverrides[i]) + if err != nil { + logger.Warn("failed to generate pricing override hash for %s: %v", newOverride.ID, err) + continue + } + configData.Governance.PricingOverrides[i].ConfigHash = fileHash + + found := false + for j, existing := range governanceConfig.PricingOverrides { + if existing.ID == newOverride.ID { + found = true + if existing.ConfigHash != fileHash { + logger.Debug("config hash mismatch for pricing override %s, syncing from config file", newOverride.ID) + pricingOverridesToUpdate = append(pricingOverridesToUpdate, configData.Governance.PricingOverrides[i]) + governanceConfig.PricingOverrides[j] = configData.Governance.PricingOverrides[i] + } else { + logger.Debug("config hash matches for pricing override %s, keeping DB config", newOverride.ID) + } + break + } + } + if !found { + pricingOverridesToAdd = append(pricingOverridesToAdd, 
configData.Governance.PricingOverrides[i]) + } + } // Add merged items to config config.GovernanceConfig.Budgets = append(governanceConfig.Budgets, budgetsToAdd...) config.GovernanceConfig.RateLimits = append(governanceConfig.RateLimits, rateLimitsToAdd...) @@ -1182,13 +1236,15 @@ func mergeGovernanceConfig(ctx context.Context, config *Config, configData *Conf config.GovernanceConfig.Teams = append(governanceConfig.Teams, teamsToAdd...) config.GovernanceConfig.VirtualKeys = append(governanceConfig.VirtualKeys, virtualKeysToAdd...) config.GovernanceConfig.RoutingRules = append(governanceConfig.RoutingRules, routingRulesToAdd...) + config.GovernanceConfig.PricingOverrides = append(governanceConfig.PricingOverrides, pricingOverridesToAdd...) // Update store with merged config items hasChanges := len(budgetsToAdd) > 0 || len(budgetsToUpdate) > 0 || len(rateLimitsToAdd) > 0 || len(rateLimitsToUpdate) > 0 || len(customersToAdd) > 0 || len(customersToUpdate) > 0 || len(teamsToAdd) > 0 || len(teamsToUpdate) > 0 || len(virtualKeysToAdd) > 0 || len(virtualKeysToUpdate) > 0 || - len(routingRulesToAdd) > 0 || len(routingRulesToUpdate) > 0 + len(routingRulesToAdd) > 0 || len(routingRulesToUpdate) > 0 || + len(pricingOverridesToAdd) > 0 || len(pricingOverridesToUpdate) > 0 if config.ConfigStore != nil && hasChanges { err := updateGovernanceConfigInStore(ctx, config, budgetsToAdd, budgetsToUpdate, @@ -1196,11 +1252,28 @@ func mergeGovernanceConfig(ctx context.Context, config *Config, configData *Conf customersToAdd, customersToUpdate, teamsToAdd, teamsToUpdate, virtualKeysToAdd, virtualKeysToUpdate, - routingRulesToAdd, routingRulesToUpdate) + routingRulesToAdd, routingRulesToUpdate, + pricingOverridesToAdd, pricingOverridesToUpdate) if err != nil { logger.Fatal("failed to sync governance config: %v", err) } } + // Sync pricing overrides into the model catalog in one batch to avoid + // rebuilding the lookup map on every iteration. 
+ if config.ModelCatalog != nil { + rows := make([]*configstoreTables.TablePricingOverride, 0, len(pricingOverridesToAdd)+len(pricingOverridesToUpdate)) + for i := range pricingOverridesToAdd { + rows = append(rows, &pricingOverridesToAdd[i]) + } + for i := range pricingOverridesToUpdate { + rows = append(rows, &pricingOverridesToUpdate[i]) + } + if len(rows) > 0 { + if err := config.ModelCatalog.UpsertPricingOverrides(rows...); err != nil { + logger.Error("failed to upsert pricing overrides into model catalog: %v", err) + } + } + } } // updateGovernanceConfigInStore updates governance config items in the store @@ -1219,6 +1292,8 @@ func updateGovernanceConfigInStore( virtualKeysToUpdate []configstoreTables.TableVirtualKey, routingRulesToAdd []configstoreTables.TableRoutingRule, routingRulesToUpdate []configstoreTables.TableRoutingRule, + pricingOverridesToAdd []configstoreTables.TablePricingOverride, + pricingOverridesToUpdate []configstoreTables.TablePricingOverride, ) error { logger.Debug("updating governance config in store with merged items") return config.ConfigStore.ExecuteTransaction(ctx, func(tx *gorm.DB) error { @@ -1330,15 +1405,29 @@ func updateGovernanceConfigInStore( } } + // Create pricing overrides (new from config.json) + for _, override := range pricingOverridesToAdd { + if err := config.ConfigStore.CreatePricingOverride(ctx, &override, tx); err != nil { + return fmt.Errorf("failed to create pricing override %s: %w", override.ID, err) + } + } + + // Update pricing overrides (config.json changed) + for _, override := range pricingOverridesToUpdate { + if err := config.ConfigStore.UpdatePricingOverride(ctx, &override, tx); err != nil { + return fmt.Errorf("failed to update pricing override %s: %w", override.ID, err) + } + } + return nil }) } // createGovernanceConfigInStore creates governance config in store from config file -func createGovernanceConfigInStore(ctx context.Context, config *Config) { +func createGovernanceConfigInStore(ctx 
context.Context, config *Config) error { if config.ConfigStore == nil { logger.Debug("createGovernanceConfigInStore: ConfigStore is nil, skipping") - return + return nil } logger.Debug("createGovernanceConfigInStore: creating %d budgets, %d rate_limits, %d virtual_keys, %d routing_rules", len(config.GovernanceConfig.Budgets), @@ -1454,10 +1543,34 @@ func createGovernanceConfigInStore(ctx context.Context, config *Config) { virtualKey.MCPConfigs = mcpConfigs } + // Create pricing overrides after virtual keys so that scoped overrides referencing + // a virtual key ID are inserted after the VK row exists. + for i := range config.GovernanceConfig.PricingOverrides { + override := &config.GovernanceConfig.PricingOverrides[i] + if len(override.RequestTypes) > 0 { + b, err := json.Marshal(override.RequestTypes) + if err != nil { + return fmt.Errorf("failed to serialize request_types for pricing override %s: %w", override.ID, err) + } + override.RequestTypesJSON = string(b) + } else { + override.RequestTypesJSON = "[]" + } + overrideHash, err := configstore.GeneratePricingOverrideHash(*override) + if err != nil { + return fmt.Errorf("failed to generate pricing override hash for %s: %w", override.ID, err) + } + override.ConfigHash = overrideHash + if err := config.ConfigStore.CreatePricingOverride(ctx, override, tx); err != nil { + return fmt.Errorf("failed to create pricing override %s: %w", override.ID, err) + } + } + return nil }); err != nil { - logger.Warn("failed to update governance config: %v", err) + return fmt.Errorf("failed to create governance config in store: %w", err) } + return nil } // isBcryptHash checks if a string looks like a bcrypt hash @@ -1898,7 +2011,6 @@ func initFrameworkConfigFromFile(ctx context.Context, config *Config, configData logger.Error("failed to initialize pricing manager: %v", err) } else { config.ModelCatalog = pricingManager - applyProviderPricingOverrides(config.ModelCatalog, config.Providers) } // Initialize MCP catalog @@ -2127,7 
+2239,6 @@ func loadDefaultProviders(ctx context.Context, config *Config) error { SendBackRawRequest: dbProvider.SendBackRawRequest, SendBackRawResponse: dbProvider.SendBackRawResponse, CustomProviderConfig: dbProvider.CustomProviderConfig, - PricingOverrides: dbProvider.PricingOverrides, ConfigHash: dbProvider.ConfigHash, } if err := ValidateCustomProvider(providerConfig, provider); err != nil { @@ -2276,7 +2387,6 @@ func initDefaultFrameworkConfig(ctx context.Context, config *Config) error { logger.Error("failed to initialize model catalog: %v", err) } else { config.ModelCatalog = modelCatalog - applyProviderPricingOverrides(config.ModelCatalog, config.Providers) } // Initialize MCP catalog @@ -3741,14 +3851,3 @@ func DeepCopy[T any](in T) (T, error) { err = sonic.Unmarshal(b, &out) return out, err } - -func applyProviderPricingOverrides(catalog *modelcatalog.ModelCatalog, providers map[schemas.ModelProvider]configstore.ProviderConfig) { - if catalog == nil { - return - } - for provider, providerConfig := range providers { - if err := catalog.SetProviderPricingOverrides(provider, providerConfig.PricingOverrides); err != nil { - logger.Warn("failed to load pricing overrides for provider %s: %v", provider, err) - } - } -} diff --git a/transports/bifrost-http/lib/config_test.go b/transports/bifrost-http/lib/config_test.go index 74cbd3d2e7..58cef42646 100644 --- a/transports/bifrost-http/lib/config_test.go +++ b/transports/bifrost-http/lib/config_test.go @@ -855,6 +855,30 @@ func (m *MockConfigStore) DeleteModelPrices(ctx context.Context, tx ...*gorm.DB) return nil } +func (m *MockConfigStore) GetPricingOverrides(ctx context.Context, filter configstore.PricingOverrideFilters) ([]tables.TablePricingOverride, error) { + return []tables.TablePricingOverride{}, nil +} + +func (m *MockConfigStore) GetPricingOverridesPaginated(ctx context.Context, params configstore.PricingOverridesQueryParams) ([]tables.TablePricingOverride, int64, error) { + return 
[]tables.TablePricingOverride{}, 0, nil +} + +func (m *MockConfigStore) GetPricingOverrideByID(ctx context.Context, id string) (*tables.TablePricingOverride, error) { + return nil, configstore.ErrNotFound +} + +func (m *MockConfigStore) CreatePricingOverride(ctx context.Context, override *tables.TablePricingOverride, tx ...*gorm.DB) error { + return nil +} + +func (m *MockConfigStore) UpdatePricingOverride(ctx context.Context, override *tables.TablePricingOverride, tx ...*gorm.DB) error { + return nil +} + +func (m *MockConfigStore) DeletePricingOverride(ctx context.Context, id string, tx ...*gorm.DB) error { + return nil +} + // Model parameters func (m *MockConfigStore) GetModelParameters(ctx context.Context, model string) (*tables.TableModelParameters, error) { return nil, nil @@ -12249,13 +12273,13 @@ func TestMergePluginsFromFile_NoChangeSkipsMerge(t *testing.T) { mock := &MockConfigStore{ plugins: []*tables.TablePlugin{ { - Name: "plugin-a", - Enabled: true, - Placement: &postBuiltin, - Order: &order0, - Version: 1, + Name: "plugin-a", + Enabled: true, + Placement: &postBuiltin, + Order: &order0, + Version: 1, ConfigJSON: `{"setting":"db-value"}`, - Config: map[string]any{"setting": "db-value"}, + Config: map[string]any{"setting": "db-value"}, }, }, } diff --git a/transports/bifrost-http/server/server.go b/transports/bifrost-http/server/server.go index a84b966ef1..c0e973d6a3 100644 --- a/transports/bifrost-http/server/server.go +++ b/transports/bifrost-http/server/server.go @@ -62,6 +62,8 @@ type ServerCallbacks interface { // Pricing related callbacks ReloadPricingManager(ctx context.Context) error ForceReloadPricing(ctx context.Context) error + UpsertPricingOverride(ctx context.Context, override *tables.TablePricingOverride) error + DeletePricingOverride(ctx context.Context, id string) error // Proxy related callbacks ReloadProxyConfig(ctx context.Context, config *tables.GlobalProxyConfig) error // Client config related callbacks @@ -499,11 +501,6 @@ func 
(s *BifrostHTTPServer) ReloadProvider(ctx context.Context, provider schemas } } - // Syncing models (this part always runs regardless of governance) - if err := s.Config.ModelCatalog.SetProviderPricingOverrides(provider, providerInfo.PricingOverrides); err != nil { - logger.Warn("failed to refresh pricing overrides for provider %s: %v", provider, err) - } - bfCtx := schemas.NewBifrostContext(ctx, time.Now().Add(15*time.Second)) bfCtx.SetValue(schemas.BifrostContextKeySkipPluginPipeline, true) bfCtx.SetValue(schemas.BifrostContextKeyValidateKeys, true) // Validate keys during provider add/update @@ -595,7 +592,6 @@ func (s *BifrostHTTPServer) RemoveProvider(ctx context.Context, provider schemas return fmt.Errorf("pricing manager not found") } s.Config.ModelCatalog.DeleteModelDataForProvider(provider) - s.Config.ModelCatalog.DeleteProviderPricingOverrides(provider) return nil } @@ -764,11 +760,6 @@ func (s *BifrostHTTPServer) ForceReloadPricing(ctx context.Context) error { return fmt.Errorf("failed to initialize new model catalog: %w", err) } s.Config.ModelCatalog = modelCatalog - for provider, providerConfig := range s.Config.Providers { - if err := s.Config.ModelCatalog.SetProviderPricingOverrides(provider, providerConfig.PricingOverrides); err != nil { - logger.Warn("failed to seed pricing overrides for provider %s: %v", provider, err) - } - } } else { if err := s.Config.ModelCatalog.ForceReloadPricing(ctx); err != nil { return fmt.Errorf("failed to force reload pricing: %w", err) @@ -817,6 +808,23 @@ func (s *BifrostHTTPServer) ForceReloadPricing(ctx context.Context) error { return nil } +// UpsertPricingOverride inserts or updates a pricing override in the in-memory model catalog. 
+func (s *BifrostHTTPServer) UpsertPricingOverride(ctx context.Context, override *tables.TablePricingOverride) error { + if s.Config == nil || s.Config.ModelCatalog == nil { + return fmt.Errorf("pricing manager not found") + } + return s.Config.ModelCatalog.UpsertPricingOverrides(override) +} + +// DeletePricingOverride removes a pricing override from the in-memory model catalog. +func (s *BifrostHTTPServer) DeletePricingOverride(ctx context.Context, id string) error { + if s.Config == nil || s.Config.ModelCatalog == nil { + return fmt.Errorf("pricing manager not found") + } + s.Config.ModelCatalog.DeletePricingOverride(id) + return nil +} + // ReloadProxyConfig reloads the proxy configuration func (s *BifrostHTTPServer) ReloadProxyConfig(ctx context.Context, config *tables.GlobalProxyConfig) error { if s.Config == nil { diff --git a/transports/config.schema.json b/transports/config.schema.json index cfb89ada7b..0b841b48e2 100644 --- a/transports/config.schema.json +++ b/transports/config.schema.json @@ -497,6 +497,13 @@ "$ref": "#/$defs/routing_rule" } }, + "pricing_overrides": { + "type": "array", + "description": "Scoped pricing overrides applied at runtime by the model catalog", + "items": { + "$ref": "#/$defs/pricing_override" + } + }, "auth_config": { "$ref": "#/$defs/auth_config" }, @@ -1613,159 +1620,6 @@ }, "additionalProperties": false }, - "pricing_override_match_type": { - "type": "string", - "enum": [ - "exact", - "wildcard", - "regex" - ] - }, - "pricing_override_request_type": { - "type": "string", - "enum": [ - "text_completion", - "text_completion_stream", - "chat_completion", - "chat_completion_stream", - "responses", - "responses_stream", - "embedding", - "rerank", - "speech", - "speech_stream", - "transcription", - "transcription_stream", - "image_generation", - "image_generation_stream" - ] - }, - "provider_pricing_override": { - "type": "object", - "properties": { - "model_pattern": { - "type": "string", - "minLength": 1 - }, - "match_type": { 
- "$ref": "#/$defs/pricing_override_match_type" - }, - "request_types": { - "type": "array", - "items": { - "$ref": "#/$defs/pricing_override_request_type" - } - }, - "input_cost_per_token": { "type": "number", "minimum": 0 }, - "output_cost_per_token": { "type": "number", "minimum": 0 }, - "input_cost_per_video_per_second": { "type": "number", "minimum": 0 }, - "input_cost_per_audio_per_second": { "type": "number", "minimum": 0 }, - "input_cost_per_character": { "type": "number", "minimum": 0 }, - "output_cost_per_character": { "type": "number", "minimum": 0 }, - "input_cost_per_token_above_128k_tokens": { "type": "number", "minimum": 0 }, - "input_cost_per_character_above_128k_tokens": { "type": "number", "minimum": 0 }, - "input_cost_per_image_above_128k_tokens": { "type": "number", "minimum": 0 }, - "input_cost_per_video_per_second_above_128k_tokens": { "type": "number", "minimum": 0 }, - "input_cost_per_audio_per_second_above_128k_tokens": { "type": "number", "minimum": 0 }, - "output_cost_per_token_above_128k_tokens": { "type": "number", "minimum": 0 }, - "output_cost_per_character_above_128k_tokens": { "type": "number", "minimum": 0 }, - "input_cost_per_token_above_200k_tokens": { "type": "number", "minimum": 0 }, - "output_cost_per_token_above_200k_tokens": { "type": "number", "minimum": 0 }, - "cache_creation_input_token_cost_above_200k_tokens": { "type": "number", "minimum": 0 }, - "cache_read_input_token_cost_above_200k_tokens": { "type": "number", "minimum": 0 }, - "cache_read_input_token_cost": { "type": "number", "minimum": 0 }, - "cache_creation_input_token_cost": { "type": "number", "minimum": 0 }, - "input_cost_per_token_batches": { "type": "number", "minimum": 0 }, - "output_cost_per_token_batches": { "type": "number", "minimum": 0 }, - "input_cost_per_image_token": { "type": "number", "minimum": 0 }, - "output_cost_per_image_token": { "type": "number", "minimum": 0 }, - "input_cost_per_image": { "type": "number", "minimum": 0 }, - 
"output_cost_per_image": { "type": "number", "minimum": 0 }, - "cache_read_input_image_token_cost": { "type": "number", "minimum": 0 } - }, - "required": [ - "model_pattern", - "match_type" - ], - "additionalProperties": false - }, - "custom_provider_config": { - "type": "object", - "description": "Custom provider configuration for extending or customizing provider behavior", - "properties": { - "is_key_less": { - "type": "boolean", - "description": "Whether the custom provider requires a key" - }, - "base_provider_type": { - "type": "string", - "description": "Base provider type to extend" - }, - "allowed_requests": { - "type": "object", - "description": "Allowed request types for the custom provider", - "properties": { - "list_models": { "type": "boolean" }, - "text_completion": { "type": "boolean" }, - "text_completion_stream": { "type": "boolean" }, - "chat_completion": { "type": "boolean" }, - "chat_completion_stream": { "type": "boolean" }, - "responses": { "type": "boolean" }, - "responses_stream": { "type": "boolean" }, - "count_tokens": { "type": "boolean" }, - "embedding": { "type": "boolean" }, - "rerank": { "type": "boolean" }, - "speech": { "type": "boolean" }, - "speech_stream": { "type": "boolean" }, - "transcription": { "type": "boolean" }, - "transcription_stream": { "type": "boolean" }, - "image_generation": { "type": "boolean" }, - "image_generation_stream": { "type": "boolean" }, - "image_edit": { "type": "boolean" }, - "image_edit_stream": { "type": "boolean" }, - "image_variation": { "type": "boolean" }, - "video_generation": { "type": "boolean" }, - "video_retrieve": { "type": "boolean" }, - "video_download": { "type": "boolean" }, - "video_delete": { "type": "boolean" }, - "video_list": { "type": "boolean" }, - "video_remix": { "type": "boolean" }, - "batch_create": { "type": "boolean" }, - "batch_list": { "type": "boolean" }, - "batch_retrieve": { "type": "boolean" }, - "batch_cancel": { "type": "boolean" }, - "batch_delete": { "type": 
"boolean" }, - "batch_results": { "type": "boolean" }, - "file_upload": { "type": "boolean" }, - "file_list": { "type": "boolean" }, - "file_retrieve": { "type": "boolean" }, - "file_delete": { "type": "boolean" }, - "file_content": { "type": "boolean" }, - "container_create": { "type": "boolean" }, - "container_list": { "type": "boolean" }, - "container_retrieve": { "type": "boolean" }, - "container_delete": { "type": "boolean" }, - "container_file_create": { "type": "boolean" }, - "container_file_list": { "type": "boolean" }, - "container_file_retrieve": { "type": "boolean" }, - "container_file_content": { "type": "boolean" }, - "container_file_delete": { "type": "boolean" }, - "passthrough": { "type": "boolean" }, - "passthrough_stream": { "type": "boolean" } - }, - "additionalProperties": false - }, - "request_path_overrides": { - "type": "object", - "description": "Mapping of request type to custom path overriding the default provider path", - "additionalProperties": { - "type": "string" - } - } - }, - "required": ["base_provider_type"], - "additionalProperties": false - }, "network_config": { "type": "object", "properties": { @@ -2074,13 +1928,6 @@ }, "custom_provider_config": { "$ref": "#/$defs/custom_provider_config" - }, - "pricing_overrides": { - "type": "array", - "items": { - "$ref": "#/$defs/provider_pricing_override" - }, - "description": "Provider-level pricing overrides matched by model pattern" } }, "required": [ @@ -2122,13 +1969,6 @@ }, "custom_provider_config": { "$ref": "#/$defs/custom_provider_config" - }, - "pricing_overrides": { - "type": "array", - "items": { - "$ref": "#/$defs/provider_pricing_override" - }, - "description": "Provider-level pricing overrides matched by model pattern" } }, "required": [ @@ -2170,13 +2010,6 @@ }, "custom_provider_config": { "$ref": "#/$defs/custom_provider_config" - }, - "pricing_overrides": { - "type": "array", - "items": { - "$ref": "#/$defs/provider_pricing_override" - }, - "description": "Provider-level 
pricing overrides matched by model pattern" } }, "required": [ @@ -2218,13 +2051,6 @@ }, "custom_provider_config": { "$ref": "#/$defs/custom_provider_config" - }, - "pricing_overrides": { - "type": "array", - "items": { - "$ref": "#/$defs/provider_pricing_override" - }, - "description": "Provider-level pricing overrides matched by model pattern" } }, "required": [ @@ -2266,13 +2092,6 @@ }, "custom_provider_config": { "$ref": "#/$defs/custom_provider_config" - }, - "pricing_overrides": { - "type": "array", - "items": { - "$ref": "#/$defs/provider_pricing_override" - }, - "description": "Provider-level pricing overrides matched by model pattern" } }, "required": [ @@ -3209,6 +3028,155 @@ } }, "additionalProperties": false + }, + "pricing_override": { + "type": "object", + "description": "Scoped pricing override applied at runtime by the model catalog", + "properties": { + "id": { + "type": "string", + "description": "Unique pricing override ID" + }, + "name": { + "type": "string", + "description": "Human-readable name for this override" + }, + "scope_kind": { + "type": "string", + "description": "Scope level for this override", + "enum": ["global", "provider", "provider_key", "virtual_key", "virtual_key_provider", "virtual_key_provider_key"] + }, + "virtual_key_id": { + "type": "string", + "description": "Virtual key ID (required for virtual_key* scopes)" + }, + "provider_id": { + "type": "string", + "description": "Provider ID (required for provider* scopes)" + }, + "provider_key_id": { + "type": "string", + "description": "Provider key ID (required for provider_key and virtual_key_provider_key scopes)" + }, + "match_type": { + "type": "string", + "description": "How the pattern is matched against model names", + "enum": ["exact", "wildcard"] + }, + "pattern": { + "type": "string", + "description": "Model name pattern to match (exact name or wildcard prefix ending with *)" + }, + "request_types": { + "type": "array", + "description": "Request types this override 
applies to. At least one value is required.", + "minItems": 1, + "items": { + "type": "string" + } + }, + "pricing_patch": { + "type": "string", + "description": "JSON-encoded pricing fields to override (e.g. '{\"input_cost_per_token\":0.000001}')" + }, + "config_hash": { + "type": "string", + "description": "Internal hash for change detection (auto-managed)" + } + }, + "required": ["id", "name", "scope_kind", "match_type", "pattern", "request_types"], + "additionalProperties": false + }, + "pricing_override_match_type": { + "type": "string", + "enum": ["exact", "wildcard"] + }, + "pricing_override_request_type": { + "type": "string", + "enum": [ + "chat_completion", "text_completion", "responses", + "embedding", "rerank", + "speech", "transcription", + "image_generation", "image_variation", "image_edit", + "video_generation", "video_remix" + ] + }, + "custom_provider_config": { + "type": "object", + "description": "Custom provider configuration for extending or customizing provider behavior", + "properties": { + "is_key_less": { + "type": "boolean", + "description": "Whether the custom provider requires a key" + }, + "base_provider_type": { + "type": "string", + "description": "Base provider type to extend" + }, + "allowed_requests": { + "type": "object", + "description": "Allowed request types for the custom provider", + "properties": { + "list_models": { "type": "boolean" }, + "text_completion": { "type": "boolean" }, + "text_completion_stream": { "type": "boolean" }, + "chat_completion": { "type": "boolean" }, + "chat_completion_stream": { "type": "boolean" }, + "responses": { "type": "boolean" }, + "responses_stream": { "type": "boolean" }, + "count_tokens": { "type": "boolean" }, + "embedding": { "type": "boolean" }, + "rerank": { "type": "boolean" }, + "speech": { "type": "boolean" }, + "speech_stream": { "type": "boolean" }, + "transcription": { "type": "boolean" }, + "transcription_stream": { "type": "boolean" }, + "image_generation": { "type": "boolean" 
}, + "image_generation_stream": { "type": "boolean" }, + "image_edit": { "type": "boolean" }, + "image_edit_stream": { "type": "boolean" }, + "image_variation": { "type": "boolean" }, + "video_generation": { "type": "boolean" }, + "video_retrieve": { "type": "boolean" }, + "video_download": { "type": "boolean" }, + "video_delete": { "type": "boolean" }, + "video_list": { "type": "boolean" }, + "video_remix": { "type": "boolean" }, + "batch_create": { "type": "boolean" }, + "batch_list": { "type": "boolean" }, + "batch_retrieve": { "type": "boolean" }, + "batch_cancel": { "type": "boolean" }, + "batch_delete": { "type": "boolean" }, + "batch_results": { "type": "boolean" }, + "file_upload": { "type": "boolean" }, + "file_list": { "type": "boolean" }, + "file_retrieve": { "type": "boolean" }, + "file_delete": { "type": "boolean" }, + "file_content": { "type": "boolean" }, + "container_create": { "type": "boolean" }, + "container_list": { "type": "boolean" }, + "container_retrieve": { "type": "boolean" }, + "container_delete": { "type": "boolean" }, + "container_file_create": { "type": "boolean" }, + "container_file_list": { "type": "boolean" }, + "container_file_retrieve": { "type": "boolean" }, + "container_file_content": { "type": "boolean" }, + "container_file_delete": { "type": "boolean" }, + "passthrough": { "type": "boolean" }, + "passthrough_stream": { "type": "boolean" }, + "websocket_responses": { "type": "boolean" }, + "realtime": { "type": "boolean" } + }, + "additionalProperties": false + }, + "request_path_overrides": { + "type": "object", + "description": "Mapping of request type to custom path overriding the default provider path", + "additionalProperties": { "type": "string" } + } + }, + "required": ["base_provider_type"], + "additionalProperties": false } } } diff --git a/ui/app/workspace/custom-pricing/overrides/page.tsx b/ui/app/workspace/custom-pricing/overrides/page.tsx new file mode 100644 index 0000000000..69de04cfb7 --- /dev/null +++ 
b/ui/app/workspace/custom-pricing/overrides/page.tsx @@ -0,0 +1,11 @@ +"use client"; + +import ScopedPricingOverridesView from "@/app/workspace/custom-pricing/overrides/scopedPricingOverridesView"; + +export default function ScopedPricingOverridesPage() { + return ( +
+ +
+ ); +} diff --git a/ui/app/workspace/custom-pricing/overrides/pricingFieldSelector.tsx b/ui/app/workspace/custom-pricing/overrides/pricingFieldSelector.tsx new file mode 100644 index 0000000000..8552e08565 --- /dev/null +++ b/ui/app/workspace/custom-pricing/overrides/pricingFieldSelector.tsx @@ -0,0 +1,234 @@ +"use client"; + +import { Badge } from "@/components/ui/badge"; +import { Input } from "@/components/ui/input"; +import { cn } from "@/lib/utils"; +import { ChevronDown, Plus, X } from "lucide-react"; +import { useEffect, useMemo, useState } from "react"; +import type { FieldErrors, PricingFieldKey } from "./pricingOverrideSheet"; +import { PRICING_FIELDS } from "./pricingOverrideSheet"; + +type GroupKey = "chat" | "embedding" | "rerank" | "audio" | "image" | "video"; + +const PRICING_GROUPS: { key: GroupKey; label: string }[] = [ + { key: "chat", label: "Chat / Text / Responses" }, + { key: "embedding", label: "Embedding" }, + { key: "rerank", label: "Rerank" }, + { key: "audio", label: "Audio" }, + { key: "image", label: "Image" }, + { key: "video", label: "Video" }, +]; + +const REQUEST_TYPE_TO_CATEGORY: Record = { + chat_completion: "chat", + text_completion: "chat", + responses: "chat", + embedding: "embedding", + rerank: "rerank", + speech: "audio", + transcription: "audio", + image_generation: "image", + image_variation: "image", + image_edit: "image", + video_generation: "video", + video_remix: "video", +}; + +interface PricingFieldSelectorProps { + values: Partial>; + errors: FieldErrors; + selectedRequestTypes?: string[]; + onChange: (key: PricingFieldKey, value: string) => void; + onFieldInteraction?: () => void; +} + +export function PricingFieldSelector({ values, errors, selectedRequestTypes, onChange, onFieldInteraction }: PricingFieldSelectorProps) { + const [search, setSearch] = useState(""); + const [openGroups, setOpenGroups] = useState>(new Set(["chat"])); + + const [activeFields, setActiveFields] = useState>( + () => new 
Set(PRICING_FIELDS.filter((f) => values[f.key] != null && values[f.key]!.trim() !== "").map((f) => f.key)), + ); + + // Sync active fields to exactly the set of keys that have non-empty (trimmed) values whenever `values` changes. + // This handles both loading new overrides (adds keys) and clearing the patch (removes stale keys). NOTE(review): the set is replaced, not merged, so a field that was activated but is still empty (or was cleared while typing) is dropped on the next `values` change — presumably the parent passes a new `values` object on every edit; confirm this is intended. + useEffect(() => { + setActiveFields(new Set(PRICING_FIELDS.filter((f) => values[f.key] != null && values[f.key]!.trim() !== "").map((f) => f.key))); + }, [values]); + + // Derive active categories from selected request types; null means "no filter" (nothing selected, or no selected type maps to a category) + const activeCategories = useMemo | null>(() => { + if (!selectedRequestTypes || selectedRequestTypes.length === 0) return null; + const cats = new Set(); + for (const rt of selectedRequestTypes) { + const cat = REQUEST_TYPE_TO_CATEGORY[rt]; + if (cat) cats.add(cat); + } + return cats.size > 0 ? cats : null; + }, [selectedRequestTypes]); + + const trimmedSearch = search.trim().toLowerCase(); + const isSearching = trimmedSearch.length > 0; + + const filteredFields = useMemo(() => { + if (!isSearching) return null; + return PRICING_FIELDS.filter((f) => f.label.toLowerCase().includes(trimmedSearch) || f.key.toLowerCase().includes(trimmedSearch)); + }, [isSearching, trimmedSearch]); + + // Fields visible per group when not searching, respecting activeCategories filter; groups left with no fields are omitted + const visibleGroupedFields = useMemo( + () => + PRICING_GROUPS.map((group) => { + const fields = PRICING_FIELDS.filter((f) => { + if (f.group !== group.key) return false; + if (activeCategories === null) return true; + return (f.requestTypeGroups as readonly string[]).some((rg) => activeCategories.has(rg as GroupKey)); + }); + return { ...group, fields }; + }).filter((g) => g.fields.length > 0), + [activeCategories], + ); + + const toggleGroup = (key: GroupKey) => { + setOpenGroups((prev) => { + const next = new Set(prev); + if (next.has(key)) next.delete(key); + else next.add(key); + return next; + }); + }; + + const activateField = (key: PricingFieldKey) => { + setActiveFields((prev) => 
new Set([...prev, key])); + }; + + const deactivateField = (key: PricingFieldKey) => { + setActiveFields((prev) => { + const next = new Set(prev); + next.delete(key); + return next; + }); + onFieldInteraction?.(); + onChange(key, ""); + }; + + const handleInputChange = (key: PricingFieldKey, value: string) => { + onFieldInteraction?.(); + onChange(key, value); + }; + + const renderFieldRow = (field: { key: PricingFieldKey; label: string }) => { + const isActive = activeFields.has(field.key); + const hasValue = values[field.key]?.trim(); + const error = errors[field.key]; + + if (!isActive) { + return ( + + ); + } + + return ( +
+
+ {field.label} + +
+ handleInputChange(field.key, e.target.value)} + placeholder="0.0" + /> + {error &&

{error}

} +
+ ); + }; + + return ( +
+ setSearch(e.target.value)} + className="h-9" + data-testid="pricing-field-search" + /> + +
+ {isSearching ? ( +
+ {filteredFields!.length === 0 ? ( +
No fields match “{search}”
+ ) : ( + filteredFields!.map((field) => renderFieldRow(field)) + )} +
+ ) : ( +
+ {visibleGroupedFields.length === 0 ? ( +
No pricing fields for the selected request types
+ ) : ( + visibleGroupedFields.map((group) => { + const isOpen = openGroups.has(group.key); + const valueCount = group.fields.filter((f) => values[f.key]?.trim()).length; + + return ( +
+ + + {isOpen && ( +
+ {group.fields.map((field) => renderFieldRow(field))} +
+ )} +
+ ); + }) + )} +
+ )} +
+
+ ); +} diff --git a/ui/app/workspace/custom-pricing/overrides/pricingOverrideSheet.tsx b/ui/app/workspace/custom-pricing/overrides/pricingOverrideSheet.tsx new file mode 100644 index 0000000000..e31d3eb1ef --- /dev/null +++ b/ui/app/workspace/custom-pricing/overrides/pricingOverrideSheet.tsx @@ -0,0 +1,884 @@ +"use client"; + +import { CodeEditor } from "@/components/ui/codeEditor"; +import { Button } from "@/components/ui/button"; +import { Checkbox } from "@/components/ui/checkbox"; +import { Input } from "@/components/ui/input"; +import { Label } from "@/components/ui/label"; +import { Popover, PopoverContent, PopoverTrigger } from "@/components/ui/popover"; +import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from "@/components/ui/select"; +import { DottedSeparator } from "@/components/ui/separator"; +import { Sheet, SheetContent, SheetHeader, SheetTitle } from "@/components/ui/sheet"; +import { PricingFieldSelector } from "./pricingFieldSelector"; +import { + getErrorMessage, + useCreatePricingOverrideMutation, + useGetProvidersQuery, + useGetVirtualKeysQuery, + useUpdatePricingOverrideMutation, +} from "@/lib/store"; +import { ProviderIconType, RenderProviderIcon } from "@/lib/constants/icons"; +import { getProviderLabel, RequestTypeLabels } from "@/lib/constants/logs"; +import { ModelProvider, RequestType } from "@/lib/types/config"; +import { + CreatePricingOverrideRequest, + PricingOverride, + PricingOverrideMatchType, + PricingOverridePatch, + PricingOverrideScopeKind, +} from "@/lib/types/governance"; +import { cn } from "@/lib/utils"; +import { ChevronDown, Save, X } from "lucide-react"; +import { Dispatch, SetStateAction, useCallback, useEffect, useMemo, useRef, useState } from "react"; +import { toast } from "sonner"; + +export const REQUEST_TYPE_GROUPS = [ + { + label: "Chat / Text / Responses", + types: ["chat_completion", "text_completion", "responses"], + }, + { + label: "Embedding", + types: ["embedding"], + }, + { + label: 
"Rerank", + types: ["rerank"], + }, + { + label: "Audio", + types: ["speech", "transcription"], + }, + { + label: "Image", + types: ["image_generation", "image_variation", "image_edit"], + }, + { + label: "Video", + types: ["video_generation", "video_remix"], + }, +] as const; + +export const REQUEST_TYPE_OPTIONS = REQUEST_TYPE_GROUPS.flatMap((g) => g.types); + +export function getRequestTypeGroup(rt: string): string | undefined { + return REQUEST_TYPE_GROUPS.find((g) => (g.types as readonly string[]).includes(rt))?.label; +} + +export const PRICING_FIELDS = [ + // Chat / Text / Responses fields + { key: "input_cost_per_token", label: "Input / token", group: "chat", requestTypeGroups: ["chat", "embedding", "rerank", "audio", "image", "video"] }, + { key: "output_cost_per_token", label: "Output / token", group: "chat", requestTypeGroups: ["chat", "rerank", "audio", "image", "video"] }, + { key: "input_cost_per_token_batches", label: "Input / token (batch)", group: "chat", requestTypeGroups: ["chat"] }, + { key: "output_cost_per_token_batches", label: "Output / token (batch)", group: "chat", requestTypeGroups: ["chat"] }, + { key: "input_cost_per_token_priority", label: "Input / token (priority)", group: "chat", requestTypeGroups: ["chat"] }, + { key: "output_cost_per_token_priority", label: "Output / token (priority)", group: "chat", requestTypeGroups: ["chat"] }, + { key: "input_cost_per_token_above_128k_tokens", label: "Input / token (>128k)", group: "chat", requestTypeGroups: ["chat", "embedding", "rerank"] }, + { key: "output_cost_per_token_above_128k_tokens", label: "Output / token (>128k)", group: "chat", requestTypeGroups: ["chat", "rerank", "audio"] }, + { key: "input_cost_per_token_above_200k_tokens", label: "Input / token (>200k)", group: "chat", requestTypeGroups: ["chat", "embedding", "rerank"] }, + { key: "output_cost_per_token_above_200k_tokens", label: "Output / token (>200k)", group: "chat", requestTypeGroups: ["chat", "rerank", "audio"] }, + { key: 
"cache_creation_input_token_cost", label: "Cache creation / token", group: "chat", requestTypeGroups: ["chat"] }, + { key: "cache_read_input_token_cost", label: "Cache read / token", group: "chat", requestTypeGroups: ["chat"] }, + { key: "cache_creation_input_token_cost_above_200k_tokens", label: "Cache creation / token (>200k)", group: "chat", requestTypeGroups: ["chat"] }, + { key: "cache_read_input_token_cost_above_200k_tokens", label: "Cache read / token (>200k)", group: "chat", requestTypeGroups: ["chat"] }, + { key: "cache_creation_input_token_cost_above_1hr", label: "Cache creation / token (>1hr)", group: "chat", requestTypeGroups: ["chat"] }, + { key: "cache_creation_input_token_cost_above_1hr_above_200k_tokens", label: "Cache creation / token (>1hr, >200k)", group: "chat", requestTypeGroups: ["chat"] }, + { key: "cache_read_input_token_cost_priority", label: "Cache read / token (priority)", group: "chat", requestTypeGroups: ["chat"] }, + { key: "search_context_cost_per_query", label: "Search context / query", group: "chat", requestTypeGroups: ["chat", "rerank"] }, + { key: "code_interpreter_cost_per_session", label: "Code interpreter / session", group: "chat", requestTypeGroups: ["chat"] }, + // Audio fields + { key: "input_cost_per_character", label: "Input / character", group: "audio", requestTypeGroups: ["audio"] }, + { key: "input_cost_per_audio_token", label: "Input / audio token", group: "audio", requestTypeGroups: ["audio"] }, + { key: "input_cost_per_audio_per_second", label: "Input / audio second", group: "audio", requestTypeGroups: ["audio"] }, + { key: "input_cost_per_audio_per_second_above_128k_tokens", label: "Input / audio second (>128k)", group: "audio", requestTypeGroups: ["audio"] }, + { key: "input_cost_per_second", label: "Input / second", group: "audio", requestTypeGroups: ["audio", "video"] }, + { key: "output_cost_per_audio_token", label: "Output / audio token", group: "audio", requestTypeGroups: ["audio"] }, + { key: 
"output_cost_per_second", label: "Output / second", group: "audio", requestTypeGroups: ["audio", "video"] }, + { key: "cache_creation_input_audio_token_cost", label: "Cache creation / audio token", group: "audio", requestTypeGroups: ["audio"] }, + // Image fields + { key: "input_cost_per_image_token", label: "Input / image token", group: "image", requestTypeGroups: ["image"] }, + { key: "input_cost_per_image", label: "Input / image", group: "image", requestTypeGroups: ["image"] }, + { key: "input_cost_per_image_above_128k_tokens", label: "Input / image (>128k)", group: "image", requestTypeGroups: ["image"] }, + { key: "input_cost_per_pixel", label: "Input / pixel", group: "image", requestTypeGroups: ["image"] }, + { key: "output_cost_per_image_token", label: "Output / image token", group: "image", requestTypeGroups: ["image"] }, + { key: "output_cost_per_image", label: "Output / image", group: "image", requestTypeGroups: ["image"] }, + { key: "output_cost_per_pixel", label: "Output / pixel", group: "image", requestTypeGroups: ["image"] }, + { key: "output_cost_per_image_premium_image", label: "Output / image (premium)", group: "image", requestTypeGroups: ["image"] }, + { key: "output_cost_per_image_above_512_and_512_pixels", label: "Output / image (>512px)", group: "image", requestTypeGroups: ["image"] }, + { key: "output_cost_per_image_above_512_and_512_pixels_and_premium_image", label: "Output / image (>512px, premium)", group: "image", requestTypeGroups: ["image"] }, + { key: "output_cost_per_image_above_1024_and_1024_pixels", label: "Output / image (>1024px)", group: "image", requestTypeGroups: ["image"] }, + { key: "output_cost_per_image_above_1024_and_1024_pixels_and_premium_image", label: "Output / image (>1024px, premium)", group: "image", requestTypeGroups: ["image"] }, + { key: "output_cost_per_image_low_quality", label: "Output / image (low quality)", group: "image", requestTypeGroups: ["image"] }, + { key: "output_cost_per_image_medium_quality", label: 
"Output / image (medium quality)", group: "image", requestTypeGroups: ["image"] }, + { key: "output_cost_per_image_high_quality", label: "Output / image (high quality)", group: "image", requestTypeGroups: ["image"] }, + { key: "output_cost_per_image_auto_quality", label: "Output / image (auto quality)", group: "image", requestTypeGroups: ["image"] }, + { key: "cache_read_input_image_token_cost", label: "Cache read / image token", group: "image", requestTypeGroups: ["image"] }, + // Video fields + { key: "input_cost_per_video_per_second", label: "Input / video second", group: "video", requestTypeGroups: ["video"] }, + { key: "input_cost_per_video_per_second_above_128k_tokens", label: "Input / video second (>128k)", group: "video", requestTypeGroups: ["video"] }, + { key: "output_cost_per_video_per_second", label: "Output / video second", group: "video", requestTypeGroups: ["video"] }, +] as const; + +export type PricingFieldKey = (typeof PRICING_FIELDS)[number]["key"]; +export type FieldErrors = Partial>; + +type ScopeRoot = "global" | "virtual_key"; + +export interface FormState { + name: string; + scopeRoot: ScopeRoot; + virtualKeyID: string; + providerID: string; + providerKeyID: string; + matchType: PricingOverrideMatchType; + pattern: string; + requestTypes: RequestType[]; + pricingValues: Partial>; +} + +export const defaultFormState: FormState = { + name: "", + scopeRoot: "global", + virtualKeyID: "", + providerID: "", + providerKeyID: "", + matchType: "exact", + pattern: "", + requestTypes: [], + pricingValues: {}, +}; + +export const fieldLabelByKey = Object.fromEntries(PRICING_FIELDS.map((field) => [field.key, field.label])) as Record< + PricingFieldKey, + string +>; +export const patchKeys = PRICING_FIELDS.map((field) => field.key) as PricingFieldKey[]; + +export function patternError(matchType: PricingOverrideMatchType, pattern: string): string | undefined { + const trimmed = pattern.trim(); + if (!trimmed) return "Pattern is required"; + if (matchType 
=== "exact") { + if (trimmed.includes("*")) return "Exact pattern cannot contain *"; + } else if (matchType === "wildcard") { + const starCount = (trimmed.match(/\*/g) || []).length; + if (starCount === 0) return "Wildcard pattern must end with * (example: gpt-5*)"; + if (starCount > 1) return "Wildcard pattern can include only one *"; + if (!trimmed.endsWith("*")) return "Wildcard supports prefix-only trailing *"; + } + return undefined; +} + +export function buildPatchFromForm(form: FormState): { patch: PricingOverridePatch; errors: FieldErrors } { + const errors: FieldErrors = {}; + const patch: PricingOverridePatch = {}; + + for (const key of patchKeys) { + const raw = form.pricingValues[key]; + if (raw == null || raw.trim() === "") continue; + const parsed = Number(raw); + if (!Number.isFinite(parsed)) { + errors[key] = "Must be a number"; + continue; + } + if (parsed < 0) { + errors[key] = "Must be >= 0"; + continue; + } + (patch as Record)[key] = parsed; + } + + return { patch, errors }; +} + +function toFormState(override: PricingOverride): FormState { + const values: Partial> = {}; + let parsedPatch: Record = {}; + try { + if (override.pricing_patch) parsedPatch = JSON.parse(override.pricing_patch); + } catch { + // malformed patch — leave values empty + } + for (const key of patchKeys) { + const val = parsedPatch?.[key]; // JSON such as "null" parses without throwing, so guard the lookup instead of crashing + if (typeof val === "number") values[key] = String(val); + } + const scopeKind = resolveScopeKind(override); + + const scopeRoot: ScopeRoot = + scopeKind === "virtual_key" || scopeKind === "virtual_key_provider" || scopeKind === "virtual_key_provider_key" + ? "virtual_key" + : "global"; + + return { + name: override.name ?? "", + scopeRoot, + virtualKeyID: override.virtual_key_id ?? "", + providerID: override.provider_id ?? "", + providerKeyID: override.provider_key_id ?? "", + matchType: override.match_type, + pattern: override.pattern, + requestTypes: override.request_types ?? 
[], + pricingValues: values, + }; +} + +function resolveScopeKind(override: PricingOverride): PricingOverrideScopeKind { + if ( + override.scope_kind === "global" || + override.scope_kind === "provider" || + override.scope_kind === "provider_key" || + override.scope_kind === "virtual_key" || + override.scope_kind === "virtual_key_provider" || + override.scope_kind === "virtual_key_provider_key" + ) { + return override.scope_kind; + } + if (override.virtual_key_id) { + if (override.provider_key_id) return "virtual_key_provider_key"; + if (override.provider_id) return "virtual_key_provider"; + return "virtual_key"; + } + if (override.provider_key_id) return "provider_key"; + if (override.provider_id) return "provider"; + return "global"; +} + +function deriveScopeKind(form: FormState): PricingOverrideScopeKind { + if (form.scopeRoot === "virtual_key") { + if (form.providerKeyID) return "virtual_key_provider_key"; + if (form.providerID) return "virtual_key_provider"; + return "virtual_key"; + } + if (form.providerKeyID) return "provider_key"; + if (form.providerID) return "provider"; + return "global"; +} + +export function patchSummary(override: PricingOverride): string { + let parsed: Record = {}; + try { + if (override.pricing_patch) parsed = JSON.parse(override.pricing_patch); + } catch { + // malformed JSON — summarize as an empty patch + } + const keys = (parsed && typeof parsed === "object" && !Array.isArray(parsed) ? Object.keys(parsed) : []) as PricingFieldKey[]; // only a plain object carries field keys; null would throw and strings/arrays would yield index keys + if (keys.length === 0) return "None"; + const labels = keys.map((key) => fieldLabelByKey[key] || key); + if (labels.length <= 2) return labels.join(", "); + return `${labels.slice(0, 2).join(", ")} +${labels.length - 2} more`; +} + +export function renderFields( + fields: ReadonlyArray<{ key: PricingFieldKey; label: string }>, + form: FormState, + setForm: Dispatch>, + errors: FieldErrors, + onFieldChange?: () => void, +) { + return ( +
+ {fields.map((field) => ( +
+ + { + onFieldChange?.(); + setForm((prev) => ({ + ...prev, + pricingValues: { ...prev.pricingValues, [field.key]: e.target.value }, + })); + }} + /> + {errors[field.key] &&

{errors[field.key]}

} +
+ ))} +
+ ); +} + + +interface PricingOverrideDrawerProps { + open: boolean; + onOpenChange: (open: boolean) => void; + editingOverride?: PricingOverride | null; + scopeLock?: { + scopeKind: PricingOverrideScopeKind; + virtualKeyID?: string; + providerID?: string; + providerKeyID?: string; + label?: string; + }; + onSaved?: () => void; +} + +function isCompleteScopeLock(scopeLock?: PricingOverrideDrawerProps["scopeLock"]): boolean { + if (!scopeLock) return false; + switch (scopeLock.scopeKind) { + case "global": + return true; + case "provider": + return Boolean(scopeLock.providerID); + case "provider_key": + return Boolean(scopeLock.providerKeyID); + case "virtual_key": + return Boolean(scopeLock.virtualKeyID); + case "virtual_key_provider": + return Boolean(scopeLock.virtualKeyID && scopeLock.providerID); + case "virtual_key_provider_key": + return Boolean(scopeLock.virtualKeyID && scopeLock.providerID && scopeLock.providerKeyID); + default: + return false; + } +} + +export default function PricingOverrideSheet({ open, onOpenChange, editingOverride, scopeLock, onSaved }: PricingOverrideDrawerProps) { + const { data: providersData, isLoading: isProvidersLoading, error: providersError } = useGetProvidersQuery(); + const { data: virtualKeysData, isLoading: isVirtualKeysLoading, error: virtualKeysError } = useGetVirtualKeysQuery(); + const [createOverride, { isLoading: isCreating }] = useCreatePricingOverrideMutation(); + const [updateOverride, { isLoading: isPatching }] = useUpdatePricingOverrideMutation(); + + const [form, setForm] = useState(defaultFormState); + const [jsonPatch, setJSONPatch] = useState(""); + const [jsonError, setJSONError] = useState(); + const jsonEditingRef = useRef(false); + const prevOpenRef = useRef(false); + const [requestTypePopoverOpen, setRequestTypePopoverOpen] = useState(false); + const shouldLockScope = useMemo(() => !editingOverride && isCompleteScopeLock(scopeLock), [editingOverride, scopeLock]); + + const isSaving = isCreating || 
isPatching; + const providers = useMemo(() => (providersError ? [] : (providersData ?? [])), [providersData, providersError]); + const virtualKeys = useMemo(() => (virtualKeysError ? [] : (virtualKeysData?.virtual_keys ?? [])), [virtualKeysData, virtualKeysError]); + + const providerKeyOptions = useMemo( + () => + providers.flatMap((provider) => + (provider.keys || []).map((key) => ({ + id: key.id, + providerName: provider.name, + label: key.name || key.id, + })), + ), + [providers], + ); + const providerScopedKeyOptions = useMemo( + () => providerKeyOptions.filter((key) => key.providerName === form.providerID), + [providerKeyOptions, form.providerID], + ); + + // Hydrate the form only when the sheet transitions from closed → open. + // This prevents providerKeyOptions refetches from resetting unsaved edits. + useEffect(() => { + const wasOpen = prevOpenRef.current; + prevOpenRef.current = open; + if (!open || wasOpen) return; + + jsonEditingRef.current = false; + setJSONError(undefined); + if (editingOverride) { + const state = toFormState(editingOverride); + // For provider_key scopes, provider_id is not stored in the DB (it's implicit from + // the key). Derive it from providerKeyOptions so the provider selector renders and + // the filtered key list shows the pre-selected key correctly. + if (!state.providerID && state.providerKeyID) { + const match = providerKeyOptions.find((k) => k.id === state.providerKeyID); + if (match) state.providerID = match.providerName; + } + setForm(state); + return; + } + if (shouldLockScope && scopeLock) { + const scopedForm: FormState = { + ...defaultFormState, + virtualKeyID: scopeLock.virtualKeyID ?? "", + providerID: scopeLock.providerID ?? "", + providerKeyID: scopeLock.providerKeyID ?? "", + scopeRoot: + scopeLock.scopeKind === "virtual_key" || + scopeLock.scopeKind === "virtual_key_provider" || + scopeLock.scopeKind === "virtual_key_provider_key" + ? 
"virtual_key" + : "global", + }; + setForm(scopedForm); + return; + } + setForm(defaultFormState); + }, [open, editingOverride, scopeLock, shouldLockScope, providerKeyOptions]); + + // When providerKeyOptions loads after the sheet is already open in edit mode, + // backfill the derived providerID without resetting the rest of the form. + useEffect(() => { + if (!open || !editingOverride) return; + setForm((prev) => { + if (prev.providerID || !prev.providerKeyID) return prev; + const match = providerKeyOptions.find((k) => k.id === prev.providerKeyID); + if (!match) return prev; + return { ...prev, providerID: match.providerName }; + }); + }, [providerKeyOptions, open, editingOverride]); + + const resolvedScopeKind = useMemo(() => { + if (shouldLockScope && scopeLock?.scopeKind) return scopeLock.scopeKind; + return deriveScopeKind(form); + }, [scopeLock, shouldLockScope, form]); + + const resolvedVirtualKeyID = useMemo(() => { + if (shouldLockScope) return scopeLock?.virtualKeyID; + return form.scopeRoot === "virtual_key" ? 
form.virtualKeyID || undefined : undefined; + }, [scopeLock, shouldLockScope, form.scopeRoot, form.virtualKeyID]); + + const resolvedProviderID = useMemo(() => { + if (shouldLockScope) return scopeLock?.providerID; + return form.providerID || undefined; + }, [scopeLock, shouldLockScope, form.providerID]); + + const resolvedProviderKeyID = useMemo(() => { + if (shouldLockScope) return scopeLock?.providerKeyID; + return form.providerKeyID || undefined; + }, [scopeLock, shouldLockScope, form.providerKeyID]); + + const pricingFieldErrors = useMemo(() => { + const errors: FieldErrors = {}; + for (const key of patchKeys) { + const raw = form.pricingValues[key]; + if (!raw || raw.trim() === "") continue; + const parsed = Number(raw); + if (!Number.isFinite(parsed)) errors[key] = "Must be a number"; + else if (parsed < 0) errors[key] = "Must be >= 0"; + } + return errors; + }, [form.pricingValues]); + + useEffect(() => { + if (!jsonEditingRef.current) { + const { patch } = buildPatchFromForm(form); + const json = Object.keys(patch).length > 0 ? 
JSON.stringify(patch, null, 2) : ""; + setJSONPatch(json); + setJSONError(undefined); + } + }, [form]); + + const handleJSONChange = useCallback((value: string) => { + jsonEditingRef.current = true; + setJSONPatch(value); + const trimmed = value.trim(); + if (!trimmed) { + setJSONError(undefined); + setForm((prev) => ({ ...prev, pricingValues: {} })); + return; + } + try { + const parsed = JSON.parse(trimmed); + if (parsed == null || typeof parsed !== "object" || Array.isArray(parsed)) { + setJSONError("Patch must be a JSON object"); + return; + } + const pricingValues: Partial> = {}; + for (const [key, val] of Object.entries(parsed)) { + if (!patchKeys.includes(key as PricingFieldKey)) { + setJSONError(`Unknown field: ${key}`); + return; + } + if (typeof val !== "number" || !Number.isFinite(val) || val < 0) { // isFinite also rejects Infinity (e.g. JSON.parse("1e999")), matching the finite check in buildPatchFromForm + setJSONError(`${key} must be a non-negative number`); + return; + } + pricingValues[key as PricingFieldKey] = String(val); + } + setJSONError(undefined); + setForm((prev) => ({ ...prev, pricingValues })); + } catch { + setJSONError("Invalid JSON"); + } + }, []); + + const handleFieldChange = useCallback(() => { + jsonEditingRef.current = false; + }, []); + + + const handleCloseDrawer = () => { + onOpenChange(false); + setRequestTypePopoverOpen(false); + }; + + const toggleRequestType = (requestType: string) => { + setForm((prev) => ({ + ...prev, + requestTypes: prev.requestTypes.includes(requestType) + ? 
prev.requestTypes.filter((item) => item !== requestType) + : [...prev.requestTypes, requestType], + })); + }; + + const handleSave = async () => { + if (!form.name.trim()) { + toast.error("Name is required"); + return; + } + + if ( + (resolvedScopeKind === "virtual_key" || + resolvedScopeKind === "virtual_key_provider" || + resolvedScopeKind === "virtual_key_provider_key") && + !resolvedVirtualKeyID + ) { + toast.error("Virtual key is required"); + return; + } + if ((resolvedScopeKind === "provider" || resolvedScopeKind === "virtual_key_provider") && !resolvedProviderID) { + toast.error("Provider is required"); + return; + } + if (resolvedScopeKind === "provider_key" && !resolvedProviderKeyID) { + toast.error("Provider key is required"); + return; + } + if (resolvedScopeKind === "virtual_key_provider_key" && (!resolvedProviderID || !resolvedProviderKeyID)) { + toast.error("Provider and provider key are required"); + return; + } + + const pError = patternError(form.matchType, form.pattern); + if (pError) { + toast.error(pError); + return; + } + + if (form.requestTypes.length === 0) { + toast.error("At least one request type must be selected"); + return; + } + + if (jsonError) { + toast.error("Fix the JSON error before saving"); + return; + } + + const { patch, errors: pricingErrors } = buildPatchFromForm(form); + const firstPricingError = Object.values(pricingErrors)[0]; + if (firstPricingError) { + toast.error(firstPricingError); + return; + } + if (Object.keys(patch).length === 0) { + toast.error("At least one pricing field must be overridden"); + return; + } + + let scopedVirtualKeyID: string | undefined; + let scopedProviderID: string | undefined; + let scopedProviderKeyID: string | undefined; + + switch (resolvedScopeKind) { + case "global": + break; + case "provider": + scopedProviderID = resolvedProviderID; + break; + case "provider_key": + scopedProviderKeyID = resolvedProviderKeyID; + break; + case "virtual_key": + scopedVirtualKeyID = resolvedVirtualKeyID; 
+ break; + case "virtual_key_provider": + scopedVirtualKeyID = resolvedVirtualKeyID; + scopedProviderID = resolvedProviderID; + break; + case "virtual_key_provider_key": + scopedVirtualKeyID = resolvedVirtualKeyID; + scopedProviderID = resolvedProviderID; + scopedProviderKeyID = resolvedProviderKeyID; + break; + } + + const requestPayload: CreatePricingOverrideRequest = { + name: form.name.trim(), + scope_kind: resolvedScopeKind, + virtual_key_id: scopedVirtualKeyID, + provider_id: scopedProviderID, + provider_key_id: scopedProviderKeyID, + match_type: form.matchType, + pattern: form.pattern.trim(), + request_types: form.requestTypes.length > 0 ? form.requestTypes : [], + patch, + }; + + try { + if (editingOverride) { + await updateOverride({ id: editingOverride.id, data: requestPayload }).unwrap(); + toast.success("Pricing override updated"); + } else { + await createOverride(requestPayload).unwrap(); + toast.success("Pricing override created"); + } + handleCloseDrawer(); + onSaved?.(); + } catch (error) { + toast.error("Failed to save pricing override", { description: getErrorMessage(error) }); + } + }; + + + return ( + (o ? onOpenChange(true) : handleCloseDrawer())}> + + + {editingOverride ? "Edit Pricing Override" : "Create Pricing Override"} + + +
+
+
+ + setForm((prev) => ({ ...prev, name: e.target.value }))} /> +
+ + {shouldLockScope && scopeLock ? ( +
+ + +
+ ) : ( + <> +
+ + +
+ + {form.scopeRoot === "virtual_key" && ( +
+ + + {virtualKeysError &&

Failed to load virtual keys: {getErrorMessage(virtualKeysError)}

} +
+ )} + +
+
+ + + {providersError &&

Failed to load providers: {getErrorMessage(providersError)}

} +
+ + {form.providerID ? ( +
+ + +
+ ) : ( +
+ )} +
+ + + )} +
+ +
+
+
+ + +
+
+ + setForm((prev) => ({ ...prev, pattern: e.target.value }))} + placeholder={form.matchType === "exact" ? "e.g., gpt-4o" : "e.g., gpt-4*"} + /> +
+
+
+ +
+ + + + + + e.stopPropagation()}> +
e.stopPropagation()}> + {REQUEST_TYPE_GROUPS.map((group) => ( +
+
{group.label}
+ {group.types.map((requestType) => { + const checked = form.requestTypes.includes(requestType); + return ( + + ); + })} +
+ ))} +
+
+ +
+
+
+
+ +
+ + { + handleFieldChange(); + setForm((prev) => ({ ...prev, pricingValues: { ...prev.pricingValues, [key]: value } })); + }} + onFieldInteraction={handleFieldChange} + /> +
+ +
+ +
+ +
+ {jsonError &&

{jsonError}

} +
+
+ +
+ + +
+ + + ); +} diff --git a/ui/app/workspace/custom-pricing/overrides/pricingOverridesEmptyState.tsx b/ui/app/workspace/custom-pricing/overrides/pricingOverridesEmptyState.tsx new file mode 100644 index 0000000000..52c6dae93b --- /dev/null +++ b/ui/app/workspace/custom-pricing/overrides/pricingOverridesEmptyState.tsx @@ -0,0 +1,45 @@ +"use client"; + +import { Button } from "@/components/ui/button"; +import { ArrowUpRight, SlidersHorizontal } from "lucide-react"; + +const PRICING_OVERRIDES_DOCS_URL = "https://docs.getbifrost.ai/features/governance/custom-pricing"; + +interface PricingOverridesEmptyStateProps { + onCreateClick: () => void; +} + +export function PricingOverridesEmptyState({ onCreateClick }: PricingOverridesEmptyStateProps) { + return ( +
+
+ +
+
+

Pricing overrides customize cost tracking per scope

+
+ Define custom per-token prices for specific providers, keys, or virtual keys to accurately reflect your negotiated rates. +
+
+ + +
+
+
+ ); +} diff --git a/ui/app/workspace/custom-pricing/overrides/scopedPricingOverridesView.tsx b/ui/app/workspace/custom-pricing/overrides/scopedPricingOverridesView.tsx new file mode 100644 index 0000000000..5432ee5abd --- /dev/null +++ b/ui/app/workspace/custom-pricing/overrides/scopedPricingOverridesView.tsx @@ -0,0 +1,387 @@ +"use client"; + +import { + AlertDialog, + AlertDialogAction, + AlertDialogCancel, + AlertDialogContent, + AlertDialogDescription, + AlertDialogFooter, + AlertDialogHeader, + AlertDialogTitle, +} from "@/components/ui/alertDialog"; +import { Badge } from "@/components/ui/badge"; +import { Button } from "@/components/ui/button"; +import { Table, TableBody, TableCell, TableHead, TableHeader, TableRow } from "@/components/ui/table"; +import { + getErrorMessage, + useDeletePricingOverrideMutation, + useGetPricingOverridesQuery, + useGetProvidersQuery, + useGetVirtualKeysQuery, +} from "@/lib/store"; +import { ProviderIconType, RenderProviderIcon } from "@/lib/constants/icons"; +import { getProviderLabel } from "@/lib/constants/logs"; +import { PricingOverride, PricingOverrideScopeKind } from "@/lib/types/governance"; +import { useDebouncedValue } from "@/hooks/useDebounce"; +import { Input } from "@/components/ui/input"; +import { ChevronLeft, ChevronRight, Edit, Search, Trash2 } from "lucide-react"; +import { useSearchParams } from "next/navigation"; +import { useEffect, useMemo, useState } from "react"; +import { toast } from "sonner"; +import PricingOverrideSheet from "./pricingOverrideSheet"; +import { PricingOverridesEmptyState } from "./pricingOverridesEmptyState"; + +type ScopeFilter = "all" | PricingOverrideScopeKind; + +function parseScopeKind(value: string | null): ScopeFilter { + if ( + value === "global" || + value === "provider" || + value === "provider_key" || + value === "virtual_key" || + value === "virtual_key_provider" || + value === "virtual_key_provider_key" + ) { + return value; + } + return "all"; +} + +// Returns the 
top-level scope label: "Global" or the virtual key name (falling back to the virtual key ID when the name is unknown).
+function scopeLabel(override: PricingOverride, virtualKeyMap: Map<string, string>): string {
+	const scopeKind = resolveScopeKind(override);
+	if (override.virtual_key_id && scopeKind.startsWith("virtual_key")) {
+		return virtualKeyMap.get(override.virtual_key_id) || override.virtual_key_id; // show which virtual key, not a generic literal
+	}
+	return "Global";
+}
+
+// Returns the key label: the key's name (or its ID when the name is unknown), "All Keys" when only a provider is scoped, or "-" when neither is set.
+function keyLabel(override: PricingOverride, keyLabelMap: Map<string, string>): string {
+	if (!override.provider_key_id) {
+		if (!override.provider_id) return "-";
+		return "All Keys";
+	}
+	return keyLabelMap.get(override.provider_key_id) || override.provider_key_id;
+}
+
+// Returns the provider label for the override, or "-" if not applicable (non-provider scopes, or a key whose provider is unknown).
+function providerLabel(override: PricingOverride, providerMap: Map<string, string>, keyProviderMap: Map<string, string>): string {
+	const scopeKind = resolveScopeKind(override);
+	switch (scopeKind) {
+		case "provider":
+		case "virtual_key_provider":
+			return providerMap.get(override.provider_id || "") || override.provider_id || "-";
+		case "provider_key":
+		case "virtual_key_provider_key": {
+			const owner = keyProviderMap.get(override.provider_key_id || "") || ""; // provider recorded for the scoped key; "" when not found
+			return providerMap.get(owner) || owner || "-";
+		}
+		default:
+			return "-";
+	}
+}
+
+function resolveScopeKind(override: PricingOverride): PricingOverrideScopeKind { // explicit scope_kind wins; otherwise infer it from which IDs are set
+	if (
+		override.scope_kind === "global" ||
+		override.scope_kind === "provider" ||
+		override.scope_kind === "provider_key" ||
+		override.scope_kind === "virtual_key" ||
+		override.scope_kind === "virtual_key_provider" ||
+		override.scope_kind === "virtual_key_provider_key"
+	) {
+		return override.scope_kind;
+	}
+	if (override.virtual_key_id) { // inferred virtual-key scopes, most specific first
+		if (override.provider_key_id) return "virtual_key_provider_key";
+		if (override.provider_id) return "virtual_key_provider";
+		return "virtual_key";
+	}
+	if (override.provider_key_id) return "provider_key"; // a key ID takes precedence over a bare provider ID
+	if (override.provider_id) return "provider";
+	return "global";
+}
+
+const 
PAGE_SIZE = 25; + +export default function ScopedPricingOverridesView() { + const searchParams = useSearchParams(); + + const [scopeKind, setScopeKind] = useState(() => parseScopeKind(searchParams.get("scope_kind"))); + const [virtualKeyID, setVirtualKeyID] = useState(() => (searchParams.get("virtual_key_id") || "").trim()); + const [providerID, setProviderID] = useState(() => (searchParams.get("provider_id") || "").trim()); + const [providerKeyID, setProviderKeyID] = useState(() => (searchParams.get("provider_key_id") || "").trim()); + + const [search, setSearch] = useState(""); + const [offset, setOffset] = useState(0); + const debouncedSearch = useDebouncedValue(search, 300); + + useEffect(() => { + setScopeKind(parseScopeKind(searchParams.get("scope_kind"))); + setVirtualKeyID((searchParams.get("virtual_key_id") || "").trim()); + setProviderID((searchParams.get("provider_id") || "").trim()); + setProviderKeyID((searchParams.get("provider_key_id") || "").trim()); + }, [searchParams]); + + // Reset to first page when filters or search change + useEffect(() => { + setOffset(0); + }, [scopeKind, virtualKeyID, providerID, providerKeyID, debouncedSearch]); + + const queryArgs = useMemo(() => ({ + scopeKind: scopeKind === "all" ? undefined : scopeKind, + virtualKeyID: virtualKeyID || undefined, + providerID: providerID || undefined, + providerKeyID: providerKeyID || undefined, + limit: PAGE_SIZE, + offset, + search: debouncedSearch || undefined, + }), [scopeKind, virtualKeyID, providerID, providerKeyID, offset, debouncedSearch]); + + const { data, isLoading, error } = useGetPricingOverridesQuery(queryArgs); + + // Snap offset back when total shrinks past current page + const totalCount = data?.total_count ?? 0; + useEffect(() => { + if (!data || offset < totalCount) return; + setOffset(totalCount === 0 ? 
0 : Math.floor((totalCount - 1) / PAGE_SIZE) * PAGE_SIZE); + }, [totalCount, offset]); + const { data: providersData } = useGetProvidersQuery(); + const { data: virtualKeysData } = useGetVirtualKeysQuery(); + const [deleteOverride, { isLoading: isDeleting }] = useDeletePricingOverrideMutation(); + + useEffect(() => { + if (error) { + toast.error("Failed to load pricing overrides", { description: getErrorMessage(error) }); + } + }, [error]); + + const [isDrawerOpen, setIsDrawerOpen] = useState(false); + const [editingOverride, setEditingOverride] = useState(null); + const [deleteTarget, setDeleteTarget] = useState(null); + + const rows = data?.pricing_overrides ?? []; + const providers = useMemo(() => providersData ?? [], [providersData]); + const virtualKeys = useMemo(() => virtualKeysData?.virtual_keys ?? [], [virtualKeysData]); + + const providerMap = useMemo(() => new Map(providers.map((provider) => [provider.name, provider.name])), [providers]); + const providerKeyOptions = useMemo( + () => + providers.flatMap((provider) => + (provider.keys || []).map((key) => ({ + id: key.id, + label: key.name || key.id, + providerName: provider.name, + })), + ), + [providers], + ); + const providerKeyProviderMap = useMemo( + () => new Map(providerKeyOptions.map((key) => [key.id, key.providerName])), + [providerKeyOptions], + ); + const providerKeyLabelMap = useMemo( + () => new Map(providerKeyOptions.map((key) => [key.id, key.label])), + [providerKeyOptions], + ); + const virtualKeyMap = useMemo(() => new Map(virtualKeys.map((vk) => [vk.id, vk.name])), [virtualKeys]); + + const createScopeLock = useMemo(() => { + if (scopeKind === "all") return undefined; + return { + scopeKind, + virtualKeyID: virtualKeyID || undefined, + providerID: providerID || undefined, + providerKeyID: providerKeyID || undefined, + label: `${scopeKind}${virtualKeyID || providerID || providerKeyID ? 
" (filtered)" : ""}`, + }; + }, [scopeKind, virtualKeyID, providerID, providerKeyID]); + + const openCreateDrawer = () => { + setEditingOverride(null); + setIsDrawerOpen(true); + }; + + const openEditDrawer = (override: PricingOverride) => { + setEditingOverride(override); + setIsDrawerOpen(true); + }; + + const handleDeleteConfirm = async () => { + if (!deleteTarget) return; + try { + await deleteOverride(deleteTarget.id).unwrap(); + toast.success("Pricing override deleted"); + setDeleteTarget(null); + } catch (deleteError) { + toast.error("Failed to delete pricing override", { description: getErrorMessage(deleteError) }); + } + }; + + const hasActiveFilters = debouncedSearch || scopeKind !== "all" || virtualKeyID || providerID || providerKeyID; + + if (!isLoading && !error && totalCount === 0 && !hasActiveFilters) { + return ( + <> + + + + ); + } + + return ( +
+
+
+

Pricing Overrides

+

Set custom rates for any model across global or virtual key scopes, optionally narrowed to a specific provider or key

+
+ +
+ + {/* Search */} +
+ + setSearch(e.target.value)} + className="pl-9" + data-testid="pricing-overrides-search-input" + /> +
+ +
+ {isLoading ? ( +
Loading overrides...
+ ) : error ? ( +
Failed to load pricing overrides. Please try refreshing the page.
+ ) : ( + + + + Name + Scope + Provider + Key + Model + Actions + + + + {rows.length === 0 ? ( + + + No matching pricing overrides found. + + + ) : rows.map((row) => ( + + {row.name || "-"} + + {scopeLabel(row, virtualKeyMap)} + + + {(() => { + const name = providerLabel(row, providerMap, providerKeyProviderMap); + if (name === "-") return -; + return ( +
+ + {getProviderLabel(name)} +
+ ); + })()} +
+ {keyLabel(row, providerKeyLabelMap)} + {row.pattern} + e.stopPropagation()}> +
+ + +
+
+
+ ))} +
+
+ )} +
+ + {/* Pagination */} + {totalCount > 0 && ( +
+

+ Showing {offset + 1}-{Math.min(offset + PAGE_SIZE, totalCount)} of {totalCount} +

+
+ + +
+
+ )} + + + + (!open ? setDeleteTarget(null) : undefined)}> + + + Delete Pricing Override + + Are you sure you want to delete "{deleteTarget?.name}"? This action cannot be undone. + + + + Cancel + { + e.preventDefault(); + void handleDeleteConfirm(); + }} + disabled={isDeleting} + className="bg-destructive hover:bg-destructive/90" + > + {isDeleting ? "Deleting..." : "Delete"} + + + + +
+ ); +} diff --git a/ui/app/workspace/providers/fragments/index.ts b/ui/app/workspace/providers/fragments/index.ts index 5d295be2a1..8a9fc1c308 100644 --- a/ui/app/workspace/providers/fragments/index.ts +++ b/ui/app/workspace/providers/fragments/index.ts @@ -6,5 +6,4 @@ export { GovernanceFormFragment } from "./governanceFormFragment"; export { NetworkFormFragment } from "./networkFormFragment"; export { PerformanceFormFragment } from "./performanceFormFragment"; export { PerformanceFormFragment as PerformanceTab } from "./performanceFormFragment"; -export { PricingOverridesFormFragment } from "./pricingOverridesFormFragment"; export { ProxyFormFragment } from "./proxyFormFragment"; diff --git a/ui/app/workspace/providers/fragments/pricingOverridesFormFragment.tsx b/ui/app/workspace/providers/fragments/pricingOverridesFormFragment.tsx deleted file mode 100644 index 3d94b14b15..0000000000 --- a/ui/app/workspace/providers/fragments/pricingOverridesFormFragment.tsx +++ /dev/null @@ -1,136 +0,0 @@ -"use client"; - -import { Button } from "@/components/ui/button"; -import { Textarea } from "@/components/ui/textarea"; -import { getErrorMessage, setProviderFormDirtyState, useAppDispatch } from "@/lib/store"; -import { useUpdateProviderMutation } from "@/lib/store/apis/providersApi"; -import { ModelProvider } from "@/lib/types/config"; -import { providerPricingOverrideSchema } from "@/lib/types/schemas"; -import { RbacOperation, RbacResource, useRbac } from "@enterprise/lib"; -import { useEffect, useMemo, useState } from "react"; -import { toast } from "sonner"; -import { z } from "zod"; - -interface PricingOverridesFormFragmentProps { - provider: ModelProvider; -} - -const pricingOverridesArraySchema = z.array(providerPricingOverrideSchema); - -const toPrettyJSON = (value: unknown) => JSON.stringify(value, null, 2); - -export function PricingOverridesFormFragment({ provider }: PricingOverridesFormFragmentProps) { - const dispatch = useAppDispatch(); - const 
hasUpdateProviderAccess = useRbac(RbacResource.ModelProvider, RbacOperation.Update); - const [updateProvider, { isLoading: isUpdatingProvider }] = useUpdateProviderMutation(); - const initialValue = useMemo(() => toPrettyJSON(provider.pricing_overrides ?? []), [provider.pricing_overrides]); - const [overridesJSON, setOverridesJSON] = useState(initialValue); - const [validationError, setValidationError] = useState(""); - const [hasUserEdits, setHasUserEdits] = useState(false); - const isDirty = hasUserEdits && overridesJSON !== initialValue; - - useEffect(() => { - if (isDirty) { - return; - } - setOverridesJSON(initialValue); - setValidationError(""); - }, [initialValue, isDirty, provider.name]); - - useEffect(() => { - dispatch(setProviderFormDirtyState(isDirty)); - }, [dispatch, isDirty]); - - const onReset = () => { - setOverridesJSON(initialValue); - setValidationError(""); - setHasUserEdits(false); - }; - - const onSave = async () => { - let parsed: unknown; - try { - parsed = JSON.parse(overridesJSON); - } catch { - setValidationError("Invalid JSON format."); - return; - } - - const validated = pricingOverridesArraySchema.safeParse(parsed); - if (!validated.success) { - setValidationError(validated.error.issues[0]?.message || "Invalid pricing overrides configuration."); - return; - } - - setValidationError(""); - - try { - await updateProvider({ - ...provider, - }).unwrap(); - toast.success("Pricing overrides updated successfully"); - setOverridesJSON(toPrettyJSON(validated.data)); - setHasUserEdits(false); - } catch (err) { - toast.error("Failed to update pricing overrides", { - description: getErrorMessage(err), - }); - } - }; - - return ( -
-
-

Provider Pricing Overrides

-

- Enter a JSON array of override objects. Match precedence is exact > wildcard > regex. Unspecified fields fall back to - datasheet pricing. -

-
- -