diff --git a/cli/go.mod b/cli/go.mod
index 2c1c930bc2..c6a77eaf26 100644
--- a/cli/go.mod
+++ b/cli/go.mod
@@ -46,7 +46,7 @@ require (
github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e // indirect
golang.org/x/arch v0.23.0 // indirect
- golang.org/x/exp v0.0.0-20250408133849-7e4ce0ab07d0 // indirect
+ golang.org/x/exp v0.0.0-20251113190631-e25ba8c21ef6 // indirect
golang.org/x/sys v0.41.0 // indirect
golang.org/x/text v0.33.0 // indirect
)
diff --git a/cli/go.sum b/cli/go.sum
index e6e613043a..9746bb475a 100644
--- a/cli/go.sum
+++ b/cli/go.sum
@@ -89,8 +89,7 @@ github.com/zalando/go-keyring v0.2.6 h1:r7Yc3+H+Ux0+M72zacZoItR3UDxeWfKTcabvkI8u
github.com/zalando/go-keyring v0.2.6/go.mod h1:2TCrxYrbUNYfNS/Kgy/LSrkSQzZ5UPVH85RwfczwvcI=
golang.org/x/arch v0.23.0 h1:lKF64A2jF6Zd8L0knGltUnegD62JMFBiCPBmQpToHhg=
golang.org/x/arch v0.23.0/go.mod h1:dNHoOeKiyja7GTvF9NJS1l3Z2yntpQNzgrjh1cU103A=
-golang.org/x/exp v0.0.0-20250408133849-7e4ce0ab07d0 h1:R84qjqJb5nVJMxqWYb3np9L5ZsaDtB+a39EqjV0JSUM=
-golang.org/x/exp v0.0.0-20250408133849-7e4ce0ab07d0/go.mod h1:S9Xr4PYopiDyqSyp5NjCrhFrqg6A5zA2E/iPHPhqnS8=
+golang.org/x/exp v0.0.0-20251113190631-e25ba8c21ef6 h1:zfMcR1Cs4KNuomFFgGefv5N0czO2XZpUbxGUy8i8ug0=
golang.org/x/sys v0.0.0-20210809222454-d867a43fc93e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.41.0 h1:Ivj+2Cp/ylzLiEU89QhWblYnOE9zerudt9Ftecq2C6k=
diff --git a/core/bifrost.go b/core/bifrost.go
index f805ed03df..32959ed30a 100644
--- a/core/bifrost.go
+++ b/core/bifrost.go
@@ -4815,7 +4815,7 @@ func executeRequestWithRetries[T any](
} else {
// Populate LLM response attributes for non-streaming responses
if resp, ok := any(result).(*schemas.BifrostResponse); ok {
- tracer.PopulateLLMResponseAttributes(handle, resp, bifrostError)
+ tracer.PopulateLLMResponseAttributes(ctx, handle, resp, bifrostError)
}
// End span with appropriate status
diff --git a/core/providers/utils/utils.go b/core/providers/utils/utils.go
index e48693e97d..3d722bc71b 100644
--- a/core/providers/utils/utils.go
+++ b/core/providers/utils/utils.go
@@ -2611,10 +2611,10 @@ func completeDeferredSpan(ctx *schemas.BifrostContext, result *schemas.BifrostRe
if accumulatedResp != nil {
// Use accumulated response for attributes (includes full content, tool calls, etc.)
- tracer.PopulateLLMResponseAttributes(handle, accumulatedResp, err)
+ tracer.PopulateLLMResponseAttributes(ctx, handle, accumulatedResp, err)
} else if result != nil {
// Fall back to final chunk if no accumulated data (shouldn't happen normally)
- tracer.PopulateLLMResponseAttributes(handle, result, err)
+ tracer.PopulateLLMResponseAttributes(ctx, handle, result, err)
}
// Finalize aggregated post-hook spans before ending the LLM span
diff --git a/core/schemas/provider.go b/core/schemas/provider.go
index 10d3a9d1ac..41e9cfebf0 100644
--- a/core/schemas/provider.go
+++ b/core/schemas/provider.go
@@ -8,15 +8,15 @@ import (
)
const (
- DefaultMaxRetries = 0
- DefaultRetryBackoffInitial = 500 * time.Millisecond
- DefaultRetryBackoffMax = 5 * time.Second
+ DefaultMaxRetries = 0
+ DefaultRetryBackoffInitial = 500 * time.Millisecond
+ DefaultRetryBackoffMax = 5 * time.Second
DefaultRequestTimeoutInSeconds = 30
- DefaultMaxConnDurationInSeconds = 300 // 5 minutes — forces connection recycling to prevent stale connections from NAT/LB silent drops
- DefaultBufferSize = 5000
- DefaultConcurrency = 1000
- DefaultStreamBufferSize = 256
- DefaultStreamIdleTimeoutInSeconds = 60 // Idle timeout per stream chunk — if no data for this many seconds, bifrost closes the connection
+ DefaultMaxConnDurationInSeconds = 300 // 5 minutes — forces connection recycling to prevent stale connections from NAT/LB silent drops
+ DefaultBufferSize = 5000
+ DefaultConcurrency = 1000
+ DefaultStreamBufferSize = 256
+ DefaultStreamIdleTimeoutInSeconds = 60 // Idle timeout per stream chunk — if no data for this many seconds, bifrost closes the connection
)
// Pre-defined errors for provider operations
@@ -49,14 +49,14 @@ const (
// - When marshaling to JSON: a time.Duration is converted to milliseconds
type NetworkConfig struct {
// BaseURL is supported for OpenAI, Anthropic, Cohere, Mistral, and Ollama providers (required for Ollama)
- BaseURL string `json:"base_url,omitempty"` // Base URL for the provider (optional)
- ExtraHeaders map[string]string `json:"extra_headers,omitempty"` // Additional headers to include in requests (optional)
- DefaultRequestTimeoutInSeconds int `json:"default_request_timeout_in_seconds"` // Default timeout for requests
- MaxRetries int `json:"max_retries"` // Maximum number of retries
- RetryBackoffInitial time.Duration `json:"retry_backoff_initial"` // Initial backoff duration (stored as nanoseconds, JSON as milliseconds)
- RetryBackoffMax time.Duration `json:"retry_backoff_max"` // Maximum backoff duration (stored as nanoseconds, JSON as milliseconds)
- InsecureSkipVerify bool `json:"insecure_skip_verify,omitempty"` // Disables TLS certificate verification for provider connections
- CACertPEM string `json:"ca_cert_pem,omitempty"` // PEM-encoded CA certificate to trust for provider endpoint connections
+ BaseURL string `json:"base_url,omitempty"` // Base URL for the provider (optional)
+ ExtraHeaders map[string]string `json:"extra_headers,omitempty"` // Additional headers to include in requests (optional)
+ DefaultRequestTimeoutInSeconds int `json:"default_request_timeout_in_seconds"` // Default timeout for requests
+ MaxRetries int `json:"max_retries"` // Maximum number of retries
+ RetryBackoffInitial time.Duration `json:"retry_backoff_initial"` // Initial backoff duration (stored as nanoseconds, JSON as milliseconds)
+ RetryBackoffMax time.Duration `json:"retry_backoff_max"` // Maximum backoff duration (stored as nanoseconds, JSON as milliseconds)
+ InsecureSkipVerify bool `json:"insecure_skip_verify,omitempty"` // Disables TLS certificate verification for provider connections
+ CACertPEM string `json:"ca_cert_pem,omitempty"` // PEM-encoded CA certificate to trust for provider endpoint connections
StreamIdleTimeoutInSeconds int `json:"stream_idle_timeout_in_seconds,omitempty"` // Idle timeout per stream chunk (0 = use default 60s)
}
@@ -387,67 +387,6 @@ type CustomProviderConfig struct {
RequestPathOverrides map[RequestType]string `json:"request_path_overrides,omitempty"` // Mapping of request type to its custom path which will override the default path of the provider (not allowed for Bedrock)
}
-type PricingOverrideMatchType string
-
-const (
- PricingOverrideMatchExact PricingOverrideMatchType = "exact"
- PricingOverrideMatchWildcard PricingOverrideMatchType = "wildcard"
- PricingOverrideMatchRegex PricingOverrideMatchType = "regex"
-)
-
-// ProviderPricingOverride contains a partial pricing patch applied at lookup time.
-// Any nil field falls back to the base pricing data.
-type ProviderPricingOverride struct {
- ModelPattern string `json:"model_pattern"`
- MatchType PricingOverrideMatchType `json:"match_type"`
- RequestTypes []RequestType `json:"request_types,omitempty"`
-
- // Basic token pricing
- InputCostPerToken *float64 `json:"input_cost_per_token,omitempty"`
- OutputCostPerToken *float64 `json:"output_cost_per_token,omitempty"`
-
- // Additional pricing for media
- InputCostPerVideoPerSecond *float64 `json:"input_cost_per_video_per_second,omitempty"`
- InputCostPerAudioPerSecond *float64 `json:"input_cost_per_audio_per_second,omitempty"`
-
- // Character-based pricing
- InputCostPerCharacter *float64 `json:"input_cost_per_character,omitempty"`
-
- // Pricing above 128k tokens
- InputCostPerTokenAbove128kTokens *float64 `json:"input_cost_per_token_above_128k_tokens,omitempty"`
- InputCostPerImageAbove128kTokens *float64 `json:"input_cost_per_image_above_128k_tokens,omitempty"`
- InputCostPerVideoPerSecondAbove128kTokens *float64 `json:"input_cost_per_video_per_second_above_128k_tokens,omitempty"`
- InputCostPerAudioPerSecondAbove128kTokens *float64 `json:"input_cost_per_audio_per_second_above_128k_tokens,omitempty"`
- OutputCostPerTokenAbove128kTokens *float64 `json:"output_cost_per_token_above_128k_tokens,omitempty"`
-
- // Pricing above 200k tokens
- InputCostPerTokenAbove200kTokens *float64 `json:"input_cost_per_token_above_200k_tokens,omitempty"`
- OutputCostPerTokenAbove200kTokens *float64 `json:"output_cost_per_token_above_200k_tokens,omitempty"`
- CacheCreationInputTokenCostAbove200kTokens *float64 `json:"cache_creation_input_token_cost_above_200k_tokens,omitempty"`
- CacheReadInputTokenCostAbove200kTokens *float64 `json:"cache_read_input_token_cost_above_200k_tokens,omitempty"`
-
- // Cache and batch pricing
- CacheReadInputTokenCost *float64 `json:"cache_read_input_token_cost,omitempty"`
- CacheCreationInputTokenCost *float64 `json:"cache_creation_input_token_cost,omitempty"`
- InputCostPerTokenBatches *float64 `json:"input_cost_per_token_batches,omitempty"`
- OutputCostPerTokenBatches *float64 `json:"output_cost_per_token_batches,omitempty"`
-
- // Image generation pricing
- InputCostPerImageToken *float64 `json:"input_cost_per_image_token,omitempty"`
- OutputCostPerImageToken *float64 `json:"output_cost_per_image_token,omitempty"`
- InputCostPerImage *float64 `json:"input_cost_per_image,omitempty"`
- OutputCostPerImage *float64 `json:"output_cost_per_image,omitempty"`
- OutputCostPerImageAbove1024x1024Pixels *float64 `json:"output_cost_per_image_above_1024_and_1024_pixels,omitempty"`
- OutputCostPerImageAbove1024x1024PixelsPremium *float64 `json:"output_cost_per_image_above_1024_and_1024_pixels_and_premium_image,omitempty"`
- OutputCostPerImageAbove2048x2048Pixels *float64 `json:"output_cost_per_image_above_2048_and_2048_pixels,omitempty"`
- OutputCostPerImageAbove4096x4096Pixels *float64 `json:"output_cost_per_image_above_4096_and_4096_pixels,omitempty"`
- OutputCostPerImageLowQuality *float64 `json:"output_cost_per_image_low_quality,omitempty"`
- OutputCostPerImageMediumQuality *float64 `json:"output_cost_per_image_medium_quality,omitempty"`
- OutputCostPerImageHighQuality *float64 `json:"output_cost_per_image_high_quality,omitempty"`
- OutputCostPerImageAutoQuality *float64 `json:"output_cost_per_image_auto_quality,omitempty"`
- CacheReadInputImageTokenCost *float64 `json:"cache_read_input_image_token_cost,omitempty"`
-}
-
// IsOperationAllowed checks if a specific operation is allowed for this custom provider
func (cpc *CustomProviderConfig) IsOperationAllowed(operation RequestType) bool {
if cpc == nil || cpc.AllowedRequests == nil {
@@ -463,13 +402,12 @@ type ProviderConfig struct {
NetworkConfig NetworkConfig `json:"network_config"` // Network configuration
ConcurrencyAndBufferSize ConcurrencyAndBufferSize `json:"concurrency_and_buffer_size"` // Concurrency settings
// Logger instance, can be provided by the user or bifrost default logger is used if not provided
- Logger Logger `json:"-"`
- ProxyConfig *ProxyConfig `json:"proxy_config,omitempty"` // Proxy configuration
- SendBackRawRequest bool `json:"send_back_raw_request"` // Send raw request back in the bifrost response (default: false)
- SendBackRawResponse bool `json:"send_back_raw_response"` // Send raw response back in the bifrost response (default: false)
- StoreRawRequestResponse bool `json:"store_raw_request_response"` // Capture raw request/response for internal logging only; strip from API responses returned to clients (default: false)
- CustomProviderConfig *CustomProviderConfig `json:"custom_provider_config,omitempty"`
- PricingOverrides []ProviderPricingOverride `json:"pricing_overrides,omitempty"`
+ Logger Logger `json:"-"`
+ ProxyConfig *ProxyConfig `json:"proxy_config,omitempty"` // Proxy configuration
+ SendBackRawRequest bool `json:"send_back_raw_request"` // Send raw request back in the bifrost response (default: false)
+ SendBackRawResponse bool `json:"send_back_raw_response"` // Send raw response back in the bifrost response (default: false)
+ StoreRawRequestResponse bool `json:"store_raw_request_response"` // Capture raw request/response for internal logging only; strip from API responses returned to clients (default: false)
+ CustomProviderConfig *CustomProviderConfig `json:"custom_provider_config,omitempty"`
}
func (config *ProviderConfig) CheckAndSetDefaults() {
diff --git a/core/schemas/tracer.go b/core/schemas/tracer.go
index 74f4442b47..820d88c9dc 100644
--- a/core/schemas/tracer.go
+++ b/core/schemas/tracer.go
@@ -68,7 +68,7 @@ type Tracer interface {
// PopulateLLMResponseAttributes populates all LLM-specific response attributes on the span.
// This includes output messages, tokens, usage stats, and error information if present.
- PopulateLLMResponseAttributes(handle SpanHandle, resp *BifrostResponse, err *BifrostError)
+ PopulateLLMResponseAttributes(ctx *BifrostContext, handle SpanHandle, resp *BifrostResponse, err *BifrostError)
// StoreDeferredSpan stores a span handle for later completion (used for streaming requests).
// The span handle is stored keyed by trace ID so it can be retrieved when the stream completes.
@@ -144,7 +144,7 @@ func (n *NoOpTracer) AddEvent(_ SpanHandle, _ string, _ map[string]any) {}
func (n *NoOpTracer) PopulateLLMRequestAttributes(_ SpanHandle, _ *BifrostRequest) {}
// PopulateLLMResponseAttributes does nothing.
-func (n *NoOpTracer) PopulateLLMResponseAttributes(_ SpanHandle, _ *BifrostResponse, _ *BifrostError) {
+func (n *NoOpTracer) PopulateLLMResponseAttributes(_ *BifrostContext, _ SpanHandle, _ *BifrostResponse, _ *BifrostError) {
}
// StoreDeferredSpan does nothing.
diff --git a/docs/architecture/framework/model-catalog.mdx b/docs/architecture/framework/model-catalog.mdx
index 53e05433c6..76daad3871 100644
--- a/docs/architecture/framework/model-catalog.mdx
+++ b/docs/architecture/framework/model-catalog.mdx
@@ -189,6 +189,7 @@ Calculate costs from a Bifrost response:
// Calculate cost for a completed request
cost := modelCatalog.CalculateCost(
result, // *schemas.BifrostResponse
+ nil, // *PricingLookupScopes (nil = no scoped overrides)
)
logger.Info("Request cost: $%.6f", cost)
@@ -199,7 +200,7 @@ logger.Info("Request cost: $%.6f", cost)
```go
// CalculateCost handles all cost scenarios including cache-aware pricing
-cost := modelCatalog.CalculateCost(result) // *schemas.BifrostResponse
+cost := modelCatalog.CalculateCost(result, nil) // *schemas.BifrostResponse, *PricingLookupScopes
// Cache hits return 0 for direct hits, embedding cost for semantic matches
// Cache misses return base model cost + embedding generation cost
diff --git a/docs/docs.json b/docs/docs.json
index 22e8925d2d..bdbcd02d8f 100644
--- a/docs/docs.json
+++ b/docs/docs.json
@@ -154,6 +154,7 @@
"providers/reasoning",
"providers/performance",
"providers/custom-providers",
+ "providers/custom-pricing",
"providers/request-options"
]
},
diff --git a/docs/media/ui-custom-pricing-form.png b/docs/media/ui-custom-pricing-form.png
new file mode 100644
index 0000000000..4bdefd5731
Binary files /dev/null and b/docs/media/ui-custom-pricing-form.png differ
diff --git a/docs/media/ui-custom-pricing-table.png b/docs/media/ui-custom-pricing-table.png
new file mode 100644
index 0000000000..470eaa9902
Binary files /dev/null and b/docs/media/ui-custom-pricing-table.png differ
diff --git a/docs/openapi/openapi.json b/docs/openapi/openapi.json
index aa1671ca22..21eaeb3f5b 100644
--- a/docs/openapi/openapi.json
+++ b/docs/openapi/openapi.json
@@ -2,7 +2,7 @@
"openapi": "3.1.0",
"info": {
"title": "Bifrost API",
- "description": "Bifrost HTTP Transport API for AI model inference and gateway management.\n\nThis API provides a unified interface for interacting with multiple AI providers\nincluding OpenAI, Anthropic, Bedrock, Gemini, and more through a single API,\nalong with comprehensive management APIs for configuring and monitoring the gateway.\n\n## API Structure\n\n### Unified Inference API (`/v1/*`)\nThe primary API using Bifrost's unified format. Model parameters use the format\n`provider/model` (e.g., `openai/gpt-4`, `anthropic/claude-3-opus`).\n\n### Async Inference API (`/v1/async/*`)\nSubmit inference requests for asynchronous execution. Returns a job ID immediately\nand allows polling for results. Supports all inference types except batches, files,\nand containers.\n\n### Provider Integration APIs\nNative provider-format APIs for drop-in compatibility:\n- `/openai/*` - OpenAI-compatible API\n- `/anthropic/*` - Anthropic-compatible API\n- `/genai/*` - Google GenAI (Gemini) compatible API\n- `/bedrock/*` - AWS Bedrock compatible API\n- `/cohere/*` - Cohere compatible API\n\n### Framework Integration APIs\nMulti-provider proxy endpoints for AI frameworks:\n- `/litellm/*` - LiteLLM proxy with all provider formats\n- `/langchain/*` - LangChain compatible endpoints\n- `/pydanticai/*` - PydanticAI compatible endpoints\n\n### Management APIs (`/api/*`)\nAPIs for managing and monitoring the Bifrost gateway:\n- `/api/config` - Configuration management\n- `/api/providers` - Provider and API key management\n- `/api/plugins` - Plugin management\n- `/api/governance/*` - Virtual keys, teams, customers, budgets, rate limits, and routing rules\n- `/api/logs` - Log search and analytics\n- `/api/mcp/*` - MCP (Model Context Protocol) client management\n- `/api/session/*` - Authentication and session management\n- `/api/cache/*` - Cache management\n- `/health` - Health check endpoint\n\n## Fallbacks\nRequests can include fallback models that will be tried if the primary model fails.\n",
+ "description": "Bifrost HTTP Transport API for AI model inference and gateway management.\n\nThis API provides a unified interface for interacting with multiple AI providers\nincluding OpenAI, Anthropic, Bedrock, Gemini, and more through a single API,\nalong with comprehensive management APIs for configuring and monitoring the gateway.\n\n## API Structure\n\n### Unified Inference API (`/v1/*`)\nThe primary API using Bifrost's unified format. Model parameters use the format\n`provider/model` (e.g., `openai/gpt-4`, `anthropic/claude-3-opus`).\n\n### Async Inference API (`/v1/async/*`)\nSubmit inference requests for asynchronous execution. Returns a job ID immediately\nand allows polling for results. Supports all inference types except batches, files,\nand containers.\n\n### Provider Integration APIs\nNative provider-format APIs for drop-in compatibility:\n- `/openai/*` - OpenAI-compatible API\n- `/anthropic/*` - Anthropic-compatible API\n- `/genai/*` - Google GenAI (Gemini) compatible API\n- `/bedrock/*` - AWS Bedrock compatible API\n- `/cohere/*` - Cohere compatible API\n\n### Framework Integration APIs\nMulti-provider proxy endpoints for AI frameworks:\n- `/litellm/*` - LiteLLM proxy with all provider formats\n- `/langchain/*` - LangChain compatible endpoints\n- `/pydanticai/*` - PydanticAI compatible endpoints\n\n### Management APIs (`/api/*`)\nAPIs for managing and monitoring the Bifrost gateway:\n- `/api/config` - Configuration management\n- `/api/providers` - Provider and API key management\n- `/api/plugins` - Plugin management\n- `/api/governance/*` - Virtual keys, teams, customers, budgets, rate limits, routing rules, and pricing overrides\n- `/api/logs` - Log search and analytics\n- `/api/mcp/*` - MCP (Model Context Protocol) client management\n- `/api/session/*` - Authentication and session management\n- `/api/cache/*` - Cache management\n- `/health` - Health check endpoint\n\n## Fallbacks\nRequests can include fallback models that will be tried if the primary model fails.\n",
"version": "1.0.0",
"contact": {
"name": "Contact Us",
@@ -145104,7 +145104,9 @@
"type": "string"
},
"weight": {
- "type": "number"
+ "type": "number",
+ "nullable": true,
+ "description": "Weight for provider load balancing. Null means excluded from weighted routing."
},
"allowed_models": {
"type": "array",
@@ -145649,6 +145651,7 @@
},
"provider_configs": {
"type": "array",
+ "description": "Provider configurations (empty means no providers allowed, deny-by-default)",
"items": {
"type": "object",
"properties": {
@@ -145656,7 +145659,9 @@
"type": "string"
},
"weight": {
- "type": "number"
+ "type": "number",
+ "nullable": true,
+ "description": "Weight for load balancing. Null means excluded from weighted routing."
},
"allowed_models": {
"type": "array",
@@ -145711,6 +145716,7 @@
},
"mcp_configs": {
"type": "array",
+ "description": "MCP configurations (empty means no MCP tools allowed, deny-by-default)",
"items": {
"type": "object",
"properties": {
@@ -145823,7 +145829,9 @@
"type": "string"
},
"weight": {
- "type": "number"
+ "type": "number",
+ "nullable": true,
+ "description": "Weight for provider load balancing. Null means excluded from weighted routing."
},
"allowed_models": {
"type": "array",
@@ -146494,7 +146502,9 @@
"type": "string"
},
"weight": {
- "type": "number"
+ "type": "number",
+ "nullable": true,
+ "description": "Weight for provider load balancing. Null means excluded from weighted routing."
},
"allowed_models": {
"type": "array",
@@ -147137,7 +147147,9 @@
"type": "string"
},
"weight": {
- "type": "number"
+ "type": "number",
+ "nullable": true,
+ "description": "Weight for load balancing. Null means excluded from weighted routing."
},
"allowed_models": {
"type": "array",
@@ -147299,7 +147311,9 @@
"type": "string"
},
"weight": {
- "type": "number"
+ "type": "number",
+ "nullable": true,
+ "description": "Weight for provider load balancing. Null means excluded from weighted routing."
},
"allowed_models": {
"type": "array",
@@ -148334,7 +148348,9 @@
"type": "string"
},
"weight": {
- "type": "number"
+ "type": "number",
+ "nullable": true,
+ "description": "Weight for provider load balancing. Null means excluded from weighted routing."
},
"allowed_models": {
"type": "array",
@@ -148845,7 +148861,9 @@
"type": "string"
},
"weight": {
- "type": "number"
+ "type": "number",
+ "nullable": true,
+ "description": "Weight for provider load balancing. Null means excluded from weighted routing."
},
"allowed_models": {
"type": "array",
@@ -149556,7 +149574,9 @@
"type": "string"
},
"weight": {
- "type": "number"
+ "type": "number",
+ "nullable": true,
+ "description": "Weight for provider load balancing. Null means excluded from weighted routing."
},
"allowed_models": {
"type": "array",
@@ -150067,7 +150087,9 @@
"type": "string"
},
"weight": {
- "type": "number"
+ "type": "number",
+ "nullable": true,
+ "description": "Weight for provider load balancing. Null means excluded from weighted routing."
},
"allowed_models": {
"type": "array",
@@ -150838,7 +150860,9 @@
"type": "string"
},
"weight": {
- "type": "number"
+ "type": "number",
+ "nullable": true,
+ "description": "Weight for provider load balancing. Null means excluded from weighted routing."
},
"allowed_models": {
"type": "array",
@@ -151349,7 +151373,9 @@
"type": "string"
},
"weight": {
- "type": "number"
+ "type": "number",
+ "nullable": true,
+ "description": "Weight for provider load balancing. Null means excluded from weighted routing."
},
"allowed_models": {
"type": "array",
@@ -152144,7 +152170,9 @@
"type": "string"
},
"weight": {
- "type": "number"
+ "type": "number",
+ "nullable": true,
+ "description": "Weight for provider load balancing. Null means excluded from weighted routing."
},
"allowed_models": {
"type": "array",
@@ -152655,7 +152683,9 @@
"type": "string"
},
"weight": {
- "type": "number"
+ "type": "number",
+ "nullable": true,
+ "description": "Weight for provider load balancing. Null means excluded from weighted routing."
},
"allowed_models": {
"type": "array",
@@ -153693,7 +153723,9 @@
"type": "string"
},
"weight": {
- "type": "number"
+ "type": "number",
+ "nullable": true,
+ "description": "Weight for provider load balancing. Null means excluded from weighted routing."
},
"allowed_models": {
"type": "array",
@@ -154372,7 +154404,9 @@
"type": "string"
},
"weight": {
- "type": "number"
+ "type": "number",
+ "nullable": true,
+ "description": "Weight for provider load balancing. Null means excluded from weighted routing."
},
"allowed_models": {
"type": "array",
@@ -155181,7 +155215,9 @@
"type": "string"
},
"weight": {
- "type": "number"
+ "type": "number",
+ "nullable": true,
+ "description": "Weight for provider load balancing. Null means excluded from weighted routing."
},
"allowed_models": {
"type": "array",
@@ -155692,7 +155728,9 @@
"type": "string"
},
"weight": {
- "type": "number"
+ "type": "number",
+ "nullable": true,
+ "description": "Weight for provider load balancing. Null means excluded from weighted routing."
},
"allowed_models": {
"type": "array",
@@ -156181,7 +156219,9 @@
"type": "string"
},
"weight": {
- "type": "number"
+ "type": "number",
+ "nullable": true,
+ "description": "Weight for provider load balancing. Null means excluded from weighted routing."
},
"allowed_models": {
"type": "array",
@@ -156944,7 +156984,9 @@
"type": "string"
},
"weight": {
- "type": "number"
+ "type": "number",
+ "nullable": true,
+ "description": "Weight for provider load balancing. Null means excluded from weighted routing."
},
"allowed_models": {
"type": "array",
@@ -162068,8 +162110,2080 @@
}
}
},
- "404": {
- "description": "Provider not found",
+ "404": {
+ "description": "Provider not found",
+ "content": {
+ "application/json": {
+ "schema": {
+ "type": "object",
+ "description": "Error response from Bifrost",
+ "properties": {
+ "event_id": {
+ "type": "string"
+ },
+ "type": {
+ "type": "string"
+ },
+ "is_bifrost_error": {
+ "type": "boolean"
+ },
+ "status_code": {
+ "type": "integer"
+ },
+ "error": {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string"
+ },
+ "code": {
+ "type": "string"
+ },
+ "message": {
+ "type": "string"
+ },
+ "param": {
+ "type": "string"
+ },
+ "event_id": {
+ "type": "string"
+ }
+ }
+ },
+ "extra_fields": {
+ "type": "object",
+ "properties": {
+ "provider": {
+ "type": "string",
+ "description": "AI model provider identifier",
+ "enum": [
+ "openai",
+ "azure",
+ "anthropic",
+ "bedrock",
+ "cohere",
+ "vertex",
+ "vllm",
+ "mistral",
+ "ollama",
+ "groq",
+ "sgl",
+ "parasail",
+ "perplexity",
+ "replicate",
+ "cerebras",
+ "gemini",
+ "openrouter",
+ "elevenlabs",
+ "huggingface",
+ "nebius",
+ "xai",
+ "runway"
+ ]
+ },
+ "model_requested": {
+ "type": "string"
+ },
+ "request_type": {
+ "type": "string"
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ },
+ "500": {
+ "description": "Internal server error",
+ "content": {
+ "application/json": {
+ "schema": {
+ "type": "object",
+ "description": "Error response from Bifrost",
+ "properties": {
+ "event_id": {
+ "type": "string"
+ },
+ "type": {
+ "type": "string"
+ },
+ "is_bifrost_error": {
+ "type": "boolean"
+ },
+ "status_code": {
+ "type": "integer"
+ },
+ "error": {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string"
+ },
+ "code": {
+ "type": "string"
+ },
+ "message": {
+ "type": "string"
+ },
+ "param": {
+ "type": "string"
+ },
+ "event_id": {
+ "type": "string"
+ }
+ }
+ },
+ "extra_fields": {
+ "type": "object",
+ "properties": {
+ "provider": {
+ "type": "string",
+ "description": "AI model provider identifier",
+ "enum": [
+ "openai",
+ "azure",
+ "anthropic",
+ "bedrock",
+ "cohere",
+ "vertex",
+ "vllm",
+ "mistral",
+ "ollama",
+ "groq",
+ "sgl",
+ "parasail",
+ "perplexity",
+ "replicate",
+ "cerebras",
+ "gemini",
+ "openrouter",
+ "elevenlabs",
+ "huggingface",
+ "nebius",
+ "xai",
+ "runway"
+ ]
+ },
+ "model_requested": {
+ "type": "string"
+ },
+ "request_type": {
+ "type": "string"
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ },
+ "delete": {
+ "operationId": "deleteProviderGovernance",
+ "summary": "Delete provider governance",
+ "description": "Removes governance settings (budget and rate limits) for a specific provider.",
+ "tags": [
+ "Governance"
+ ],
+ "parameters": [
+ {
+ "name": "provider_name",
+ "in": "path",
+ "required": true,
+ "description": "Provider name",
+ "schema": {
+ "type": "string"
+ }
+ }
+ ],
+ "responses": {
+ "200": {
+ "description": "Provider governance deleted successfully",
+ "content": {
+ "application/json": {
+ "schema": {
+ "type": "object",
+ "description": "Simple message response",
+ "properties": {
+ "message": {
+ "type": "string"
+ }
+ }
+ }
+ }
+ }
+ },
+ "404": {
+ "description": "Provider not found",
+ "content": {
+ "application/json": {
+ "schema": {
+ "type": "object",
+ "description": "Error response from Bifrost",
+ "properties": {
+ "event_id": {
+ "type": "string"
+ },
+ "type": {
+ "type": "string"
+ },
+ "is_bifrost_error": {
+ "type": "boolean"
+ },
+ "status_code": {
+ "type": "integer"
+ },
+ "error": {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string"
+ },
+ "code": {
+ "type": "string"
+ },
+ "message": {
+ "type": "string"
+ },
+ "param": {
+ "type": "string"
+ },
+ "event_id": {
+ "type": "string"
+ }
+ }
+ },
+ "extra_fields": {
+ "type": "object",
+ "properties": {
+ "provider": {
+ "type": "string",
+ "description": "AI model provider identifier",
+ "enum": [
+ "openai",
+ "azure",
+ "anthropic",
+ "bedrock",
+ "cohere",
+ "vertex",
+ "vllm",
+ "mistral",
+ "ollama",
+ "groq",
+ "sgl",
+ "parasail",
+ "perplexity",
+ "replicate",
+ "cerebras",
+ "gemini",
+ "openrouter",
+ "elevenlabs",
+ "huggingface",
+ "nebius",
+ "xai",
+ "runway"
+ ]
+ },
+ "model_requested": {
+ "type": "string"
+ },
+ "request_type": {
+ "type": "string"
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ },
+ "500": {
+ "description": "Internal server error",
+ "content": {
+ "application/json": {
+ "schema": {
+ "type": "object",
+ "description": "Error response from Bifrost",
+ "properties": {
+ "event_id": {
+ "type": "string"
+ },
+ "type": {
+ "type": "string"
+ },
+ "is_bifrost_error": {
+ "type": "boolean"
+ },
+ "status_code": {
+ "type": "integer"
+ },
+ "error": {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string"
+ },
+ "code": {
+ "type": "string"
+ },
+ "message": {
+ "type": "string"
+ },
+ "param": {
+ "type": "string"
+ },
+ "event_id": {
+ "type": "string"
+ }
+ }
+ },
+ "extra_fields": {
+ "type": "object",
+ "properties": {
+ "provider": {
+ "type": "string",
+ "description": "AI model provider identifier",
+ "enum": [
+ "openai",
+ "azure",
+ "anthropic",
+ "bedrock",
+ "cohere",
+ "vertex",
+ "vllm",
+ "mistral",
+ "ollama",
+ "groq",
+ "sgl",
+ "parasail",
+ "perplexity",
+ "replicate",
+ "cerebras",
+ "gemini",
+ "openrouter",
+ "elevenlabs",
+ "huggingface",
+ "nebius",
+ "xai",
+ "runway"
+ ]
+ },
+ "model_requested": {
+ "type": "string"
+ },
+ "request_type": {
+ "type": "string"
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ },
+ "/api/governance/pricing-overrides": {
+ "get": {
+ "operationId": "listPricingOverrides",
+ "summary": "List pricing overrides",
+ "description": "Returns all pricing overrides, optionally filtered by scope.",
+ "tags": [
+ "Governance"
+ ],
+ "parameters": [
+ {
+ "name": "scope_kind",
+ "in": "query",
+ "description": "Filter by scope kind",
+ "schema": {
+ "type": "string",
+ "enum": [
+ "global",
+ "provider",
+ "provider_key",
+ "virtual_key",
+ "virtual_key_provider",
+ "virtual_key_provider_key"
+ ]
+ }
+ },
+ {
+ "name": "virtual_key_id",
+ "in": "query",
+ "description": "Filter by virtual key ID (for virtual_key* scopes)",
+ "schema": {
+ "type": "string"
+ }
+ },
+ {
+ "name": "provider_id",
+ "in": "query",
+ "description": "Filter by provider ID",
+ "schema": {
+ "type": "string"
+ }
+ },
+ {
+ "name": "provider_key_id",
+ "in": "query",
+ "description": "Filter by provider key ID",
+ "schema": {
+ "type": "string"
+ }
+ }
+ ],
+ "responses": {
+ "200": {
+ "description": "Successful response",
+ "content": {
+ "application/json": {
+ "schema": {
+ "type": "object",
+ "properties": {
+ "pricing_overrides": {
+ "type": "array",
+ "items": {
+ "type": "object",
+ "description": "A pricing override that applies custom rates to matching requests.",
+ "properties": {
+ "id": {
+ "type": "string",
+ "description": "Unique override ID (UUID)"
+ },
+ "name": {
+ "type": "string",
+ "description": "Human-readable label"
+ },
+ "scope_kind": {
+ "type": "string",
+ "enum": [
+ "global",
+ "provider",
+ "provider_key",
+ "virtual_key",
+ "virtual_key_provider",
+ "virtual_key_provider_key"
+ ],
+ "description": "Scope that determines which requests this override applies to"
+ },
+ "virtual_key_id": {
+ "type": "string",
+ "nullable": true,
+ "description": "Required for virtual_key* scopes"
+ },
+ "provider_id": {
+ "type": "string",
+ "nullable": true,
+ "description": "Required for provider and virtual_key_provider scopes"
+ },
+ "provider_key_id": {
+ "type": "string",
+ "nullable": true,
+ "description": "Required for provider_key and virtual_key_provider_key scopes"
+ },
+ "match_type": {
+ "type": "string",
+ "enum": [
+ "exact",
+ "wildcard"
+ ],
+ "description": "How the pattern is matched against the model name"
+ },
+ "pattern": {
+ "type": "string",
+ "description": "Model name or wildcard prefix (e.g. \"gpt-4o\" or \"claude-3*\")"
+ },
+ "request_types": {
+ "type": "array",
+ "minItems": 1,
+ "items": {
+ "type": "string",
+ "description": "Request type for pricing override filtering. Stream variants are treated identically to their base type — specifying `chat_completion` covers both streaming and non-streaming chat requests.\n",
+ "enum": [
+ "chat_completion",
+ "text_completion",
+ "responses",
+ "embedding",
+ "rerank",
+ "speech",
+ "transcription",
+ "image_generation",
+ "image_variation",
+ "image_edit",
+ "video_generation",
+ "video_remix"
+ ]
+ },
+ "description": "Request types this override applies to. At least one value is required."
+ },
+ "pricing_patch": {
+ "type": "string",
+ "description": "JSON-encoded pricing fields to override (as stored in the database)"
+ },
+ "patch": {
+ "type": "object",
+ "description": "Decoded pricing fields (present in API responses)",
+ "properties": {
+ "input_cost_per_token": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_token": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_token_batches": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_token_batches": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_token_priority": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_token_priority": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_character": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_token_above_128k_tokens": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_token_above_128k_tokens": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_token_above_200k_tokens": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_token_above_200k_tokens": {
+ "type": "number",
+ "minimum": 0
+ },
+ "cache_creation_input_token_cost": {
+ "type": "number",
+ "minimum": 0
+ },
+ "cache_read_input_token_cost": {
+ "type": "number",
+ "minimum": 0
+ },
+ "cache_creation_input_token_cost_above_200k_tokens": {
+ "type": "number",
+ "minimum": 0
+ },
+ "cache_read_input_token_cost_above_200k_tokens": {
+ "type": "number",
+ "minimum": 0
+ },
+ "cache_read_input_token_cost_priority": {
+ "type": "number",
+ "minimum": 0
+ },
+ "cache_read_input_image_token_cost": {
+ "type": "number",
+ "minimum": 0
+ },
+ "cache_creation_input_audio_token_cost": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_image": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_pixel": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_pixel": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_image_token": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_token": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_low_quality": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_medium_quality": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_high_quality": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_auto_quality": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_premium_image": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_above_512_and_512_pixels": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_above_1024_and_1024_pixels": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_above_2048_and_2048_pixels": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_above_4096_and_4096_pixels": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_audio_token": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_audio_token": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_audio_per_second": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_second": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_video_per_second": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_video_per_second": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_second": {
+ "type": "number",
+ "minimum": 0
+ },
+ "search_context_cost_per_query": {
+ "type": "number",
+ "minimum": 0
+ },
+ "code_interpreter_cost_per_session": {
+ "type": "number",
+ "minimum": 0
+ }
+ }
+ },
+ "config_hash": {
+ "type": "string",
+ "nullable": true,
+ "description": "Auto-managed hash for config-file-sourced overrides. Do not set manually."
+ },
+ "created_at": {
+ "type": "string",
+ "format": "date-time"
+ },
+ "updated_at": {
+ "type": "string",
+ "format": "date-time"
+ }
+ }
+ }
+ },
+ "count": {
+ "type": "integer",
+ "description": "Total number of overrides returned"
+ }
+ }
+ }
+ }
+ }
+ },
+ "500": {
+ "description": "Internal server error",
+ "content": {
+ "application/json": {
+ "schema": {
+ "type": "object",
+ "description": "Error response from Bifrost",
+ "properties": {
+ "event_id": {
+ "type": "string"
+ },
+ "type": {
+ "type": "string"
+ },
+ "is_bifrost_error": {
+ "type": "boolean"
+ },
+ "status_code": {
+ "type": "integer"
+ },
+ "error": {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string"
+ },
+ "code": {
+ "type": "string"
+ },
+ "message": {
+ "type": "string"
+ },
+ "param": {
+ "type": "string"
+ },
+ "event_id": {
+ "type": "string"
+ }
+ }
+ },
+ "extra_fields": {
+ "type": "object",
+ "properties": {
+ "provider": {
+ "type": "string",
+ "description": "AI model provider identifier",
+ "enum": [
+ "openai",
+ "azure",
+ "anthropic",
+ "bedrock",
+ "cohere",
+ "vertex",
+ "vllm",
+ "mistral",
+ "ollama",
+ "groq",
+ "sgl",
+ "parasail",
+ "perplexity",
+ "replicate",
+ "cerebras",
+ "gemini",
+ "openrouter",
+ "elevenlabs",
+ "huggingface",
+ "nebius",
+ "xai",
+ "runway"
+ ]
+ },
+ "model_requested": {
+ "type": "string"
+ },
+ "request_type": {
+ "type": "string"
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ },
+ "post": {
+ "operationId": "createPricingOverride",
+ "summary": "Create pricing override",
+ "description": "Creates a new pricing override. The most specific matching scope always wins during cost resolution.",
+ "tags": [
+ "Governance"
+ ],
+ "requestBody": {
+ "required": true,
+ "content": {
+ "application/json": {
+ "schema": {
+ "type": "object",
+ "description": "Request body for creating or updating a pricing override.",
+ "required": [
+ "name",
+ "scope_kind",
+ "match_type",
+ "pattern",
+ "request_types"
+ ],
+ "properties": {
+ "name": {
+ "type": "string",
+ "description": "Human-readable label"
+ },
+ "scope_kind": {
+ "type": "string",
+ "enum": [
+ "global",
+ "provider",
+ "provider_key",
+ "virtual_key",
+ "virtual_key_provider",
+ "virtual_key_provider_key"
+ ]
+ },
+ "virtual_key_id": {
+ "type": "string",
+ "description": "Required for virtual_key* scopes"
+ },
+ "provider_id": {
+ "type": "string",
+ "description": "Required for provider and virtual_key_provider scopes"
+ },
+ "provider_key_id": {
+ "type": "string",
+ "description": "Required for provider_key and virtual_key_provider_key scopes"
+ },
+ "match_type": {
+ "type": "string",
+ "enum": [
+ "exact",
+ "wildcard"
+ ]
+ },
+ "pattern": {
+ "type": "string",
+ "description": "Model name or wildcard prefix ending with * (e.g. \"claude-3*\")"
+ },
+ "request_types": {
+ "type": "array",
+ "minItems": 1,
+ "items": {
+ "type": "string",
+ "description": "Request type for pricing override filtering. Stream variants are treated identically to their base type — specifying `chat_completion` covers both streaming and non-streaming chat requests.\n",
+ "enum": [
+ "chat_completion",
+ "text_completion",
+ "responses",
+ "embedding",
+ "rerank",
+ "speech",
+ "transcription",
+ "image_generation",
+ "image_variation",
+ "image_edit",
+ "video_generation",
+ "video_remix"
+ ]
+ },
+ "description": "Request types this override applies to. At least one value is required."
+ },
+ "patch": {
+ "type": "object",
+ "description": "Pricing fields to override. Only non-zero/non-null fields are applied. All values are cost per unit in USD.\n",
+ "properties": {
+ "input_cost_per_token": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_token": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_token_batches": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_token_batches": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_token_priority": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_token_priority": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_character": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_token_above_128k_tokens": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_token_above_128k_tokens": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_token_above_200k_tokens": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_token_above_200k_tokens": {
+ "type": "number",
+ "minimum": 0
+ },
+ "cache_creation_input_token_cost": {
+ "type": "number",
+ "minimum": 0
+ },
+ "cache_read_input_token_cost": {
+ "type": "number",
+ "minimum": 0
+ },
+ "cache_creation_input_token_cost_above_200k_tokens": {
+ "type": "number",
+ "minimum": 0
+ },
+ "cache_read_input_token_cost_above_200k_tokens": {
+ "type": "number",
+ "minimum": 0
+ },
+ "cache_read_input_token_cost_priority": {
+ "type": "number",
+ "minimum": 0
+ },
+ "cache_read_input_image_token_cost": {
+ "type": "number",
+ "minimum": 0
+ },
+ "cache_creation_input_audio_token_cost": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_image": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_pixel": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_pixel": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_image_token": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_token": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_low_quality": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_medium_quality": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_high_quality": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_auto_quality": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_premium_image": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_above_512_and_512_pixels": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_above_1024_and_1024_pixels": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_above_2048_and_2048_pixels": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_above_4096_and_4096_pixels": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_audio_token": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_audio_token": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_audio_per_second": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_second": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_video_per_second": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_video_per_second": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_second": {
+ "type": "number",
+ "minimum": 0
+ },
+ "search_context_cost_per_query": {
+ "type": "number",
+ "minimum": 0
+ },
+ "code_interpreter_cost_per_session": {
+ "type": "number",
+ "minimum": 0
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ },
+ "responses": {
+ "201": {
+ "description": "Pricing override created successfully",
+ "content": {
+ "application/json": {
+ "schema": {
+ "type": "object",
+ "properties": {
+ "message": {
+ "type": "string"
+ },
+ "pricing_override": {
+ "type": "object",
+ "description": "A pricing override that applies custom rates to matching requests.",
+ "properties": {
+ "id": {
+ "type": "string",
+ "description": "Unique override ID (UUID)"
+ },
+ "name": {
+ "type": "string",
+ "description": "Human-readable label"
+ },
+ "scope_kind": {
+ "type": "string",
+ "enum": [
+ "global",
+ "provider",
+ "provider_key",
+ "virtual_key",
+ "virtual_key_provider",
+ "virtual_key_provider_key"
+ ],
+ "description": "Scope that determines which requests this override applies to"
+ },
+ "virtual_key_id": {
+ "type": "string",
+ "nullable": true,
+ "description": "Required for virtual_key* scopes"
+ },
+ "provider_id": {
+ "type": "string",
+ "nullable": true,
+ "description": "Required for provider and virtual_key_provider scopes"
+ },
+ "provider_key_id": {
+ "type": "string",
+ "nullable": true,
+ "description": "Required for provider_key and virtual_key_provider_key scopes"
+ },
+ "match_type": {
+ "type": "string",
+ "enum": [
+ "exact",
+ "wildcard"
+ ],
+ "description": "How the pattern is matched against the model name"
+ },
+ "pattern": {
+ "type": "string",
+ "description": "Model name or wildcard prefix (e.g. \"gpt-4o\" or \"claude-3*\")"
+ },
+ "request_types": {
+ "type": "array",
+ "minItems": 1,
+ "items": {
+ "type": "string",
+ "description": "Request type for pricing override filtering. Stream variants are treated identically to their base type — specifying `chat_completion` covers both streaming and non-streaming chat requests.\n",
+ "enum": [
+ "chat_completion",
+ "text_completion",
+ "responses",
+ "embedding",
+ "rerank",
+ "speech",
+ "transcription",
+ "image_generation",
+ "image_variation",
+ "image_edit",
+ "video_generation",
+ "video_remix"
+ ]
+ },
+ "description": "Request types this override applies to. At least one value is required."
+ },
+ "pricing_patch": {
+ "type": "string",
+ "description": "JSON-encoded pricing fields to override (as stored in the database)"
+ },
+ "patch": {
+ "type": "object",
+ "description": "Decoded pricing fields (present in API responses)",
+ "properties": {
+ "input_cost_per_token": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_token": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_token_batches": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_token_batches": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_token_priority": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_token_priority": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_character": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_token_above_128k_tokens": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_token_above_128k_tokens": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_token_above_200k_tokens": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_token_above_200k_tokens": {
+ "type": "number",
+ "minimum": 0
+ },
+ "cache_creation_input_token_cost": {
+ "type": "number",
+ "minimum": 0
+ },
+ "cache_read_input_token_cost": {
+ "type": "number",
+ "minimum": 0
+ },
+ "cache_creation_input_token_cost_above_200k_tokens": {
+ "type": "number",
+ "minimum": 0
+ },
+ "cache_read_input_token_cost_above_200k_tokens": {
+ "type": "number",
+ "minimum": 0
+ },
+ "cache_read_input_token_cost_priority": {
+ "type": "number",
+ "minimum": 0
+ },
+ "cache_read_input_image_token_cost": {
+ "type": "number",
+ "minimum": 0
+ },
+ "cache_creation_input_audio_token_cost": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_image": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_pixel": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_pixel": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_image_token": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_token": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_low_quality": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_medium_quality": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_high_quality": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_auto_quality": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_premium_image": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_above_512_and_512_pixels": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_above_1024_and_1024_pixels": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_above_2048_and_2048_pixels": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_above_4096_and_4096_pixels": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_audio_token": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_audio_token": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_audio_per_second": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_second": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_video_per_second": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_video_per_second": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_second": {
+ "type": "number",
+ "minimum": 0
+ },
+ "search_context_cost_per_query": {
+ "type": "number",
+ "minimum": 0
+ },
+ "code_interpreter_cost_per_session": {
+ "type": "number",
+ "minimum": 0
+ }
+ }
+ },
+ "config_hash": {
+ "type": "string",
+ "nullable": true,
+ "description": "Auto-managed hash for config-file-sourced overrides. Do not set manually."
+ },
+ "created_at": {
+ "type": "string",
+ "format": "date-time"
+ },
+ "updated_at": {
+ "type": "string",
+ "format": "date-time"
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ },
+ "400": {
+ "description": "Bad request",
+ "content": {
+ "application/json": {
+ "schema": {
+ "type": "object",
+ "description": "Error response from Bifrost",
+ "properties": {
+ "event_id": {
+ "type": "string"
+ },
+ "type": {
+ "type": "string"
+ },
+ "is_bifrost_error": {
+ "type": "boolean"
+ },
+ "status_code": {
+ "type": "integer"
+ },
+ "error": {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string"
+ },
+ "code": {
+ "type": "string"
+ },
+ "message": {
+ "type": "string"
+ },
+ "param": {
+ "type": "string"
+ },
+ "event_id": {
+ "type": "string"
+ }
+ }
+ },
+ "extra_fields": {
+ "type": "object",
+ "properties": {
+ "provider": {
+ "type": "string",
+ "description": "AI model provider identifier",
+ "enum": [
+ "openai",
+ "azure",
+ "anthropic",
+ "bedrock",
+ "cohere",
+ "vertex",
+ "vllm",
+ "mistral",
+ "ollama",
+ "groq",
+ "sgl",
+ "parasail",
+ "perplexity",
+ "replicate",
+ "cerebras",
+ "gemini",
+ "openrouter",
+ "elevenlabs",
+ "huggingface",
+ "nebius",
+ "xai",
+ "runway"
+ ]
+ },
+ "model_requested": {
+ "type": "string"
+ },
+ "request_type": {
+ "type": "string"
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ },
+ "500": {
+ "description": "Internal server error",
+ "content": {
+ "application/json": {
+ "schema": {
+ "type": "object",
+ "description": "Error response from Bifrost",
+ "properties": {
+ "event_id": {
+ "type": "string"
+ },
+ "type": {
+ "type": "string"
+ },
+ "is_bifrost_error": {
+ "type": "boolean"
+ },
+ "status_code": {
+ "type": "integer"
+ },
+ "error": {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string"
+ },
+ "code": {
+ "type": "string"
+ },
+ "message": {
+ "type": "string"
+ },
+ "param": {
+ "type": "string"
+ },
+ "event_id": {
+ "type": "string"
+ }
+ }
+ },
+ "extra_fields": {
+ "type": "object",
+ "properties": {
+ "provider": {
+ "type": "string",
+ "description": "AI model provider identifier",
+ "enum": [
+ "openai",
+ "azure",
+ "anthropic",
+ "bedrock",
+ "cohere",
+ "vertex",
+ "vllm",
+ "mistral",
+ "ollama",
+ "groq",
+ "sgl",
+ "parasail",
+ "perplexity",
+ "replicate",
+ "cerebras",
+ "gemini",
+ "openrouter",
+ "elevenlabs",
+ "huggingface",
+ "nebius",
+ "xai",
+ "runway"
+ ]
+ },
+ "model_requested": {
+ "type": "string"
+ },
+ "request_type": {
+ "type": "string"
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ },
+ "/api/governance/pricing-overrides/{id}": {
+ "put": {
+ "operationId": "updatePricingOverride",
+ "summary": "Update pricing override",
+ "description": "Replaces an existing pricing override's configuration.",
+ "tags": [
+ "Governance"
+ ],
+ "parameters": [
+ {
+ "name": "id",
+ "in": "path",
+ "required": true,
+ "description": "Pricing override ID",
+ "schema": {
+ "type": "string"
+ }
+ }
+ ],
+ "requestBody": {
+ "required": true,
+ "content": {
+ "application/json": {
+ "schema": {
+ "type": "object",
+ "description": "Request body for creating or updating a pricing override.",
+ "required": [
+ "name",
+ "scope_kind",
+ "match_type",
+ "pattern",
+ "request_types"
+ ],
+ "properties": {
+ "name": {
+ "type": "string",
+ "description": "Human-readable label"
+ },
+ "scope_kind": {
+ "type": "string",
+ "enum": [
+ "global",
+ "provider",
+ "provider_key",
+ "virtual_key",
+ "virtual_key_provider",
+ "virtual_key_provider_key"
+ ]
+ },
+ "virtual_key_id": {
+ "type": "string",
+ "description": "Required for virtual_key* scopes"
+ },
+ "provider_id": {
+ "type": "string",
+ "description": "Required for provider and virtual_key_provider scopes"
+ },
+ "provider_key_id": {
+ "type": "string",
+ "description": "Required for provider_key and virtual_key_provider_key scopes"
+ },
+ "match_type": {
+ "type": "string",
+ "enum": [
+ "exact",
+ "wildcard"
+ ]
+ },
+ "pattern": {
+ "type": "string",
+ "description": "Model name or wildcard prefix ending with * (e.g. \"claude-3*\")"
+ },
+ "request_types": {
+ "type": "array",
+ "minItems": 1,
+ "items": {
+ "type": "string",
+ "description": "Request type for pricing override filtering. Stream variants are treated identically to their base type — specifying `chat_completion` covers both streaming and non-streaming chat requests.\n",
+ "enum": [
+ "chat_completion",
+ "text_completion",
+ "responses",
+ "embedding",
+ "rerank",
+ "speech",
+ "transcription",
+ "image_generation",
+ "image_variation",
+ "image_edit",
+ "video_generation",
+ "video_remix"
+ ]
+ },
+ "description": "Request types this override applies to. At least one value is required."
+ },
+ "patch": {
+ "type": "object",
+ "description": "Pricing fields to override. Only non-zero/non-null fields are applied. All values are cost per unit in USD.\n",
+ "properties": {
+ "input_cost_per_token": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_token": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_token_batches": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_token_batches": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_token_priority": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_token_priority": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_character": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_token_above_128k_tokens": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_token_above_128k_tokens": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_token_above_200k_tokens": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_token_above_200k_tokens": {
+ "type": "number",
+ "minimum": 0
+ },
+ "cache_creation_input_token_cost": {
+ "type": "number",
+ "minimum": 0
+ },
+ "cache_read_input_token_cost": {
+ "type": "number",
+ "minimum": 0
+ },
+ "cache_creation_input_token_cost_above_200k_tokens": {
+ "type": "number",
+ "minimum": 0
+ },
+ "cache_read_input_token_cost_above_200k_tokens": {
+ "type": "number",
+ "minimum": 0
+ },
+ "cache_read_input_token_cost_priority": {
+ "type": "number",
+ "minimum": 0
+ },
+ "cache_read_input_image_token_cost": {
+ "type": "number",
+ "minimum": 0
+ },
+ "cache_creation_input_audio_token_cost": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_image": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_pixel": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_pixel": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_image_token": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_token": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_low_quality": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_medium_quality": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_high_quality": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_auto_quality": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_premium_image": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_above_512_and_512_pixels": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_above_1024_and_1024_pixels": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_above_2048_and_2048_pixels": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_above_4096_and_4096_pixels": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_audio_token": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_audio_token": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_audio_per_second": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_second": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_video_per_second": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_video_per_second": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_second": {
+ "type": "number",
+ "minimum": 0
+ },
+ "search_context_cost_per_query": {
+ "type": "number",
+ "minimum": 0
+ },
+ "code_interpreter_cost_per_session": {
+ "type": "number",
+ "minimum": 0
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ },
+ "responses": {
+ "200": {
+ "description": "Pricing override updated successfully",
+ "content": {
+ "application/json": {
+ "schema": {
+ "type": "object",
+ "properties": {
+ "message": {
+ "type": "string"
+ },
+ "pricing_override": {
+ "type": "object",
+ "description": "A pricing override that applies custom rates to matching requests.",
+ "properties": {
+ "id": {
+ "type": "string",
+ "description": "Unique override ID (UUID)"
+ },
+ "name": {
+ "type": "string",
+ "description": "Human-readable label"
+ },
+ "scope_kind": {
+ "type": "string",
+ "enum": [
+ "global",
+ "provider",
+ "provider_key",
+ "virtual_key",
+ "virtual_key_provider",
+ "virtual_key_provider_key"
+ ],
+ "description": "Scope that determines which requests this override applies to"
+ },
+ "virtual_key_id": {
+ "type": "string",
+ "nullable": true,
+ "description": "Required for virtual_key* scopes"
+ },
+ "provider_id": {
+ "type": "string",
+ "nullable": true,
+ "description": "Required for provider and virtual_key_provider scopes"
+ },
+ "provider_key_id": {
+ "type": "string",
+ "nullable": true,
+ "description": "Required for provider_key and virtual_key_provider_key scopes"
+ },
+ "match_type": {
+ "type": "string",
+ "enum": [
+ "exact",
+ "wildcard"
+ ],
+ "description": "How the pattern is matched against the model name"
+ },
+ "pattern": {
+ "type": "string",
+ "description": "Model name or wildcard prefix (e.g. \"gpt-4o\" or \"claude-3*\")"
+ },
+ "request_types": {
+ "type": "array",
+ "minItems": 1,
+ "items": {
+ "type": "string",
+ "description": "Request type for pricing override filtering. Stream variants are treated identically to their base type — specifying `chat_completion` covers both streaming and non-streaming chat requests.\n",
+ "enum": [
+ "chat_completion",
+ "text_completion",
+ "responses",
+ "embedding",
+ "rerank",
+ "speech",
+ "transcription",
+ "image_generation",
+ "image_variation",
+ "image_edit",
+ "video_generation",
+ "video_remix"
+ ]
+ },
+ "description": "Request types this override applies to. At least one value is required."
+ },
+ "pricing_patch": {
+ "type": "string",
+ "description": "JSON-encoded pricing fields to override (as stored in the database)"
+ },
+ "patch": {
+ "type": "object",
+ "description": "Decoded pricing fields (present in API responses)",
+ "properties": {
+ "input_cost_per_token": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_token": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_token_batches": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_token_batches": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_token_priority": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_token_priority": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_character": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_token_above_128k_tokens": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_token_above_128k_tokens": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_token_above_200k_tokens": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_token_above_200k_tokens": {
+ "type": "number",
+ "minimum": 0
+ },
+ "cache_creation_input_token_cost": {
+ "type": "number",
+ "minimum": 0
+ },
+ "cache_read_input_token_cost": {
+ "type": "number",
+ "minimum": 0
+ },
+ "cache_creation_input_token_cost_above_200k_tokens": {
+ "type": "number",
+ "minimum": 0
+ },
+ "cache_read_input_token_cost_above_200k_tokens": {
+ "type": "number",
+ "minimum": 0
+ },
+ "cache_read_input_token_cost_priority": {
+ "type": "number",
+ "minimum": 0
+ },
+ "cache_read_input_image_token_cost": {
+ "type": "number",
+ "minimum": 0
+ },
+ "cache_creation_input_audio_token_cost": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_image": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_pixel": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_pixel": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_image_token": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_token": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_low_quality": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_medium_quality": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_high_quality": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_auto_quality": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_premium_image": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_above_512_and_512_pixels": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_above_1024_and_1024_pixels": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_above_2048_and_2048_pixels": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_above_4096_and_4096_pixels": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_audio_token": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_audio_token": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_audio_per_second": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_second": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_video_per_second": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_video_per_second": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_second": {
+ "type": "number",
+ "minimum": 0
+ },
+ "search_context_cost_per_query": {
+ "type": "number",
+ "minimum": 0
+ },
+ "code_interpreter_cost_per_session": {
+ "type": "number",
+ "minimum": 0
+ }
+ }
+ },
+ "config_hash": {
+ "type": "string",
+ "nullable": true,
+ "description": "Auto-managed hash for config-file-sourced overrides. Do not set manually."
+ },
+ "created_at": {
+ "type": "string",
+ "format": "date-time"
+ },
+ "updated_at": {
+ "type": "string",
+ "format": "date-time"
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ },
+ "400": {
+ "description": "Bad request",
"content": {
"application/json": {
"schema": {
@@ -162152,8 +164266,8 @@
}
}
},
- "500": {
- "description": "Internal server error",
+ "404": {
+ "description": "Pricing override not found",
"content": {
"application/json": {
"schema": {
@@ -162235,46 +164349,9 @@
}
}
}
- }
- }
- },
- "delete": {
- "operationId": "deleteProviderGovernance",
- "summary": "Delete provider governance",
- "description": "Removes governance settings (budget and rate limits) for a specific provider.",
- "tags": [
- "Governance"
- ],
- "parameters": [
- {
- "name": "provider_name",
- "in": "path",
- "required": true,
- "description": "Provider name",
- "schema": {
- "type": "string"
- }
- }
- ],
- "responses": {
- "200": {
- "description": "Provider governance deleted successfully",
- "content": {
- "application/json": {
- "schema": {
- "type": "object",
- "description": "Simple message response",
- "properties": {
- "message": {
- "type": "string"
- }
- }
- }
- }
- }
},
- "404": {
- "description": "Provider not found",
+ "500": {
+ "description": "Internal server error",
"content": {
"application/json": {
"schema": {
@@ -162356,6 +164433,43 @@
}
}
}
+ }
+ }
+ },
+ "delete": {
+ "operationId": "deletePricingOverride",
+ "summary": "Delete pricing override",
+ "description": "Deletes a pricing override by ID.",
+ "tags": [
+ "Governance"
+ ],
+ "parameters": [
+ {
+ "name": "id",
+ "in": "path",
+ "required": true,
+ "description": "Pricing override ID",
+ "schema": {
+ "type": "string"
+ }
+ }
+ ],
+ "responses": {
+ "200": {
+ "description": "Pricing override deleted successfully",
+ "content": {
+ "application/json": {
+ "schema": {
+ "type": "object",
+ "description": "Simple message response",
+ "properties": {
+ "message": {
+ "type": "string"
+ }
+ }
+ }
+ }
+ }
},
"500": {
"description": "Internal server error",
@@ -170315,7 +172429,9 @@
"type": "string"
},
"weight": {
- "type": "number"
+ "type": "number",
+ "nullable": true,
+ "description": "Weight for provider load balancing. Null means excluded from weighted routing."
},
"allowed_models": {
"type": "array",
@@ -198971,7 +201087,9 @@
"type": "string"
},
"weight": {
- "type": "number"
+ "type": "number",
+ "nullable": true,
+ "description": "Weight for provider load balancing. Null means excluded from weighted routing."
},
"allowed_models": {
"type": "array",
@@ -199437,7 +201555,9 @@
"type": "string"
},
"weight": {
- "type": "number"
+ "type": "number",
+ "nullable": true,
+ "description": "Weight for provider load balancing. Null means excluded from weighted routing."
},
"allowed_models": {
"type": "array",
@@ -199877,7 +201997,9 @@
"type": "string"
},
"weight": {
- "type": "number"
+ "type": "number",
+ "nullable": true,
+ "description": "Weight for provider load balancing. Null means excluded from weighted routing."
},
"allowed_models": {
"type": "array",
@@ -200336,7 +202458,9 @@
"type": "string"
},
"weight": {
- "type": "number"
+ "type": "number",
+ "nullable": true,
+ "description": "Weight for provider load balancing. Null means excluded from weighted routing."
},
"allowed_models": {
"type": "array",
@@ -200781,6 +202905,7 @@
},
"provider_configs": {
"type": "array",
+ "description": "Provider configurations (empty means no providers allowed, deny-by-default)",
"items": {
"type": "object",
"properties": {
@@ -200788,7 +202913,9 @@
"type": "string"
},
"weight": {
- "type": "number"
+ "type": "number",
+ "nullable": true,
+ "description": "Weight for load balancing. Null means excluded from weighted routing."
},
"allowed_models": {
"type": "array",
@@ -200843,6 +202970,7 @@
},
"mcp_configs": {
"type": "array",
+ "description": "MCP configurations (empty means no MCP tools allowed, deny-by-default)",
"items": {
"type": "object",
"properties": {
@@ -200927,7 +203055,9 @@
"type": "string"
},
"weight": {
- "type": "number"
+ "type": "number",
+ "nullable": true,
+ "description": "Weight for load balancing. Null means excluded from weighted routing."
},
"allowed_models": {
"type": "array",
@@ -201147,7 +203277,9 @@
"type": "string"
},
"weight": {
- "type": "number"
+ "type": "number",
+ "nullable": true,
+ "description": "Weight for provider load balancing. Null means excluded from weighted routing."
},
"allowed_models": {
"type": "array",
@@ -201658,7 +203790,9 @@
"type": "string"
},
"weight": {
- "type": "number"
+ "type": "number",
+ "nullable": true,
+ "description": "Weight for provider load balancing. Null means excluded from weighted routing."
},
"allowed_models": {
"type": "array",
@@ -202224,7 +204358,9 @@
"type": "string"
},
"weight": {
- "type": "number"
+ "type": "number",
+ "nullable": true,
+ "description": "Weight for provider load balancing. Null means excluded from weighted routing."
},
"allowed_models": {
"type": "array",
@@ -202735,7 +204871,9 @@
"type": "string"
},
"weight": {
- "type": "number"
+ "type": "number",
+ "nullable": true,
+ "description": "Weight for provider load balancing. Null means excluded from weighted routing."
},
"allowed_models": {
"type": "array",
@@ -203302,7 +205440,9 @@
"type": "string"
},
"weight": {
- "type": "number"
+ "type": "number",
+ "nullable": true,
+ "description": "Weight for provider load balancing. Null means excluded from weighted routing."
},
"allowed_models": {
"type": "array",
@@ -203813,7 +205953,9 @@
"type": "string"
},
"weight": {
- "type": "number"
+ "type": "number",
+ "nullable": true,
+ "description": "Weight for provider load balancing. Null means excluded from weighted routing."
},
"allowed_models": {
"type": "array",
@@ -204484,7 +206626,9 @@
"type": "string"
},
"weight": {
- "type": "number"
+ "type": "number",
+ "nullable": true,
+ "description": "Weight for provider load balancing. Null means excluded from weighted routing."
},
"allowed_models": {
"type": "array",
@@ -204995,7 +207139,9 @@
"type": "string"
},
"weight": {
- "type": "number"
+ "type": "number",
+ "nullable": true,
+ "description": "Weight for provider load balancing. Null means excluded from weighted routing."
},
"allowed_models": {
"type": "array",
@@ -205484,7 +207630,9 @@
"type": "string"
},
"weight": {
- "type": "number"
+ "type": "number",
+ "nullable": true,
+ "description": "Weight for provider load balancing. Null means excluded from weighted routing."
},
"allowed_models": {
"type": "array",
@@ -206021,7 +208169,9 @@
"type": "string"
},
"weight": {
- "type": "number"
+ "type": "number",
+ "nullable": true,
+ "description": "Weight for provider load balancing. Null means excluded from weighted routing."
},
"allowed_models": {
"type": "array",
@@ -206559,7 +208709,9 @@
"type": "string"
},
"weight": {
- "type": "number"
+ "type": "number",
+ "nullable": true,
+ "description": "Weight for provider load balancing. Null means excluded from weighted routing."
},
"allowed_models": {
"type": "array",
@@ -209096,6 +211248,1267 @@
}
}
},
+ "PricingOverrideRequestType": {
+ "type": "string",
+ "description": "Request type for pricing override filtering. Stream variants are treated identically to their base type — specifying `chat_completion` covers both streaming and non-streaming chat requests.\n",
+ "enum": [
+ "chat_completion",
+ "text_completion",
+ "responses",
+ "embedding",
+ "rerank",
+ "speech",
+ "transcription",
+ "image_generation",
+ "image_variation",
+ "image_edit",
+ "video_generation",
+ "video_remix"
+ ]
+ },
+ "PricingPatch": {
+ "type": "object",
+ "description": "Pricing fields to override. Only non-zero/non-null fields are applied. All values are cost per unit in USD.\n",
+ "properties": {
+ "input_cost_per_token": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_token": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_token_batches": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_token_batches": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_token_priority": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_token_priority": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_character": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_token_above_128k_tokens": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_token_above_128k_tokens": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_token_above_200k_tokens": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_token_above_200k_tokens": {
+ "type": "number",
+ "minimum": 0
+ },
+ "cache_creation_input_token_cost": {
+ "type": "number",
+ "minimum": 0
+ },
+ "cache_read_input_token_cost": {
+ "type": "number",
+ "minimum": 0
+ },
+ "cache_creation_input_token_cost_above_200k_tokens": {
+ "type": "number",
+ "minimum": 0
+ },
+ "cache_read_input_token_cost_above_200k_tokens": {
+ "type": "number",
+ "minimum": 0
+ },
+ "cache_read_input_token_cost_priority": {
+ "type": "number",
+ "minimum": 0
+ },
+ "cache_read_input_image_token_cost": {
+ "type": "number",
+ "minimum": 0
+ },
+ "cache_creation_input_audio_token_cost": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_image": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_pixel": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_pixel": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_image_token": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_token": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_low_quality": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_medium_quality": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_high_quality": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_auto_quality": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_premium_image": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_above_512_and_512_pixels": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_above_1024_and_1024_pixels": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_above_2048_and_2048_pixels": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_above_4096_and_4096_pixels": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_audio_token": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_audio_token": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_audio_per_second": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_second": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_video_per_second": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_video_per_second": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_second": {
+ "type": "number",
+ "minimum": 0
+ },
+ "search_context_cost_per_query": {
+ "type": "number",
+ "minimum": 0
+ },
+ "code_interpreter_cost_per_session": {
+ "type": "number",
+ "minimum": 0
+ }
+ }
+ },
+ "PricingOverride": {
+ "type": "object",
+ "description": "A pricing override that applies custom rates to matching requests.",
+ "properties": {
+ "id": {
+ "type": "string",
+ "description": "Unique override ID (UUID)"
+ },
+ "name": {
+ "type": "string",
+ "description": "Human-readable label"
+ },
+ "scope_kind": {
+ "type": "string",
+ "enum": [
+ "global",
+ "provider",
+ "provider_key",
+ "virtual_key",
+ "virtual_key_provider",
+ "virtual_key_provider_key"
+ ],
+ "description": "Scope that determines which requests this override applies to"
+ },
+ "virtual_key_id": {
+ "type": "string",
+ "nullable": true,
+ "description": "Required for virtual_key* scopes"
+ },
+ "provider_id": {
+ "type": "string",
+ "nullable": true,
+ "description": "Required for provider and virtual_key_provider scopes"
+ },
+ "provider_key_id": {
+ "type": "string",
+ "nullable": true,
+ "description": "Required for provider_key and virtual_key_provider_key scopes"
+ },
+ "match_type": {
+ "type": "string",
+ "enum": [
+ "exact",
+ "wildcard"
+ ],
+ "description": "How the pattern is matched against the model name"
+ },
+ "pattern": {
+ "type": "string",
+ "description": "Model name or wildcard prefix (e.g. \"gpt-4o\" or \"claude-3*\")"
+ },
+ "request_types": {
+ "type": "array",
+ "minItems": 1,
+ "items": {
+ "type": "string",
+ "description": "Request type for pricing override filtering. Stream variants are treated identically to their base type — specifying `chat_completion` covers both streaming and non-streaming chat requests.\n",
+ "enum": [
+ "chat_completion",
+ "text_completion",
+ "responses",
+ "embedding",
+ "rerank",
+ "speech",
+ "transcription",
+ "image_generation",
+ "image_variation",
+ "image_edit",
+ "video_generation",
+ "video_remix"
+ ]
+ },
+ "description": "Request types this override applies to. At least one value is required."
+ },
+ "pricing_patch": {
+ "type": "string",
+ "description": "JSON-encoded pricing fields to override (as stored in the database)"
+ },
+ "patch": {
+ "type": "object",
+ "description": "Decoded pricing fields (present in API responses)",
+ "properties": {
+ "input_cost_per_token": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_token": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_token_batches": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_token_batches": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_token_priority": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_token_priority": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_character": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_token_above_128k_tokens": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_token_above_128k_tokens": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_token_above_200k_tokens": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_token_above_200k_tokens": {
+ "type": "number",
+ "minimum": 0
+ },
+ "cache_creation_input_token_cost": {
+ "type": "number",
+ "minimum": 0
+ },
+ "cache_read_input_token_cost": {
+ "type": "number",
+ "minimum": 0
+ },
+ "cache_creation_input_token_cost_above_200k_tokens": {
+ "type": "number",
+ "minimum": 0
+ },
+ "cache_read_input_token_cost_above_200k_tokens": {
+ "type": "number",
+ "minimum": 0
+ },
+ "cache_read_input_token_cost_priority": {
+ "type": "number",
+ "minimum": 0
+ },
+ "cache_read_input_image_token_cost": {
+ "type": "number",
+ "minimum": 0
+ },
+ "cache_creation_input_audio_token_cost": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_image": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_pixel": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_pixel": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_image_token": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_token": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_low_quality": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_medium_quality": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_high_quality": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_auto_quality": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_premium_image": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_above_512_and_512_pixels": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_above_1024_and_1024_pixels": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_above_2048_and_2048_pixels": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_above_4096_and_4096_pixels": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_audio_token": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_audio_token": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_audio_per_second": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_second": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_video_per_second": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_video_per_second": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_second": {
+ "type": "number",
+ "minimum": 0
+ },
+ "search_context_cost_per_query": {
+ "type": "number",
+ "minimum": 0
+ },
+ "code_interpreter_cost_per_session": {
+ "type": "number",
+ "minimum": 0
+ }
+ }
+ },
+ "config_hash": {
+ "type": "string",
+ "nullable": true,
+ "description": "Auto-managed hash for config-file-sourced overrides. Do not set manually."
+ },
+ "created_at": {
+ "type": "string",
+ "format": "date-time"
+ },
+ "updated_at": {
+ "type": "string",
+ "format": "date-time"
+ }
+ }
+ },
+ "CreatePricingOverrideRequest": {
+ "type": "object",
+ "description": "Request body for creating or updating a pricing override.",
+ "required": [
+ "name",
+ "scope_kind",
+ "match_type",
+ "pattern",
+ "request_types"
+ ],
+ "properties": {
+ "name": {
+ "type": "string",
+ "description": "Human-readable label"
+ },
+ "scope_kind": {
+ "type": "string",
+ "enum": [
+ "global",
+ "provider",
+ "provider_key",
+ "virtual_key",
+ "virtual_key_provider",
+ "virtual_key_provider_key"
+ ]
+ },
+ "virtual_key_id": {
+ "type": "string",
+ "description": "Required for virtual_key* scopes"
+ },
+ "provider_id": {
+ "type": "string",
+ "description": "Required for provider and virtual_key_provider scopes"
+ },
+ "provider_key_id": {
+ "type": "string",
+ "description": "Required for provider_key and virtual_key_provider_key scopes"
+ },
+ "match_type": {
+ "type": "string",
+ "enum": [
+ "exact",
+ "wildcard"
+ ]
+ },
+ "pattern": {
+ "type": "string",
+ "description": "Model name or wildcard prefix ending with * (e.g. \"claude-3*\")"
+ },
+ "request_types": {
+ "type": "array",
+ "minItems": 1,
+ "items": {
+ "type": "string",
+ "description": "Request type for pricing override filtering. Stream variants are treated identically to their base type — specifying `chat_completion` covers both streaming and non-streaming chat requests.\n",
+ "enum": [
+ "chat_completion",
+ "text_completion",
+ "responses",
+ "embedding",
+ "rerank",
+ "speech",
+ "transcription",
+ "image_generation",
+ "image_variation",
+ "image_edit",
+ "video_generation",
+ "video_remix"
+ ]
+ },
+ "description": "Request types this override applies to. At least one value is required."
+ },
+ "patch": {
+ "type": "object",
+ "description": "Pricing fields to override. Only non-zero/non-null fields are applied. All values are cost per unit in USD.\n",
+ "properties": {
+ "input_cost_per_token": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_token": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_token_batches": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_token_batches": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_token_priority": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_token_priority": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_character": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_token_above_128k_tokens": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_token_above_128k_tokens": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_token_above_200k_tokens": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_token_above_200k_tokens": {
+ "type": "number",
+ "minimum": 0
+ },
+ "cache_creation_input_token_cost": {
+ "type": "number",
+ "minimum": 0
+ },
+ "cache_read_input_token_cost": {
+ "type": "number",
+ "minimum": 0
+ },
+ "cache_creation_input_token_cost_above_200k_tokens": {
+ "type": "number",
+ "minimum": 0
+ },
+ "cache_read_input_token_cost_above_200k_tokens": {
+ "type": "number",
+ "minimum": 0
+ },
+ "cache_read_input_token_cost_priority": {
+ "type": "number",
+ "minimum": 0
+ },
+ "cache_read_input_image_token_cost": {
+ "type": "number",
+ "minimum": 0
+ },
+ "cache_creation_input_audio_token_cost": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_image": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_pixel": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_pixel": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_image_token": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_token": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_low_quality": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_medium_quality": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_high_quality": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_auto_quality": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_premium_image": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_above_512_and_512_pixels": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_above_1024_and_1024_pixels": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_above_2048_and_2048_pixels": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_above_4096_and_4096_pixels": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_audio_token": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_audio_token": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_audio_per_second": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_second": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_video_per_second": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_video_per_second": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_second": {
+ "type": "number",
+ "minimum": 0
+ },
+ "search_context_cost_per_query": {
+ "type": "number",
+ "minimum": 0
+ },
+ "code_interpreter_cost_per_session": {
+ "type": "number",
+ "minimum": 0
+ }
+ }
+ }
+ }
+ },
+ "PricingOverrideResponse": {
+ "type": "object",
+ "properties": {
+ "message": {
+ "type": "string"
+ },
+ "pricing_override": {
+ "type": "object",
+ "description": "A pricing override that applies custom rates to matching requests.",
+ "properties": {
+ "id": {
+ "type": "string",
+ "description": "Unique override ID (UUID)"
+ },
+ "name": {
+ "type": "string",
+ "description": "Human-readable label"
+ },
+ "scope_kind": {
+ "type": "string",
+ "enum": [
+ "global",
+ "provider",
+ "provider_key",
+ "virtual_key",
+ "virtual_key_provider",
+ "virtual_key_provider_key"
+ ],
+ "description": "Scope that determines which requests this override applies to"
+ },
+ "virtual_key_id": {
+ "type": "string",
+ "nullable": true,
+ "description": "Required for virtual_key* scopes"
+ },
+ "provider_id": {
+ "type": "string",
+ "nullable": true,
+ "description": "Required for provider and virtual_key_provider scopes"
+ },
+ "provider_key_id": {
+ "type": "string",
+ "nullable": true,
+ "description": "Required for provider_key and virtual_key_provider_key scopes"
+ },
+ "match_type": {
+ "type": "string",
+ "enum": [
+ "exact",
+ "wildcard"
+ ],
+ "description": "How the pattern is matched against the model name"
+ },
+ "pattern": {
+ "type": "string",
+ "description": "Model name or wildcard prefix (e.g. \"gpt-4o\" or \"claude-3*\")"
+ },
+ "request_types": {
+ "type": "array",
+ "minItems": 1,
+ "items": {
+ "type": "string",
+ "description": "Request type for pricing override filtering. Stream variants are treated identically to their base type — specifying `chat_completion` covers both streaming and non-streaming chat requests.\n",
+ "enum": [
+ "chat_completion",
+ "text_completion",
+ "responses",
+ "embedding",
+ "rerank",
+ "speech",
+ "transcription",
+ "image_generation",
+ "image_variation",
+ "image_edit",
+ "video_generation",
+ "video_remix"
+ ]
+ },
+ "description": "Request types this override applies to. At least one value is required."
+ },
+ "pricing_patch": {
+ "type": "string",
+ "description": "JSON-encoded pricing fields to override (as stored in the database)"
+ },
+ "patch": {
+ "type": "object",
+ "description": "Decoded pricing fields (present in API responses)",
+ "properties": {
+ "input_cost_per_token": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_token": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_token_batches": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_token_batches": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_token_priority": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_token_priority": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_character": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_token_above_128k_tokens": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_token_above_128k_tokens": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_token_above_200k_tokens": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_token_above_200k_tokens": {
+ "type": "number",
+ "minimum": 0
+ },
+ "cache_creation_input_token_cost": {
+ "type": "number",
+ "minimum": 0
+ },
+ "cache_read_input_token_cost": {
+ "type": "number",
+ "minimum": 0
+ },
+ "cache_creation_input_token_cost_above_200k_tokens": {
+ "type": "number",
+ "minimum": 0
+ },
+ "cache_read_input_token_cost_above_200k_tokens": {
+ "type": "number",
+ "minimum": 0
+ },
+ "cache_read_input_token_cost_priority": {
+ "type": "number",
+ "minimum": 0
+ },
+ "cache_read_input_image_token_cost": {
+ "type": "number",
+ "minimum": 0
+ },
+ "cache_creation_input_audio_token_cost": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_image": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_pixel": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_pixel": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_image_token": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_token": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_low_quality": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_medium_quality": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_high_quality": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_auto_quality": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_premium_image": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_above_512_and_512_pixels": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_above_1024_and_1024_pixels": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_above_2048_and_2048_pixels": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_above_4096_and_4096_pixels": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_audio_token": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_audio_token": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_audio_per_second": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_second": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_video_per_second": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_video_per_second": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_second": {
+ "type": "number",
+ "minimum": 0
+ },
+ "search_context_cost_per_query": {
+ "type": "number",
+ "minimum": 0
+ },
+ "code_interpreter_cost_per_session": {
+ "type": "number",
+ "minimum": 0
+ }
+ }
+ },
+ "config_hash": {
+ "type": "string",
+ "nullable": true,
+ "description": "Auto-managed hash for config-file-sourced overrides. Do not set manually."
+ },
+ "created_at": {
+ "type": "string",
+ "format": "date-time"
+ },
+ "updated_at": {
+ "type": "string",
+ "format": "date-time"
+ }
+ }
+ }
+ }
+ },
+ "ListPricingOverridesResponse": {
+ "type": "object",
+ "properties": {
+ "pricing_overrides": {
+ "type": "array",
+ "items": {
+ "type": "object",
+ "description": "A pricing override that applies custom rates to matching requests.",
+ "properties": {
+ "id": {
+ "type": "string",
+ "description": "Unique override ID (UUID)"
+ },
+ "name": {
+ "type": "string",
+ "description": "Human-readable label"
+ },
+ "scope_kind": {
+ "type": "string",
+ "enum": [
+ "global",
+ "provider",
+ "provider_key",
+ "virtual_key",
+ "virtual_key_provider",
+ "virtual_key_provider_key"
+ ],
+ "description": "Scope that determines which requests this override applies to"
+ },
+ "virtual_key_id": {
+ "type": "string",
+ "nullable": true,
+ "description": "Required for virtual_key* scopes"
+ },
+ "provider_id": {
+ "type": "string",
+ "nullable": true,
+ "description": "Required for provider and virtual_key_provider scopes"
+ },
+ "provider_key_id": {
+ "type": "string",
+ "nullable": true,
+ "description": "Required for provider_key and virtual_key_provider_key scopes"
+ },
+ "match_type": {
+ "type": "string",
+ "enum": [
+ "exact",
+ "wildcard"
+ ],
+ "description": "How the pattern is matched against the model name"
+ },
+ "pattern": {
+ "type": "string",
+ "description": "Model name or wildcard prefix (e.g. \"gpt-4o\" or \"claude-3*\")"
+ },
+ "request_types": {
+ "type": "array",
+ "minItems": 1,
+ "items": {
+ "type": "string",
+ "description": "Request type for pricing override filtering. Stream variants are treated identically to their base type — specifying `chat_completion` covers both streaming and non-streaming chat requests.\n",
+ "enum": [
+ "chat_completion",
+ "text_completion",
+ "responses",
+ "embedding",
+ "rerank",
+ "speech",
+ "transcription",
+ "image_generation",
+ "image_variation",
+ "image_edit",
+ "video_generation",
+ "video_remix"
+ ]
+ },
+ "description": "Request types this override applies to. At least one value is required."
+ },
+ "pricing_patch": {
+ "type": "string",
+ "description": "JSON-encoded pricing fields to override (as stored in the database)"
+ },
+ "patch": {
+ "type": "object",
+ "description": "Decoded pricing fields (present in API responses)",
+ "properties": {
+ "input_cost_per_token": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_token": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_token_batches": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_token_batches": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_token_priority": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_token_priority": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_character": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_token_above_128k_tokens": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_token_above_128k_tokens": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_token_above_200k_tokens": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_token_above_200k_tokens": {
+ "type": "number",
+ "minimum": 0
+ },
+ "cache_creation_input_token_cost": {
+ "type": "number",
+ "minimum": 0
+ },
+ "cache_read_input_token_cost": {
+ "type": "number",
+ "minimum": 0
+ },
+ "cache_creation_input_token_cost_above_200k_tokens": {
+ "type": "number",
+ "minimum": 0
+ },
+ "cache_read_input_token_cost_above_200k_tokens": {
+ "type": "number",
+ "minimum": 0
+ },
+ "cache_read_input_token_cost_priority": {
+ "type": "number",
+ "minimum": 0
+ },
+ "cache_read_input_image_token_cost": {
+ "type": "number",
+ "minimum": 0
+ },
+ "cache_creation_input_audio_token_cost": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_image": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_pixel": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_pixel": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_image_token": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_token": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_low_quality": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_medium_quality": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_high_quality": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_auto_quality": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_premium_image": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_above_512_and_512_pixels": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_above_1024_and_1024_pixels": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_above_2048_and_2048_pixels": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_image_above_4096_and_4096_pixels": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_audio_token": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_audio_token": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_audio_per_second": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_second": {
+ "type": "number",
+ "minimum": 0
+ },
+ "input_cost_per_video_per_second": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_video_per_second": {
+ "type": "number",
+ "minimum": 0
+ },
+ "output_cost_per_second": {
+ "type": "number",
+ "minimum": 0
+ },
+ "search_context_cost_per_query": {
+ "type": "number",
+ "minimum": 0
+ },
+ "code_interpreter_cost_per_session": {
+ "type": "number",
+ "minimum": 0
+ }
+ }
+ },
+ "config_hash": {
+ "type": "string",
+ "nullable": true,
+ "description": "Auto-managed hash for config-file-sourced overrides. Do not set manually."
+ },
+ "created_at": {
+ "type": "string",
+ "format": "date-time"
+ },
+ "updated_at": {
+ "type": "string",
+ "format": "date-time"
+ }
+ }
+ }
+ },
+ "count": {
+ "type": "integer",
+ "description": "Total number of overrides returned"
+ }
+ }
+ },
"LogEntry": {
"type": "object",
"description": "Log entry",
diff --git a/docs/openapi/openapi.yaml b/docs/openapi/openapi.yaml
index 9059709cb5..027fa9a643 100644
--- a/docs/openapi/openapi.yaml
+++ b/docs/openapi/openapi.yaml
@@ -38,7 +38,7 @@ info:
- `/api/config` - Configuration management
- `/api/providers` - Provider and API key management
- `/api/plugins` - Plugin management
- - `/api/governance/*` - Virtual keys, teams, customers, budgets, rate limits, and routing rules
+ - `/api/governance/*` - Virtual keys, teams, customers, budgets, rate limits, routing rules, and pricing overrides
- `/api/logs` - Log search and analytics
- `/api/mcp/*` - MCP (Model Context Protocol) client management
- `/api/session/*` - Authentication and session management
@@ -636,6 +636,12 @@ paths:
/api/governance/providers/{provider_name}:
$ref: './paths/management/governance.yaml#/provider-governance-by-name'
+ # Governance - Pricing Overrides
+ /api/governance/pricing-overrides:
+ $ref: './paths/management/governance.yaml#/pricing-overrides'
+ /api/governance/pricing-overrides/{id}:
+ $ref: './paths/management/governance.yaml#/pricing-overrides-by-id'
+
# Logging
/api/logs:
$ref: './paths/management/logging.yaml#/logs'
@@ -1097,6 +1103,22 @@ components:
UpdateProviderGovernanceRequest:
$ref: './schemas/management/governance.yaml#/UpdateProviderGovernanceRequest'
+ # Governance - Pricing Overrides
+ PricingOverrideRequestType:
+ $ref: './schemas/management/governance.yaml#/PricingOverrideRequestType'
+ PricingPatch:
+ $ref: './schemas/management/governance.yaml#/PricingPatch'
+ PricingOverride:
+ $ref: './schemas/management/governance.yaml#/PricingOverride'
+ CreatePricingOverrideRequest:
+ $ref: './schemas/management/governance.yaml#/CreatePricingOverrideRequest'
+ UpdatePricingOverrideRequest:
+ $ref: './schemas/management/governance.yaml#/UpdatePricingOverrideRequest'
+ PricingOverrideResponse:
+ $ref: './schemas/management/governance.yaml#/PricingOverrideResponse'
+ ListPricingOverridesResponse:
+ $ref: './schemas/management/governance.yaml#/ListPricingOverridesResponse'
+
# Logging
LogEntry:
$ref: './schemas/management/logging.yaml#/LogEntry'
diff --git a/docs/openapi/paths/management/governance.yaml b/docs/openapi/paths/management/governance.yaml
index b9e85bfdd6..35e38e1b99 100644
--- a/docs/openapi/paths/management/governance.yaml
+++ b/docs/openapi/paths/management/governance.yaml
@@ -897,4 +897,135 @@ provider-governance-by-name:
schema:
$ref: '../../schemas/inference/common.yaml#/BifrostError'
'500':
- $ref: '../../openapi.yaml#/components/responses/InternalError'
\ No newline at end of file
+ $ref: '../../openapi.yaml#/components/responses/InternalError'
+# Pricing Overrides CRUD
+
+pricing-overrides:
+ get:
+ operationId: listPricingOverrides
+ summary: List pricing overrides
+ description: Returns all pricing overrides, optionally filtered by scope.
+ tags:
+ - Governance
+ parameters:
+ - name: scope_kind
+ in: query
+ description: Filter by scope kind
+ schema:
+ type: string
+ enum:
+ - global
+ - provider
+ - provider_key
+ - virtual_key
+ - virtual_key_provider
+ - virtual_key_provider_key
+ - name: virtual_key_id
+ in: query
+ description: Filter by virtual key ID (for virtual_key* scopes)
+ schema:
+ type: string
+ - name: provider_id
+ in: query
+ description: Filter by provider ID
+ schema:
+ type: string
+ - name: provider_key_id
+ in: query
+ description: Filter by provider key ID
+ schema:
+ type: string
+ responses:
+ '200':
+ description: Successful response
+ content:
+ application/json:
+ schema:
+ $ref: '../../schemas/management/governance.yaml#/ListPricingOverridesResponse'
+ '500':
+ $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+ post:
+ operationId: createPricingOverride
+ summary: Create pricing override
+ description: Creates a new pricing override. The most specific matching scope always wins during cost resolution.
+ tags:
+ - Governance
+ requestBody:
+ required: true
+ content:
+ application/json:
+ schema:
+ $ref: '../../schemas/management/governance.yaml#/CreatePricingOverrideRequest'
+ responses:
+ '201':
+ description: Pricing override created successfully
+ content:
+ application/json:
+ schema:
+ $ref: '../../schemas/management/governance.yaml#/PricingOverrideResponse'
+ '400':
+ $ref: '../../openapi.yaml#/components/responses/BadRequest'
+ '500':
+ $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+pricing-overrides-by-id:
+ put:
+ operationId: updatePricingOverride
+ summary: Update pricing override
+ description: Updates an existing pricing override. Omitted fields are merged from the existing record. The `patch` field is always replaced in full when provided.
+ tags:
+ - Governance
+ parameters:
+ - name: id
+ in: path
+ required: true
+ description: Pricing override ID
+ schema:
+ type: string
+ requestBody:
+ required: true
+ content:
+ application/json:
+ schema:
+ $ref: '../../schemas/management/governance.yaml#/UpdatePricingOverrideRequest'
+ responses:
+ '200':
+ description: Pricing override updated successfully
+ content:
+ application/json:
+ schema:
+ $ref: '../../schemas/management/governance.yaml#/PricingOverrideResponse'
+ '400':
+ $ref: '../../openapi.yaml#/components/responses/BadRequest'
+ '404':
+ description: Pricing override not found
+ content:
+ application/json:
+ schema:
+ $ref: '../../schemas/inference/common.yaml#/BifrostError'
+ '500':
+ $ref: '../../openapi.yaml#/components/responses/InternalError'
+
+ delete:
+ operationId: deletePricingOverride
+ summary: Delete pricing override
+ description: Deletes a pricing override by ID.
+ tags:
+ - Governance
+ parameters:
+ - name: id
+ in: path
+ required: true
+ description: Pricing override ID
+ schema:
+ type: string
+ responses:
+ '200':
+ description: Pricing override deleted successfully
+ content:
+ application/json:
+ schema:
+ $ref: '../../schemas/management/common.yaml#/MessageResponse'
+ '500':
+ $ref: '../../openapi.yaml#/components/responses/InternalError'
diff --git a/docs/openapi/schemas/management/governance.yaml b/docs/openapi/schemas/management/governance.yaml
index 7053f1a8dc..a5d8b379f6 100644
--- a/docs/openapi/schemas/management/governance.yaml
+++ b/docs/openapi/schemas/management/governance.yaml
@@ -937,3 +937,334 @@ UpdateProviderGovernanceRequest:
rate_limit:
$ref: '#/UpdateRateLimitRequest'
description: Rate limit configuration
+
+# Pricing Overrides
+
+PricingOverrideRequestType:
+ type: string
+ description: >
+ Request type for pricing override filtering. Stream variants are treated
+ identically to their base type — specifying `chat_completion` covers both
+ streaming and non-streaming chat requests.
+ enum:
+ - chat_completion
+ - text_completion
+ - responses
+ - embedding
+ - rerank
+ - speech
+ - transcription
+ - image_generation
+ - image_variation
+ - image_edit
+ - video_generation
+ - video_remix
+
+PricingPatch:
+ type: object
+ description: >
+ Pricing fields to override. Only non-zero/non-null fields are applied.
+ All values are cost per unit in USD.
+ properties:
+ input_cost_per_token:
+ type: number
+ minimum: 0
+ output_cost_per_token:
+ type: number
+ minimum: 0
+ input_cost_per_token_batches:
+ type: number
+ minimum: 0
+ output_cost_per_token_batches:
+ type: number
+ minimum: 0
+ input_cost_per_token_priority:
+ type: number
+ minimum: 0
+ output_cost_per_token_priority:
+ type: number
+ minimum: 0
+ input_cost_per_character:
+ type: number
+ minimum: 0
+ input_cost_per_token_above_128k_tokens:
+ type: number
+ minimum: 0
+ output_cost_per_token_above_128k_tokens:
+ type: number
+ minimum: 0
+ input_cost_per_token_above_200k_tokens:
+ type: number
+ minimum: 0
+ output_cost_per_token_above_200k_tokens:
+ type: number
+ minimum: 0
+ cache_creation_input_token_cost:
+ type: number
+ minimum: 0
+ cache_read_input_token_cost:
+ type: number
+ minimum: 0
+ cache_creation_input_token_cost_above_200k_tokens:
+ type: number
+ minimum: 0
+ cache_read_input_token_cost_above_200k_tokens:
+ type: number
+ minimum: 0
+ cache_read_input_token_cost_priority:
+ type: number
+ minimum: 0
+ cache_read_input_image_token_cost:
+ type: number
+ minimum: 0
+ cache_creation_input_audio_token_cost:
+ type: number
+ minimum: 0
+ input_cost_per_image:
+ type: number
+ minimum: 0
+ output_cost_per_image:
+ type: number
+ minimum: 0
+ input_cost_per_pixel:
+ type: number
+ minimum: 0
+ output_cost_per_pixel:
+ type: number
+ minimum: 0
+ input_cost_per_image_token:
+ type: number
+ minimum: 0
+ output_cost_per_image_token:
+ type: number
+ minimum: 0
+ output_cost_per_image_low_quality:
+ type: number
+ minimum: 0
+ output_cost_per_image_medium_quality:
+ type: number
+ minimum: 0
+ output_cost_per_image_high_quality:
+ type: number
+ minimum: 0
+ output_cost_per_image_auto_quality:
+ type: number
+ minimum: 0
+ output_cost_per_image_premium_image:
+ type: number
+ minimum: 0
+ output_cost_per_image_above_512_and_512_pixels:
+ type: number
+ minimum: 0
+ output_cost_per_image_above_1024_and_1024_pixels:
+ type: number
+ minimum: 0
+ output_cost_per_image_above_2048_and_2048_pixels:
+ type: number
+ minimum: 0
+ output_cost_per_image_above_4096_and_4096_pixels:
+ type: number
+ minimum: 0
+ input_cost_per_audio_token:
+ type: number
+ minimum: 0
+ output_cost_per_audio_token:
+ type: number
+ minimum: 0
+ input_cost_per_audio_per_second:
+ type: number
+ minimum: 0
+ input_cost_per_second:
+ type: number
+ minimum: 0
+ input_cost_per_video_per_second:
+ type: number
+ minimum: 0
+ output_cost_per_video_per_second:
+ type: number
+ minimum: 0
+ output_cost_per_second:
+ type: number
+ minimum: 0
+ search_context_cost_per_query:
+ type: number
+ minimum: 0
+ code_interpreter_cost_per_session:
+ type: number
+ minimum: 0
+
+PricingOverride:
+ type: object
+ description: A pricing override that applies custom rates to matching requests.
+ properties:
+ id:
+ type: string
+ description: Unique override ID (UUID)
+ name:
+ type: string
+ description: Human-readable label
+ scope_kind:
+ type: string
+ enum:
+ - global
+ - provider
+ - provider_key
+ - virtual_key
+ - virtual_key_provider
+ - virtual_key_provider_key
+ description: Scope that determines which requests this override applies to
+ virtual_key_id:
+ type: string
+ nullable: true
+ description: Required for virtual_key* scopes
+ provider_id:
+ type: string
+ nullable: true
+ description: Required for provider and virtual_key_provider scopes
+ provider_key_id:
+ type: string
+ nullable: true
+ description: Required for provider_key and virtual_key_provider_key scopes
+ match_type:
+ type: string
+ enum:
+ - exact
+ - wildcard
+ description: How the pattern is matched against the model name
+ pattern:
+ type: string
+ description: Model name or wildcard prefix (e.g. "gpt-4o" or "claude-3*")
+ request_types:
+ type: array
+ minItems: 1
+ items:
+ $ref: '#/PricingOverrideRequestType'
+ description: Request types this override applies to. At least one value is required.
+ pricing_patch:
+ type: string
+ description: JSON-encoded pricing fields to override (as stored in the database)
+ patch:
+ $ref: '#/PricingPatch'
+ description: Decoded pricing fields (present in API responses)
+ config_hash:
+ type: string
+ nullable: true
+ description: Auto-managed hash for config-file-sourced overrides. Do not set manually.
+ created_at:
+ type: string
+ format: date-time
+ updated_at:
+ type: string
+ format: date-time
+
+CreatePricingOverrideRequest:
+ type: object
+ description: Request body for creating a pricing override.
+ required:
+ - name
+ - scope_kind
+ - match_type
+ - pattern
+ - request_types
+ properties:
+ name:
+ type: string
+ description: Human-readable label
+ scope_kind:
+ type: string
+ enum:
+ - global
+ - provider
+ - provider_key
+ - virtual_key
+ - virtual_key_provider
+ - virtual_key_provider_key
+ virtual_key_id:
+ type: string
+ description: Required for virtual_key* scopes
+ provider_id:
+ type: string
+ description: Required for provider and virtual_key_provider scopes
+ provider_key_id:
+ type: string
+ description: Required for provider_key and virtual_key_provider_key scopes
+ match_type:
+ type: string
+ enum:
+ - exact
+ - wildcard
+ pattern:
+ type: string
+ description: Model name or wildcard prefix ending with * (e.g. "claude-3*")
+ request_types:
+ type: array
+ minItems: 1
+ items:
+ $ref: '#/PricingOverrideRequestType'
+ description: Request types this override applies to. At least one value is required.
+ patch:
+ $ref: '#/PricingPatch'
+
+UpdatePricingOverrideRequest:
+ type: object
+ description: >
+ Request body for updating a pricing override. All fields are optional —
+ omitted fields are merged from the existing record. The `patch` field is
+ always replaced in full when provided.
+ properties:
+ name:
+ type: string
+ description: Human-readable label
+ scope_kind:
+ type: string
+ enum:
+ - global
+ - provider
+ - provider_key
+ - virtual_key
+ - virtual_key_provider
+ - virtual_key_provider_key
+ virtual_key_id:
+ type: string
+ description: Required for virtual_key* scopes
+ provider_id:
+ type: string
+ description: Required for provider and virtual_key_provider scopes
+ provider_key_id:
+ type: string
+ description: Required for provider_key and virtual_key_provider_key scopes
+ match_type:
+ type: string
+ enum:
+ - exact
+ - wildcard
+ pattern:
+ type: string
+ description: Model name or wildcard prefix ending with * (e.g. "claude-3*")
+ request_types:
+ type: array
+ minItems: 1
+ items:
+ $ref: '#/PricingOverrideRequestType'
+ description: Request types this override applies to.
+ patch:
+ $ref: '#/PricingPatch'
+
+PricingOverrideResponse:
+ type: object
+ properties:
+ message:
+ type: string
+ pricing_override:
+ $ref: '#/PricingOverride'
+
+ListPricingOverridesResponse:
+ type: object
+ properties:
+ pricing_overrides:
+ type: array
+ items:
+ $ref: '#/PricingOverride'
+ count:
+ type: integer
+ description: Total number of overrides returned
diff --git a/docs/providers/custom-pricing.mdx b/docs/providers/custom-pricing.mdx
new file mode 100644
index 0000000000..13c883773f
--- /dev/null
+++ b/docs/providers/custom-pricing.mdx
@@ -0,0 +1,410 @@
+---
+title: "Custom Pricing"
+description: "Set custom rates for any model across global or virtual key scopes, optionally narrowed to a specific provider or key."
+icon: "circle-dollar-to-slot"
+---
+
+## Overview
+
+Bifrost computes request costs using a built-in pricing catalog that is automatically synced from a remote datasheet. **Custom Pricing** lets you override those catalog prices at runtime without redeploying, applying your own rates for any model across any combination of provider, key, and virtual key scopes.
+
+**Key capabilities:**
+- **Scoped overrides** — apply prices globally or narrow them to a specific provider, provider key, or virtual key
+- **Pattern matching** — target an exact model name or a wildcard prefix (e.g. `gpt-4*`)
+- **Request type filtering** — restrict an override to one or more specific operations (chat, embeddings, image generation, etc.); at least one request type is required
+- **Hierarchical resolution** — the most-specific matching override always wins; broader scopes act as fallbacks
+
+---
+
+## Pricing data source
+
+Before configuring overrides, Bifrost needs a pricing catalog to work from. By default it ships with built-in prices and syncs them every 24 hours. You can point it at a custom pricing URL if you maintain your own datasheet.
+
+
+
+
+1. Navigate to **Models** in the sidebar
+2. Click the **Pricing Settings** tab
+3. Enter your pricing datasheet URL in the **Pricing Datasheet URL** field
+4. Set the **Pricing Sync Interval** (in hours)
+5. Click **Save**
+
+
+
+
+```json
+{
+ "framework": {
+ "pricing": {
+ "pricing_url": "https://your-host/pricing.json",
+ "pricing_sync_interval": 86400
+ }
+ }
+}
+```
+
+| Field | Type | Required | Default | Description |
+|-------|------|----------|---------|-------------|
+| `pricing_url` | string (URI) | No | built-in | URL of the pricing datasheet to sync from |
+| `pricing_sync_interval` | integer | No | `86400` | Sync interval in seconds. Minimum `3600` (1 hour) |
+
+
+
+
+---
+
+## Scope hierarchy
+
+Every override is assigned a **scope kind** that determines which requests it applies to. When Bifrost resolves pricing for a request, it evaluates all matching overrides and selects the one with the most specific scope. More specific scopes always win over broader ones.
+
+```
+virtual_key_provider_key (most specific)
+virtual_key_provider
+virtual_key
+provider_key
+provider
+global (least specific / catch-all)
+```
+
+**Scope kinds and their required identifiers:**
+
+| Scope kind | Required | Description |
+|------------|----------|-------------|
+| `global` | — | Applies to every request regardless of provider, key, or virtual key |
+| `provider` | `provider_id` | Applies to all keys under a specific provider |
+| `provider_key` | `provider_key_id` | Applies to a specific provider API key only |
+| `virtual_key` | `virtual_key_id` | Applies to all requests made under a virtual key |
+| `virtual_key_provider` | `virtual_key_id` + `provider_id` | Applies when a virtual key routes to a specific provider |
+| `virtual_key_provider_key` | `virtual_key_id` + `provider_key_id` | Most specific: virtual key + exact provider API key |
+
+
+Scope identifiers are exclusive to their scope kind — you cannot mix them. For example, `virtual_key_provider` requires `virtual_key_id` and `provider_id` and must not include `provider_key_id`.
+
+
+---
+
+## Pattern matching
+
+The `pattern` field controls which model names the override applies to. The `match_type` field controls how the pattern is interpreted.
+
+| Match type | Behavior | Example |
+|------------|----------|---------|
+| `exact` | Matches only the exact model name | `gpt-4o` matches only `gpt-4o` |
+| `wildcard` | Prefix match — pattern must end with `*` | `gpt-4*` matches `gpt-4o`, `gpt-4-turbo`, `gpt-4o-mini` |
+
+
+For wildcard patterns, append a `*` at the end of the prefix. For example, `claude-3*` will match all Claude 3 variants.
+
+
+---
+
+## Request type filtering
+
+`request_types` is **required** and must contain at least one value. Only request types that have pricing support are accepted. Stream variants are treated identically to their base type — specifying `chat_completion` covers both streaming and non-streaming chat requests.
+
+| Type | Description |
+|------|-------------|
+| `chat_completion` | Chat requests (streaming included) |
+| `text_completion` | Legacy text completions (streaming included) |
+| `responses` | Responses API requests (streaming included) |
+| `embedding` | Embedding generation |
+| `rerank` | Reranking |
+| `speech` | Text-to-speech (streaming included) |
+| `transcription` | Speech-to-text (streaming included) |
+| `image_generation` | Image generation (streaming included) |
+| `image_variation` | Image variation |
+| `image_edit` | Image editing (streaming included) |
+| `video_generation` | Video generation |
+| `video_remix` | Video remixing |
+
+---
+
+## Creating an override
+
+
+
+
+1. Navigate to **Models** → **Pricing Overrides** in the sidebar
+
+
+
+2. Click **Create Override**
+3. Fill in the form:
+ - **Name** — a human-readable label
+ - **Scope** — select the scope kind and provide the matching IDs
+ - **Pattern** — enter the model name or wildcard prefix
+ - **Match type** — choose **Exact** or **Wildcard**
+ - **Request types** — select one or more request types (required)
+ - **Pricing fields** — enter the price values you want to override (only non-zero fields are applied)
+4. Click **Save**
+
+
+
+
+
+
+```bash
+curl -X POST http://localhost:8080/api/governance/pricing-overrides \
+ -H "Content-Type: application/json" \
+ -d '{
+ "name": "GPT-4o reduced input cost",
+ "scope_kind": "global",
+ "match_type": "exact",
+ "pattern": "gpt-4o",
+ "request_types": ["chat_completion"],
+ "patch": {
+ "input_cost_per_token": 0.0000025,
+ "output_cost_per_token": 0.000010
+ }
+ }'
+```
+
+**Response:**
+```json
+{
+ "message": "Pricing override created successfully",
+ "pricing_override": {
+ "id": "550e8400-e29b-41d4-a716-446655440000",
+ "name": "GPT-4o reduced input cost",
+ "scope_kind": "global",
+ "match_type": "exact",
+ "pattern": "gpt-4o",
+ "request_types": ["chat_completion"],
+ "pricing_patch": "{\"input_cost_per_token\":0.0000025,\"output_cost_per_token\":0.00001}",
+ "created_at": "2026-03-20T10:00:00Z",
+ "updated_at": "2026-03-20T10:00:00Z"
+ }
+}
+```
+
+**Update (sparse patch):**
+```bash
+curl -X PATCH http://localhost:8080/api/governance/pricing-overrides/{id} \
+ -H "Content-Type: application/json" \
+ -d '{
+ "patch": {
+ "input_cost_per_token": 0.000002
+ }
+ }'
+```
+
+**Delete:**
+```bash
+curl -X DELETE http://localhost:8080/api/governance/pricing-overrides/{id}
+```
+
+**List (with optional filters):**
+```bash
+# All overrides
+curl http://localhost:8080/api/governance/pricing-overrides
+
+# Filter by scope
+curl "http://localhost:8080/api/governance/pricing-overrides?scope_kind=virtual_key&virtual_key_id=vk-abc123"
+```
+
+
+
+
+Pricing overrides are defined under `governance.pricing_overrides`. Each entry requires `id`, `name`, `scope_kind`, `match_type`, `pattern`, and `request_types`. The `pricing_patch` is a JSON-encoded string containing only the fields you want to override.
+
+```json
+{
+ "governance": {
+ "pricing_overrides": [
+ {
+ "id": "550e8400-e29b-41d4-a716-446655440000",
+ "name": "Global GPT-4o rate",
+ "scope_kind": "global",
+ "match_type": "exact",
+ "pattern": "gpt-4o",
+ "request_types": ["chat_completion"],
+ "pricing_patch": "{\"input_cost_per_token\":0.0000025,\"output_cost_per_token\":0.00001}"
+ },
+ {
+ "id": "660e8400-e29b-41d4-a716-446655440001",
+ "name": "All Claude models for prod VK",
+ "scope_kind": "virtual_key",
+ "virtual_key_id": "vk-abc123",
+ "match_type": "wildcard",
+ "pattern": "claude-3*",
+ "request_types": ["chat_completion"],
+ "pricing_patch": "{\"input_cost_per_token\":0.000003,\"output_cost_per_token\":0.000015}"
+ }
+ ]
+ }
+}
+```
+
+| Field | Type | Required | Description |
+|-------|------|----------|-------------|
+| `id` | string | Yes | Unique override ID (UUID recommended) |
+| `name` | string | Yes | Human-readable label |
+| `scope_kind` | string | Yes | One of: `global`, `provider`, `provider_key`, `virtual_key`, `virtual_key_provider`, `virtual_key_provider_key` |
+| `virtual_key_id` | string | Conditional | Required for `virtual_key*` scopes |
+| `provider_id` | string | Conditional | Required for `provider` and `virtual_key_provider` scopes |
+| `provider_key_id` | string | Conditional | Required for `provider_key` and `virtual_key_provider_key` scopes |
+| `match_type` | string | Yes | `exact` or `wildcard` |
+| `pattern` | string | Yes | Model name or wildcard prefix ending with `*` |
+| `request_types` | array | Yes | Request types this override applies to. At least one value required. |
+| `pricing_patch` | string | No | JSON-encoded pricing fields to override |
+| `config_hash` | string | No | Auto-managed. Do not set manually |
+
+
+
+
+---
+
+## Pricing fields reference
+
+Only fields with non-zero values are applied. All values are cost **per unit** in USD.
+
+### Token costs
+
+| Field | Description |
+|-------|-------------|
+| `input_cost_per_token` | Standard input token cost |
+| `output_cost_per_token` | Standard output token cost |
+| `input_cost_per_token_batches` | Input token cost for batch requests |
+| `output_cost_per_token_batches` | Output token cost for batch requests |
+| `input_cost_per_token_priority` | Input token cost for priority requests |
+| `output_cost_per_token_priority` | Output token cost for priority requests |
+| `input_cost_per_character` | Input cost per character (character-billed models) |
+
+### Token tier costs
+
+| Field | Description |
+|-------|-------------|
+| `input_cost_per_token_above_128k_tokens` | Input cost above 128k context |
+| `output_cost_per_token_above_128k_tokens` | Output cost above 128k context |
+| `input_cost_per_token_above_200k_tokens` | Input cost above 200k context |
+| `output_cost_per_token_above_200k_tokens` | Output cost above 200k context |
+
+### Cache costs
+
+| Field | Description |
+|-------|-------------|
+| `cache_creation_input_token_cost` | Cost to write a token to the prompt cache |
+| `cache_read_input_token_cost` | Cost to read a cached input token |
+| `cache_creation_input_token_cost_above_200k_tokens` | Cache creation above 200k context |
+| `cache_read_input_token_cost_above_200k_tokens` | Cache read above 200k context |
+| `cache_read_input_token_cost_priority` | Priority cache read cost |
+| `cache_read_input_image_token_cost` | Cache read cost for image tokens |
+| `cache_creation_input_audio_token_cost` | Cache creation cost for audio tokens |
+
+### Image costs
+
+| Field | Description |
+|-------|-------------|
+| `input_cost_per_image` | Cost per input image |
+| `output_cost_per_image` | Cost per generated image |
+| `input_cost_per_pixel` | Cost per input pixel |
+| `output_cost_per_pixel` | Cost per output pixel |
+| `input_cost_per_image_token` | Cost per image input token |
+| `output_cost_per_image_token` | Cost per image output token |
+| `output_cost_per_image_low_quality` | Generated image — low quality |
+| `output_cost_per_image_medium_quality` | Generated image — medium quality |
+| `output_cost_per_image_high_quality` | Generated image — high quality |
+| `output_cost_per_image_auto_quality` | Generated image — auto quality |
+| `output_cost_per_image_above_512_and_512_pixels` | Generated image > 512×512 |
+| `output_cost_per_image_above_1024_and_1024_pixels` | Generated image > 1024×1024 |
+| `output_cost_per_image_above_2048_and_2048_pixels` | Generated image > 2048×2048 |
+| `output_cost_per_image_above_4096_and_4096_pixels` | Generated image > 4096×4096 |
+
+### Audio and video costs
+
+| Field | Description |
+|-------|-------------|
+| `input_cost_per_audio_token` | Cost per audio input token |
+| `input_cost_per_audio_per_second` | Cost per second of audio input |
+| `input_cost_per_second` | Cost per second of input (generic) |
+| `input_cost_per_video_per_second` | Cost per second of video input |
+| `output_cost_per_audio_token` | Cost per audio output token |
+| `output_cost_per_second` | Cost per second of audio output |
+| `output_cost_per_video_per_second` | Cost per second of video output |
+| `input_cost_per_video_per_second_above_128k_tokens` | Video input cost above 128k context |
+| `input_cost_per_audio_per_second_above_128k_tokens` | Audio input cost above 128k context |
+
+### Other costs
+
+| Field | Description |
+|-------|-------------|
+| `search_context_cost_per_query` | Cost per web search context query |
+| `code_interpreter_cost_per_session` | Cost per code interpreter session |
+
+---
+
+## Examples
+
+### Flat rate for all Anthropic models
+
+Apply a single input/output rate to every Claude model globally:
+
+```json
+{
+ "id": "anthropic-flat-rate",
+ "name": "Anthropic flat rate",
+ "scope_kind": "provider",
+ "provider_id": "anthropic",
+ "match_type": "wildcard",
+ "pattern": "claude*",
+ "request_types": ["chat_completion", "text_completion", "responses"],
+ "pricing_patch": "{\"input_cost_per_token\":0.000003,\"output_cost_per_token\":0.000015}"
+}
+```
+
+### Per-virtual-key negotiated rate
+
+A specific virtual key has negotiated lower prices for GPT-4o:
+
+```json
+{
+ "id": "vk-prod-gpt4o-rate",
+ "name": "Prod VK — GPT-4o negotiated rate",
+ "scope_kind": "virtual_key",
+ "virtual_key_id": "vk-abc123",
+ "match_type": "exact",
+ "pattern": "gpt-4o",
+ "request_types": ["chat_completion"],
+ "pricing_patch": "{\"input_cost_per_token\":0.000002,\"output_cost_per_token\":0.000008}"
+}
+```
+
+### Image generation override
+
+Override costs for a specific image model at global scope:
+
+```json
+{
+ "id": "dall-e-3-rate",
+ "name": "DALL-E 3 custom rate",
+ "scope_kind": "global",
+ "match_type": "exact",
+ "pattern": "dall-e-3",
+ "request_types": ["image_generation"],
+ "pricing_patch": "{\"output_cost_per_image_high_quality\":0.04,\"output_cost_per_image_medium_quality\":0.02}"
+}
+```
+
+### Global catch-all for a new model
+
+Use a global override to add pricing for a model not yet in the built-in catalog:
+
+```json
+{
+ "id": "my-new-model-rate",
+ "name": "my-new-model pricing",
+ "scope_kind": "global",
+ "match_type": "exact",
+ "pattern": "my-new-model-v1",
+ "request_types": ["chat_completion"],
+ "pricing_patch": "{\"input_cost_per_token\":0.000001,\"output_cost_per_token\":0.000005}"
+}
+```
+
+---
+
+## Next steps
+
+- **[Virtual Keys](../features/governance/virtual-keys)** — Attach virtual-key-scoped overrides to virtual keys for per-customer pricing
+- **[Budget and Limits](../features/governance/budget-and-limits)** — Understand how costs are tracked against budgets
+- **[Model Catalog](../architecture/framework/model-catalog)** — Deep dive into how pricing resolution and cost calculation work internally
diff --git a/examples/configs/withpricingoverridesnostore/config.json b/examples/configs/withpricingoverridesnostore/config.json
new file mode 100644
index 0000000000..cfb29ebd35
--- /dev/null
+++ b/examples/configs/withpricingoverridesnostore/config.json
@@ -0,0 +1,74 @@
+{
+ "$schema": "https://www.getbifrost.ai/schema",
+ "config_store": {
+ "enabled": false
+ },
+ "logs_store": {
+ "enabled": false
+ },
+ "governance": {
+ "pricing_overrides": [
+ {
+ "id": "override-global-gpt4o",
+ "name": "Global GPT-4o Pricing",
+ "scope_kind": "global",
+ "match_type": "exact",
+ "pattern": "gpt-4o",
+ "request_types": ["chat_completion"],
+ "pricing_patch": "{\"input_cost_per_token\":0.0000025,\"output_cost_per_token\":0.00001}"
+ },
+ {
+ "id": "override-global-claude-wildcard",
+ "name": "Global Claude Models Pricing",
+ "scope_kind": "global",
+ "match_type": "wildcard",
+ "pattern": "claude-*",
+ "request_types": ["chat_completion"],
+ "pricing_patch": "{\"input_cost_per_token\":0.000003,\"output_cost_per_token\":0.000015}"
+ },
+ {
+ "id": "override-provider-openai-gpt4o-mini",
+ "name": "OpenAI GPT-4o Mini Pricing",
+ "scope_kind": "provider",
+ "provider_id": "openai",
+ "match_type": "exact",
+ "pattern": "gpt-4o-mini",
+ "request_types": ["chat_completion"],
+ "pricing_patch": "{\"input_cost_per_token\":0.00000015,\"output_cost_per_token\":0.0000006}"
+ }
+ ]
+ },
+ "plugins": [
+ {
+ "name": "governance",
+ "enabled": true,
+ "config": {
+ "is_vk_mandatory": false
+ }
+ }
+ ],
+ "providers": {
+ "openai": {
+ "keys": [
+ {
+ "id": "key-openai-1",
+ "name": "openai-key-1",
+ "value": "env.OPENAI_API_KEY",
+ "weight": 1,
+ "models": ["*"]
+ }
+ ]
+ },
+ "anthropic": {
+ "keys": [
+ {
+ "id": "key-anthropic-1",
+ "name": "anthropic-key-1",
+ "value": "env.ANTHROPIC_API_KEY",
+ "weight": 1,
+ "models": ["*"]
+ }
+ ]
+ }
+ }
+}
diff --git a/examples/configs/withpricingoverridessqlite/config.json b/examples/configs/withpricingoverridessqlite/config.json
new file mode 100644
index 0000000000..b99094bcea
--- /dev/null
+++ b/examples/configs/withpricingoverridessqlite/config.json
@@ -0,0 +1,82 @@
+{
+ "$schema": "https://www.getbifrost.ai/schema",
+ "config_store": {
+ "enabled": true,
+ "type": "sqlite",
+ "config": {
+ "path": "config.db"
+ }
+ },
+ "logs_store": {
+ "enabled": true,
+ "type": "sqlite",
+ "config": {
+ "path": "logs.db"
+ }
+ },
+ "governance": {
+ "pricing_overrides": [
+ {
+ "id": "override-global-gpt4o",
+ "name": "Global GPT-4o Pricing",
+ "scope_kind": "global",
+ "match_type": "exact",
+ "pattern": "gpt-4o",
+ "request_types": ["chat_completion"],
+ "pricing_patch": "{\"input_cost_per_token\":0.0000025,\"output_cost_per_token\":0.00001}"
+ },
+ {
+ "id": "override-global-claude-wildcard",
+ "name": "Global Claude Models Pricing",
+ "scope_kind": "global",
+ "match_type": "wildcard",
+ "pattern": "claude-*",
+ "request_types": ["chat_completion"],
+ "pricing_patch": "{\"input_cost_per_token\":0.000003,\"output_cost_per_token\":0.000015}"
+ },
+ {
+ "id": "override-provider-openai-gpt4o-mini",
+ "name": "OpenAI GPT-4o Mini Pricing",
+ "scope_kind": "provider",
+ "provider_id": "openai",
+ "match_type": "exact",
+ "pattern": "gpt-4o-mini",
+ "request_types": ["chat_completion"],
+ "pricing_patch": "{\"input_cost_per_token\":0.00000015,\"output_cost_per_token\":0.0000006}"
+ }
+ ]
+ },
+ "plugins": [
+ {
+ "name": "governance",
+ "enabled": true,
+ "config": {
+ "is_vk_mandatory": false
+ }
+ }
+ ],
+ "providers": {
+ "openai": {
+ "keys": [
+ {
+ "id": "key-openai-1",
+ "name": "openai-key-1",
+ "value": "env.OPENAI_API_KEY",
+ "weight": 1,
+ "models": ["*"]
+ }
+ ]
+ },
+ "anthropic": {
+ "keys": [
+ {
+ "id": "key-anthropic-1",
+ "name": "anthropic-key-1",
+ "value": "env.ANTHROPIC_API_KEY",
+ "weight": 1,
+ "models": ["*"]
+ }
+ ]
+ }
+ }
+}
diff --git a/framework/configstore/clientconfig.go b/framework/configstore/clientconfig.go
index 76b8631ee4..431d36d4c3 100644
--- a/framework/configstore/clientconfig.go
+++ b/framework/configstore/clientconfig.go
@@ -262,7 +262,6 @@ type ProviderConfig struct {
SendBackRawResponse bool `json:"send_back_raw_response"` // Include raw response in BifrostResponse
StoreRawRequestResponse bool `json:"store_raw_request_response"` // Capture raw request/response for internal logging only; strip from API responses returned to clients
CustomProviderConfig *schemas.CustomProviderConfig `json:"custom_provider_config,omitempty"` // Custom provider configuration
- PricingOverrides []schemas.ProviderPricingOverride `json:"pricing_overrides,omitempty"` // Provider-level pricing overrides
ConfigHash string `json:"config_hash,omitempty"` // Hash of config.json version, used for change detection
Status string `json:"status,omitempty"` // Model discovery status for keyless providers
Description string `json:"description,omitempty"` // Model discovery error message for keyless providers
@@ -282,7 +281,6 @@ func (p *ProviderConfig) Redacted() *ProviderConfig {
SendBackRawResponse: p.SendBackRawResponse,
StoreRawRequestResponse: p.StoreRawRequestResponse,
CustomProviderConfig: p.CustomProviderConfig,
- PricingOverrides: p.PricingOverrides,
ConfigHash: p.ConfigHash,
Status: p.Status,
Description: p.Description,
@@ -451,15 +449,6 @@ func (p *ProviderConfig) GenerateConfigHash(providerName string) (string, error)
hash.Write(data)
}
- // Hash PricingOverrides
- if p.PricingOverrides != nil {
- data, err := sonic.Marshal(p.PricingOverrides)
- if err != nil {
- return "", err
- }
- hash.Write(data)
- }
-
// Hash SendBackRawRequest
if p.SendBackRawRequest {
hash.Write([]byte("sendBackRawRequest"))
@@ -978,6 +967,23 @@ func GenerateRoutingRuleHash(r tables.TableRoutingRule) (string, error) {
return hex.EncodeToString(hash.Sum(nil)), nil
}
+// GeneratePricingOverrideHash generates a SHA256 hash for a pricing override.
+// Skips: CreatedAt, UpdatedAt, ConfigHash (dynamic/meta fields).
+func GeneratePricingOverrideHash(p tables.TablePricingOverride) (string, error) {
+ hash := sha256.New()
+ hash.Write([]byte(p.ID))
+ hash.Write([]byte(p.Name))
+ hash.Write([]byte(p.ScopeKind))
+ hash.Write([]byte(derefStr(p.VirtualKeyID)))
+ hash.Write([]byte(derefStr(p.ProviderID)))
+ hash.Write([]byte(derefStr(p.ProviderKeyID)))
+ hash.Write([]byte(p.MatchType))
+ hash.Write([]byte(p.Pattern))
+ hash.Write([]byte(p.RequestTypesJSON))
+ hash.Write([]byte(p.PricingPatchJSON))
+ return hex.EncodeToString(hash.Sum(nil)), nil
+}
+
// GenerateMCPClientHash generates a SHA256 hash for an MCP client.
// This is used to detect changes to MCP clients between config.json and database.
// Skips: ID (autoIncrement), CreatedAt, UpdatedAt (dynamic fields)
@@ -1101,14 +1107,17 @@ type AuthConfig struct {
// ConfigMap maps provider names to their configurations.
type ConfigMap map[schemas.ModelProvider]ProviderConfig
+// GovernanceConfig contains governance entities loaded from the config store or
+// reconciled from config.json.
type GovernanceConfig struct {
- VirtualKeys []tables.TableVirtualKey `json:"virtual_keys"`
- Teams []tables.TableTeam `json:"teams"`
- Customers []tables.TableCustomer `json:"customers"`
- Budgets []tables.TableBudget `json:"budgets"`
- RateLimits []tables.TableRateLimit `json:"rate_limits"`
- ModelConfigs []tables.TableModelConfig `json:"model_configs"`
- Providers []tables.TableProvider `json:"providers"`
- RoutingRules []tables.TableRoutingRule `json:"routing_rules"`
- AuthConfig *AuthConfig `json:"auth_config,omitempty"`
+ VirtualKeys []tables.TableVirtualKey `json:"virtual_keys"`
+ Teams []tables.TableTeam `json:"teams"`
+ Customers []tables.TableCustomer `json:"customers"`
+ Budgets []tables.TableBudget `json:"budgets"`
+ RateLimits []tables.TableRateLimit `json:"rate_limits"`
+ ModelConfigs []tables.TableModelConfig `json:"model_configs"`
+ Providers []tables.TableProvider `json:"providers"`
+ RoutingRules []tables.TableRoutingRule `json:"routing_rules"`
+ PricingOverrides []tables.TablePricingOverride `json:"pricing_overrides,omitempty"`
+ AuthConfig *AuthConfig `json:"auth_config,omitempty"`
}
diff --git a/framework/configstore/migrations.go b/framework/configstore/migrations.go
index 9f0f4c22a7..23c69dafa8 100644
--- a/framework/configstore/migrations.go
+++ b/framework/configstore/migrations.go
@@ -274,7 +274,7 @@ func triggerMigrations(ctx context.Context, db *gorm.DB) error {
if err := migrationAddEnforceAuthOnInferenceColumn(ctx, db); err != nil {
return err
}
- if err := migrationAddProviderPricingOverridesColumn(ctx, db); err != nil {
+ if err := migrationReconcilePricingOverridesTable(ctx, db); err != nil {
return err
}
if err := migrationAddEncryptionColumns(ctx, db); err != nil {
@@ -329,6 +329,9 @@ func triggerMigrations(ctx context.Context, db *gorm.DB) error {
if err := migrationAddMCPClientAllowedExtraHeadersJSONColumn(ctx, db); err != nil {
return err
}
+ if err := migrationMakeBasePricingColumnsNullable(ctx, db); err != nil {
+ return err
+ }
return nil
}
@@ -355,7 +358,6 @@ func migrationAddStoreRawRequestResponseColumn(ctx context.Context, db *gorm.DB)
"concurrency_buffer_json",
"proxy_config_json",
"custom_provider_config_json",
- "pricing_overrides_json",
"send_back_raw_request",
"send_back_raw_response",
"store_raw_request_response",
@@ -373,7 +375,6 @@ func migrationAddStoreRawRequestResponseColumn(ctx context.Context, db *gorm.DB)
SendBackRawResponse: provider.SendBackRawResponse,
StoreRawRequestResponse: provider.StoreRawRequestResponse,
CustomProviderConfig: provider.CustomProviderConfig,
- PricingOverrides: provider.PricingOverrides,
}
// Here the default value of store_raw_request_response should be based on the default value of SendBackRawRequest and SendBackRawResponse
if provider.SendBackRawRequest || provider.SendBackRawResponse {
@@ -511,6 +512,11 @@ func migrationInit(ctx context.Context, db *gorm.DB) error {
return err
}
}
+ if !migrator.HasTable(&tables.TablePricingOverride{}) {
+ if err := migrator.CreateTable(&tables.TablePricingOverride{}); err != nil {
+ return err
+ }
+ }
if !migrator.HasTable(&tables.TablePlugin{}) {
if err := migrator.CreateTable(&tables.TablePlugin{}); err != nil {
return err
@@ -568,6 +574,9 @@ func migrationInit(ctx context.Context, db *gorm.DB) error {
if err := migrator.DropTable(&tables.TableModelPricing{}); err != nil {
return err
}
+ if err := migrator.DropTable(&tables.TablePricingOverride{}); err != nil {
+ return err
+ }
if err := migrator.DropTable(&tables.TablePlugin{}); err != nil {
return err
}
@@ -4042,33 +4051,45 @@ func migrationAddEnforceAuthOnInferenceColumn(ctx context.Context, db *gorm.DB)
return nil
}
-// migrationAddProviderPricingOverridesColumn adds the pricing_overrides_json column to the config_provider table
-func migrationAddProviderPricingOverridesColumn(ctx context.Context, db *gorm.DB) error {
+func migrationReconcilePricingOverridesTable(ctx context.Context, db *gorm.DB) error {
m := migrator.New(db, migrator.DefaultOptions, []*migrator.Migration{{
- ID: "add_provider_pricing_overrides_column",
+ ID: "reconcile_pricing_overrides_table",
Migrate: func(tx *gorm.DB) error {
tx = tx.WithContext(ctx)
- migrator := tx.Migrator()
- if !migrator.HasColumn(&tables.TableProvider{}, "pricing_overrides_json") {
- if err := migrator.AddColumn(&tables.TableProvider{}, "PricingOverridesJSON"); err != nil {
- return fmt.Errorf("failed to add pricing_overrides_json column: %w", err)
+ mgr := tx.Migrator()
+
+ if !mgr.HasTable(&tables.TablePricingOverride{}) {
+ if err := mgr.CreateTable(&tables.TablePricingOverride{}); err != nil {
+ return fmt.Errorf("failed to create governance_pricing_overrides table: %w", err)
+ }
+ return nil
+ }
+ if err := tx.AutoMigrate(&tables.TablePricingOverride{}); err != nil {
+ return fmt.Errorf("failed to automigrate governance_pricing_overrides table: %w", err)
+ }
+ for _, indexName := range []string{"idx_pricing_override_scope", "idx_pricing_override_match"} {
+ if mgr.HasIndex(&tables.TablePricingOverride{}, indexName) {
+ continue
+ }
+ if err := mgr.CreateIndex(&tables.TablePricingOverride{}, indexName); err != nil {
+ return fmt.Errorf("failed to create pricing override index %s: %w", indexName, err)
}
}
return nil
},
Rollback: func(tx *gorm.DB) error {
tx = tx.WithContext(ctx)
- migrator := tx.Migrator()
- if migrator.HasColumn(&tables.TableProvider{}, "pricing_overrides_json") {
- if err := migrator.DropColumn(&tables.TableProvider{}, "pricing_overrides_json"); err != nil {
- return fmt.Errorf("failed to drop pricing_overrides_json column: %w", err)
+ mgr := tx.Migrator()
+ if mgr.HasTable(&tables.TablePricingOverride{}) {
+ if err := mgr.DropTable(&tables.TablePricingOverride{}); err != nil {
+ return fmt.Errorf("failed to drop governance_pricing_overrides table: %w", err)
}
}
return nil
},
}})
if err := m.Migrate(); err != nil {
- return fmt.Errorf("error running provider pricing overrides column migration: %s", err.Error())
+ return fmt.Errorf("error while running pricing overrides table reconcile migration: %s", err.Error())
}
return nil
}
@@ -5108,3 +5129,31 @@ func migrationAddPluginOrderColumns(ctx context.Context, db *gorm.DB) error {
}
return nil
}
+
+// migrationMakeBasePricingColumnsNullable drops the NOT NULL constraint on
+// input_cost_per_token and output_cost_per_token in governance_model_pricing,
+// allowing models that only have non-token pricing (image, audio, video) to be
+// stored without a placeholder zero value.
+func migrationMakeBasePricingColumnsNullable(ctx context.Context, db *gorm.DB) error {
+ m := migrator.New(db, migrator.DefaultOptions, []*migrator.Migration{{
+ ID: "make_base_pricing_columns_nullable",
+ Migrate: func(tx *gorm.DB) error {
+ tx = tx.WithContext(ctx)
+ m := tx.Migrator()
+ if err := m.AlterColumn(&tables.TableModelPricing{}, "InputCostPerToken"); err != nil {
+ return fmt.Errorf("failed to alter input_cost_per_token: %w", err)
+ }
+ if err := m.AlterColumn(&tables.TableModelPricing{}, "OutputCostPerToken"); err != nil {
+ return fmt.Errorf("failed to alter output_cost_per_token: %w", err)
+ }
+ return nil
+ },
+ Rollback: func(tx *gorm.DB) error {
+ return nil
+ },
+ }})
+ if err := m.Migrate(); err != nil {
+ return fmt.Errorf("error while running make_base_pricing_columns_nullable migration: %s", err.Error())
+ }
+ return nil
+}
diff --git a/framework/configstore/rdb.go b/framework/configstore/rdb.go
index 3c5d138f59..5ae4bacd99 100644
--- a/framework/configstore/rdb.go
+++ b/framework/configstore/rdb.go
@@ -252,7 +252,6 @@ func (s *RDBConfigStore) UpdateProvidersConfig(ctx context.Context, providers ma
SendBackRawResponse: providerConfig.SendBackRawResponse,
StoreRawRequestResponse: providerConfig.StoreRawRequestResponse,
CustomProviderConfig: providerConfig.CustomProviderConfig,
- PricingOverrides: providerConfig.PricingOverrides,
ConfigHash: providerConfig.ConfigHash,
Status: providerConfig.Status,
Description: providerConfig.Description,
@@ -423,7 +422,6 @@ func (s *RDBConfigStore) UpdateProvider(ctx context.Context, provider schemas.Mo
dbProvider.SendBackRawResponse = configCopy.SendBackRawResponse
dbProvider.StoreRawRequestResponse = configCopy.StoreRawRequestResponse
dbProvider.CustomProviderConfig = configCopy.CustomProviderConfig
- dbProvider.PricingOverrides = configCopy.PricingOverrides
dbProvider.ConfigHash = configCopy.ConfigHash
// Save the updated provider
@@ -562,7 +560,6 @@ func (s *RDBConfigStore) AddProvider(ctx context.Context, provider schemas.Model
SendBackRawResponse: configCopy.SendBackRawResponse,
StoreRawRequestResponse: configCopy.StoreRawRequestResponse,
CustomProviderConfig: configCopy.CustomProviderConfig,
- PricingOverrides: configCopy.PricingOverrides,
ConfigHash: configCopy.ConfigHash,
}
// Create the provider
@@ -721,7 +718,6 @@ func (s *RDBConfigStore) GetProvidersConfig(ctx context.Context) (map[schemas.Mo
SendBackRawResponse: dbProvider.SendBackRawResponse,
StoreRawRequestResponse: dbProvider.StoreRawRequestResponse,
CustomProviderConfig: dbProvider.CustomProviderConfig,
- PricingOverrides: dbProvider.PricingOverrides,
ConfigHash: dbProvider.ConfigHash,
Status: dbProvider.Status,
Description: dbProvider.Description,
@@ -770,7 +766,6 @@ func (s *RDBConfigStore) GetProviderConfig(ctx context.Context, provider schemas
SendBackRawResponse: dbProvider.SendBackRawResponse,
StoreRawRequestResponse: dbProvider.StoreRawRequestResponse,
CustomProviderConfig: dbProvider.CustomProviderConfig,
- PricingOverrides: dbProvider.PricingOverrides,
ConfigHash: dbProvider.ConfigHash,
Status: dbProvider.Status,
Description: dbProvider.Description,
@@ -1307,6 +1302,130 @@ func (s *RDBConfigStore) DeleteModelPrices(ctx context.Context, tx ...*gorm.DB)
return txDB.WithContext(ctx).Session(&gorm.Session{AllowGlobalUpdate: true}).Delete(&tables.TableModelPricing{}).Error
}
+func (s *RDBConfigStore) GetPricingOverrides(ctx context.Context, filters PricingOverrideFilters) ([]tables.TablePricingOverride, error) {
+ var overrides []tables.TablePricingOverride
+ q := s.db.WithContext(ctx).Model(&tables.TablePricingOverride{})
+ if filters.ScopeKind != nil {
+ q = q.Where("scope_kind = ?", *filters.ScopeKind)
+ }
+ if filters.VirtualKeyID != nil {
+ q = q.Where("virtual_key_id = ?", *filters.VirtualKeyID)
+ }
+ if filters.ProviderID != nil {
+ q = q.Where("provider_id = ?", *filters.ProviderID)
+ }
+ if filters.ProviderKeyID != nil {
+ q = q.Where("provider_key_id = ?", *filters.ProviderKeyID)
+ }
+ if err := q.Order("created_at ASC").Find(&overrides).Error; err != nil {
+ return nil, s.parseGormError(err)
+ }
+ return overrides, nil
+}
+
+func (s *RDBConfigStore) GetPricingOverridesPaginated(ctx context.Context, params PricingOverridesQueryParams) ([]tables.TablePricingOverride, int64, error) {
+ baseQuery := s.db.WithContext(ctx).Model(&tables.TablePricingOverride{})
+
+ if params.Search != "" {
+ search := "%" + strings.ToLower(params.Search) + "%"
+ baseQuery = baseQuery.Where("LOWER(name) LIKE ?", search)
+ }
+ if params.ScopeKind != nil {
+ baseQuery = baseQuery.Where("scope_kind = ?", *params.ScopeKind)
+ }
+ if params.VirtualKeyID != nil {
+ baseQuery = baseQuery.Where("virtual_key_id = ?", *params.VirtualKeyID)
+ }
+ if params.ProviderID != nil {
+ baseQuery = baseQuery.Where("provider_id = ?", *params.ProviderID)
+ }
+ if params.ProviderKeyID != nil {
+ baseQuery = baseQuery.Where("provider_key_id = ?", *params.ProviderKeyID)
+ }
+
+ var totalCount int64
+ if err := baseQuery.Count(&totalCount).Error; err != nil {
+ return nil, 0, err
+ }
+
+ limit := params.Limit
+ offset := params.Offset
+
+ if limit <= 0 {
+ limit = 25
+ } else if limit > 100 {
+ limit = 100
+ }
+
+ if offset < 0 {
+ offset = 0
+ }
+
+ var overrides []tables.TablePricingOverride
+ if err := baseQuery.
+ Order("created_at ASC").
+ Offset(offset).
+ Limit(limit).
+ Find(&overrides).Error; err != nil {
+ return nil, 0, s.parseGormError(err)
+ }
+ return overrides, totalCount, nil
+}
+
+func (s *RDBConfigStore) GetPricingOverrideByID(ctx context.Context, id string) (*tables.TablePricingOverride, error) {
+ var override tables.TablePricingOverride
+ if err := s.db.WithContext(ctx).First(&override, "id = ?", id).Error; err != nil {
+ if errors.Is(err, gorm.ErrRecordNotFound) {
+ return nil, ErrNotFound
+ }
+ return nil, s.parseGormError(err)
+ }
+ return &override, nil
+}
+
+func (s *RDBConfigStore) CreatePricingOverride(ctx context.Context, override *tables.TablePricingOverride, tx ...*gorm.DB) error {
+ var txDB *gorm.DB
+ if len(tx) > 0 {
+ txDB = tx[0]
+ } else {
+ txDB = s.db
+ }
+ if err := txDB.WithContext(ctx).Create(override).Error; err != nil {
+ return s.parseGormError(err)
+ }
+ return nil
+}
+
+func (s *RDBConfigStore) UpdatePricingOverride(ctx context.Context, override *tables.TablePricingOverride, tx ...*gorm.DB) error {
+ var txDB *gorm.DB
+ if len(tx) > 0 {
+ txDB = tx[0]
+ } else {
+ txDB = s.db
+ }
+ if err := txDB.WithContext(ctx).Save(override).Error; err != nil {
+ return s.parseGormError(err)
+ }
+ return nil
+}
+
+func (s *RDBConfigStore) DeletePricingOverride(ctx context.Context, id string, tx ...*gorm.DB) error {
+ var txDB *gorm.DB
+ if len(tx) > 0 {
+ txDB = tx[0]
+ } else {
+ txDB = s.db
+ }
+ res := txDB.WithContext(ctx).Delete(&tables.TablePricingOverride{}, "id = ?", id)
+ if res.Error != nil {
+ return s.parseGormError(res.Error)
+ }
+ if res.RowsAffected == 0 {
+ return ErrNotFound
+ }
+ return nil
+}
+
// MODEL PARAMETERS METHODS
// GetModelParameters retrieves model parameters for a specific model.
diff --git a/framework/configstore/store.go b/framework/configstore/store.go
index 8d2117c6b5..11d6a6a899 100644
--- a/framework/configstore/store.go
+++ b/framework/configstore/store.go
@@ -59,6 +59,25 @@ type CustomersQueryParams struct {
Search string
}
+// PricingOverrideFilters holds the filters for pricing overrides.
+type PricingOverrideFilters struct {
+ ScopeKind *string
+ VirtualKeyID *string
+ ProviderID *string
+ ProviderKeyID *string
+}
+
+// PricingOverridesQueryParams holds pagination, filtering, and search parameters for pricing override queries.
+type PricingOverridesQueryParams struct {
+ Limit int
+ Offset int
+ Search string
+ ScopeKind *string
+ VirtualKeyID *string
+ ProviderID *string
+ ProviderKeyID *string
+}
+
// ConfigStore is the interface for the config store.
type ConfigStore interface {
// Health check
@@ -218,6 +237,14 @@ type ConfigStore interface {
UpsertModelPrices(ctx context.Context, pricing *tables.TableModelPricing, tx ...*gorm.DB) error
DeleteModelPrices(ctx context.Context, tx ...*gorm.DB) error
+ // Governance pricing overrides CRUD
+ GetPricingOverrides(ctx context.Context, filters PricingOverrideFilters) ([]tables.TablePricingOverride, error)
+ GetPricingOverridesPaginated(ctx context.Context, params PricingOverridesQueryParams) ([]tables.TablePricingOverride, int64, error)
+ GetPricingOverrideByID(ctx context.Context, id string) (*tables.TablePricingOverride, error)
+ CreatePricingOverride(ctx context.Context, override *tables.TablePricingOverride, tx ...*gorm.DB) error
+ UpdatePricingOverride(ctx context.Context, override *tables.TablePricingOverride, tx ...*gorm.DB) error
+ DeletePricingOverride(ctx context.Context, id string, tx ...*gorm.DB) error
+
// Model parameters
GetModelParameters(ctx context.Context, model string) (*tables.TableModelParameters, error)
UpsertModelParameters(ctx context.Context, params *tables.TableModelParameters, tx ...*gorm.DB) error
diff --git a/framework/configstore/tables/modelpricing.go b/framework/configstore/tables/modelpricing.go
index d264e74207..b34d3e3385 100644
--- a/framework/configstore/tables/modelpricing.go
+++ b/framework/configstore/tables/modelpricing.go
@@ -9,8 +9,8 @@ type TableModelPricing struct {
Mode string `gorm:"type:varchar(50);not null;uniqueIndex:idx_model_provider_mode" json:"mode"`
// Costs - Text
- InputCostPerToken float64 `gorm:"not null" json:"input_cost_per_token"`
- OutputCostPerToken float64 `gorm:"not null" json:"output_cost_per_token"`
+ InputCostPerToken *float64 `gorm:"default:null" json:"input_cost_per_token,omitempty"`
+ OutputCostPerToken *float64 `gorm:"default:null" json:"output_cost_per_token,omitempty"`
InputCostPerTokenBatches *float64 `gorm:"default:null;column:input_cost_per_token_batches" json:"input_cost_per_token_batches,omitempty"`
OutputCostPerTokenBatches *float64 `gorm:"default:null;column:output_cost_per_token_batches" json:"output_cost_per_token_batches,omitempty"`
InputCostPerTokenPriority *float64 `gorm:"default:null;column:input_cost_per_token_priority" json:"input_cost_per_token_priority,omitempty"`
diff --git a/framework/configstore/tables/pricingoverride.go b/framework/configstore/tables/pricingoverride.go
new file mode 100644
index 0000000000..e4b23e3069
--- /dev/null
+++ b/framework/configstore/tables/pricingoverride.go
@@ -0,0 +1,55 @@
+package tables
+
+import (
+ "encoding/json"
+ "time"
+
+ "github.com/maximhq/bifrost/core/schemas"
+ "gorm.io/gorm"
+)
+
+// TablePricingOverride is the persistence model for governance pricing overrides.
+type TablePricingOverride struct {
+ ID string `gorm:"primaryKey;type:varchar(255)" json:"id"`
+ Name string `gorm:"type:varchar(255);not null" json:"name"`
+ ScopeKind string `gorm:"type:varchar(50);index:idx_pricing_override_scope;not null" json:"scope_kind"`
+ VirtualKeyID *string `gorm:"type:varchar(255);index:idx_pricing_override_scope" json:"virtual_key_id,omitempty"`
+ ProviderID *string `gorm:"type:varchar(255);index:idx_pricing_override_scope" json:"provider_id,omitempty"`
+ ProviderKeyID *string `gorm:"type:varchar(255);index:idx_pricing_override_scope" json:"provider_key_id,omitempty"`
+ MatchType string `gorm:"type:varchar(20);index:idx_pricing_override_match;not null" json:"match_type"`
+ Pattern string `gorm:"type:varchar(255);not null" json:"pattern"`
+ RequestTypesJSON string `gorm:"type:text" json:"-"`
+ PricingPatchJSON string `gorm:"type:text" json:"pricing_patch,omitempty"`
+ ConfigHash string `gorm:"type:varchar(255);null" json:"config_hash,omitempty"`
+ CreatedAt time.Time `gorm:"index;not null" json:"created_at"`
+ UpdatedAt time.Time `gorm:"index;not null" json:"updated_at"`
+
+ RequestTypes []schemas.RequestType `gorm:"-" json:"request_types,omitempty"`
+}
+
+// TableName returns the backing table name for governance pricing overrides.
+func (TablePricingOverride) TableName() string { return "governance_pricing_overrides" }
+
+// BeforeSave serializes virtual fields into their JSON columns before persistence.
+func (p *TablePricingOverride) BeforeSave(tx *gorm.DB) error {
+ if len(p.RequestTypes) > 0 {
+ b, err := json.Marshal(p.RequestTypes)
+ if err != nil {
+ return err
+ }
+ p.RequestTypesJSON = string(b)
+ } else {
+ p.RequestTypesJSON = "[]"
+ }
+ return nil
+}
+
+// AfterFind restores virtual fields from their persisted JSON columns.
+func (p *TablePricingOverride) AfterFind(tx *gorm.DB) error {
+ if p.RequestTypesJSON != "" {
+ if err := json.Unmarshal([]byte(p.RequestTypesJSON), &p.RequestTypes); err != nil {
+ return err
+ }
+ }
+ return nil
+}
diff --git a/framework/configstore/tables/provider.go b/framework/configstore/tables/provider.go
index 5042ca82f1..c76e0db0b2 100644
--- a/framework/configstore/tables/provider.go
+++ b/framework/configstore/tables/provider.go
@@ -21,7 +21,6 @@ type TableProvider struct {
ConcurrencyBufferJSON string `gorm:"type:text" json:"-"` // JSON serialized schemas.ConcurrencyAndBufferSize
ProxyConfigJSON string `gorm:"type:text" json:"-"` // JSON serialized schemas.ProxyConfig
CustomProviderConfigJSON string `gorm:"type:text" json:"-"` // JSON serialized schemas.CustomProviderConfig
- PricingOverridesJSON string `gorm:"type:text" json:"-"` // JSON serialized []schemas.ProviderPricingOverride
SendBackRawRequest bool `json:"send_back_raw_request"`
SendBackRawResponse bool `json:"send_back_raw_response"`
StoreRawRequestResponse bool `json:"store_raw_request_response"`
@@ -37,8 +36,7 @@ type TableProvider struct {
ProxyConfig *schemas.ProxyConfig `gorm:"-" json:"proxy_config,omitempty"`
// Custom provider fields
- CustomProviderConfig *schemas.CustomProviderConfig `gorm:"-" json:"custom_provider_config,omitempty"`
- PricingOverrides []schemas.ProviderPricingOverride `gorm:"-" json:"pricing_overrides,omitempty"`
+ CustomProviderConfig *schemas.CustomProviderConfig `gorm:"-" json:"custom_provider_config,omitempty"`
// Foreign keys
Models []TableModel `gorm:"foreignKey:ProviderID;constraint:OnDelete:CASCADE" json:"models"`
@@ -100,16 +98,6 @@ func (p *TableProvider) BeforeSave(tx *gorm.DB) error {
}
p.CustomProviderConfigJSON = string(data)
}
- if p.PricingOverrides != nil {
- data, err := json.Marshal(p.PricingOverrides)
- if err != nil {
- return err
- }
- p.PricingOverridesJSON = string(data)
- } else {
- p.PricingOverridesJSON = ""
- }
-
// Validate governance fields
if p.BudgetID != nil && strings.TrimSpace(*p.BudgetID) == "" {
return fmt.Errorf("budget_id cannot be an empty string")
@@ -173,13 +161,5 @@ func (p *TableProvider) AfterFind(tx *gorm.DB) error {
p.CustomProviderConfig = &customConfig
}
- if p.PricingOverridesJSON != "" {
- var overrides []schemas.ProviderPricingOverride
- if err := json.Unmarshal([]byte(p.PricingOverridesJSON), &overrides); err != nil {
- return err
- }
- p.PricingOverrides = overrides
- }
-
return nil
}
diff --git a/framework/logstore/tables.go b/framework/logstore/tables.go
index dae5502078..c408a8f408 100644
--- a/framework/logstore/tables.go
+++ b/framework/logstore/tables.go
@@ -29,22 +29,22 @@ const (
// SearchFilters represents the available filters for log searches
type SearchFilters struct {
- Providers []string `json:"providers,omitempty"`
- Models []string `json:"models,omitempty"`
- Status []string `json:"status,omitempty"`
- Objects []string `json:"objects,omitempty"` // For filtering by request type (chat.completion, text.completion, embedding)
- SelectedKeyIDs []string `json:"selected_key_ids,omitempty"`
- VirtualKeyIDs []string `json:"virtual_key_ids,omitempty"`
- RoutingRuleIDs []string `json:"routing_rule_ids,omitempty"`
- RoutingEngineUsed []string `json:"routing_engine_used,omitempty"` // For filtering by routing engine (routing-rule, governance, loadbalancing)
- StartTime *time.Time `json:"start_time,omitempty"`
- EndTime *time.Time `json:"end_time,omitempty"`
- MinLatency *float64 `json:"min_latency,omitempty"`
- MaxLatency *float64 `json:"max_latency,omitempty"`
- MinTokens *int `json:"min_tokens,omitempty"`
- MaxTokens *int `json:"max_tokens,omitempty"`
- MinCost *float64 `json:"min_cost,omitempty"`
- MaxCost *float64 `json:"max_cost,omitempty"`
+ Providers []string `json:"providers,omitempty"`
+ Models []string `json:"models,omitempty"`
+ Status []string `json:"status,omitempty"`
+ Objects []string `json:"objects,omitempty"` // For filtering by request type (chat.completion, text.completion, embedding)
+ SelectedKeyIDs []string `json:"selected_key_ids,omitempty"`
+ VirtualKeyIDs []string `json:"virtual_key_ids,omitempty"`
+ RoutingRuleIDs []string `json:"routing_rule_ids,omitempty"`
+ RoutingEngineUsed []string `json:"routing_engine_used,omitempty"` // For filtering by routing engine (routing-rule, governance, loadbalancing)
+ StartTime *time.Time `json:"start_time,omitempty"`
+ EndTime *time.Time `json:"end_time,omitempty"`
+ MinLatency *float64 `json:"min_latency,omitempty"`
+ MaxLatency *float64 `json:"max_latency,omitempty"`
+ MinTokens *int `json:"min_tokens,omitempty"`
+ MaxTokens *int `json:"max_tokens,omitempty"`
+ MinCost *float64 `json:"min_cost,omitempty"`
+ MaxCost *float64 `json:"max_cost,omitempty"`
MissingCostOnly bool `json:"missing_cost_only,omitempty"`
ContentSearch string `json:"content_search,omitempty"`
MetadataFilters map[string]string `json:"metadata_filters,omitempty"` // key=metadataKey, value=metadataValue for filtering by metadata
@@ -78,59 +78,59 @@ type SearchStats struct {
// Log represents a complete log entry for a request/response cycle
// This is the GORM model with appropriate tags
type Log struct {
- ID string `gorm:"primaryKey;type:varchar(255)" json:"id"`
- ParentRequestID *string `gorm:"type:varchar(255)" json:"parent_request_id"`
- Timestamp time.Time `gorm:"index;index:idx_logs_ts_provider_status,priority:1;not null" json:"timestamp"`
- Object string `gorm:"type:varchar(255);index;not null;column:object_type" json:"object"` // text.completion, chat.completion, or embedding
- Provider string `gorm:"type:varchar(255);index;index:idx_logs_ts_provider_status,priority:2;not null" json:"provider"`
- Model string `gorm:"type:varchar(255);index;not null" json:"model"`
- NumberOfRetries int `gorm:"default:0" json:"number_of_retries"`
- FallbackIndex int `gorm:"default:0" json:"fallback_index"`
- SelectedKeyID string `gorm:"type:varchar(255);index:idx_logs_selected_key_id" json:"selected_key_id"`
- SelectedKeyName string `gorm:"type:varchar(255)" json:"selected_key_name"`
- VirtualKeyID *string `gorm:"type:varchar(255);index:idx_logs_virtual_key_id" json:"virtual_key_id"`
- VirtualKeyName *string `gorm:"type:varchar(255)" json:"virtual_key_name"`
- RoutingEnginesUsedStr *string `gorm:"type:varchar(255);column:routing_engines_used" json:"-"` // Comma-separated routing engines
- RoutingRuleID *string `gorm:"type:varchar(255);index:idx_logs_routing_rule_id" json:"routing_rule_id"`
- RoutingRuleName *string `gorm:"type:varchar(255)" json:"routing_rule_name"`
- InputHistory string `gorm:"type:text" json:"-"` // JSON serialized []schemas.ChatMessage
- ResponsesInputHistory string `gorm:"type:text" json:"-"` // JSON serialized []schemas.ResponsesMessage
- OutputMessage string `gorm:"type:text" json:"-"` // JSON serialized *schemas.ChatMessage
- ResponsesOutput string `gorm:"type:text" json:"-"` // JSON serialized *schemas.ResponsesMessage
- EmbeddingOutput string `gorm:"type:text" json:"-"` // JSON serialized [][]float32
- RerankOutput string `gorm:"type:text" json:"-"` // JSON serialized []schemas.RerankResult
- Params string `gorm:"type:text" json:"-"` // JSON serialized *schemas.ModelParameters
- Tools string `gorm:"type:text" json:"-"` // JSON serialized []schemas.Tool
- ToolCalls string `gorm:"type:text" json:"-"` // JSON serialized []schemas.ToolCall (For backward compatibility, tool calls are now in the content)
- SpeechInput string `gorm:"type:text" json:"-"` // JSON serialized *schemas.SpeechInput
- TranscriptionInput string `gorm:"type:text" json:"-"` // JSON serialized *schemas.TranscriptionInput
- ImageGenerationInput string `gorm:"type:text" json:"-"` // JSON serialized *schemas.ImageGenerationInput
- VideoGenerationInput string `gorm:"type:text" json:"-"` // JSON serialized *schemas.VideoGenerationInput
- SpeechOutput string `gorm:"type:text" json:"-"` // JSON serialized *schemas.BifrostSpeech
- TranscriptionOutput string `gorm:"type:text" json:"-"` // JSON serialized *schemas.BifrostTranscribe
- ImageGenerationOutput string `gorm:"type:text" json:"-"` // JSON serialized *schemas.BifrostImageGenerationResponse
- ListModelsOutput string `gorm:"type:text" json:"-"` // JSON serialized []schemas.Model
- VideoGenerationOutput string `gorm:"type:text" json:"-"` // JSON serialized *schemas.BifrostVideoGenerationResponse
- VideoRetrieveOutput string `gorm:"type:text" json:"-"` // JSON serialized *schemas.BifrostVideoRetrieveResponse
- VideoDownloadOutput string `gorm:"type:text" json:"-"` // JSON serialized *schemas.BifrostVideoDownloadResponse
- VideoListOutput string `gorm:"type:text" json:"-"` // JSON serialized *schemas.BifrostVideoListResponse
- VideoDeleteOutput string `gorm:"type:text" json:"-"` // JSON serialized *schemas.BifrostVideoDeleteResponse
- CacheDebug string `gorm:"type:text" json:"-"` // JSON serialized *schemas.BifrostCacheDebug
- Latency *float64 `gorm:"index:idx_logs_latency" json:"latency,omitempty"`
- TokenUsage string `gorm:"type:text" json:"-"` // JSON serialized *schemas.LLMUsage
- Cost *float64 `gorm:"index" json:"cost,omitempty"` // Cost in dollars (total cost of the request - includes cache lookup cost)
- Status string `gorm:"type:varchar(50);index;index:idx_logs_ts_provider_status,priority:3;not null" json:"status"` // "processing", "success", or "error"
- ErrorDetails string `gorm:"type:text" json:"-"` // JSON serialized *schemas.BifrostError
- Stream bool `gorm:"default:false" json:"stream"` // true if this was a streaming response
- ContentSummary string `gorm:"type:text" json:"-"`
- RawRequest string `gorm:"type:text" json:"raw_request"` // Populated when `send-back-raw-request` is on
- RawResponse string `gorm:"type:text" json:"raw_response"` // Populated when `send-back-raw-response` is on
+ ID string `gorm:"primaryKey;type:varchar(255)" json:"id"`
+ ParentRequestID *string `gorm:"type:varchar(255)" json:"parent_request_id"`
+ Timestamp time.Time `gorm:"index;index:idx_logs_ts_provider_status,priority:1;not null" json:"timestamp"`
+ Object string `gorm:"type:varchar(255);index;not null;column:object_type" json:"object"` // text.completion, chat.completion, or embedding
+ Provider string `gorm:"type:varchar(255);index;index:idx_logs_ts_provider_status,priority:2;not null" json:"provider"`
+ Model string `gorm:"type:varchar(255);index;not null" json:"model"`
+ NumberOfRetries int `gorm:"default:0" json:"number_of_retries"`
+ FallbackIndex int `gorm:"default:0" json:"fallback_index"`
+ SelectedKeyID string `gorm:"type:varchar(255);index:idx_logs_selected_key_id" json:"selected_key_id"`
+ SelectedKeyName string `gorm:"type:varchar(255)" json:"selected_key_name"`
+ VirtualKeyID *string `gorm:"type:varchar(255);index:idx_logs_virtual_key_id" json:"virtual_key_id"`
+ VirtualKeyName *string `gorm:"type:varchar(255)" json:"virtual_key_name"`
+ RoutingEnginesUsedStr *string `gorm:"type:varchar(255);column:routing_engines_used" json:"-"` // Comma-separated routing engines
+ RoutingRuleID *string `gorm:"type:varchar(255);index:idx_logs_routing_rule_id" json:"routing_rule_id"`
+ RoutingRuleName *string `gorm:"type:varchar(255)" json:"routing_rule_name"`
+ InputHistory string `gorm:"type:text" json:"-"` // JSON serialized []schemas.ChatMessage
+ ResponsesInputHistory string `gorm:"type:text" json:"-"` // JSON serialized []schemas.ResponsesMessage
+ OutputMessage string `gorm:"type:text" json:"-"` // JSON serialized *schemas.ChatMessage
+ ResponsesOutput string `gorm:"type:text" json:"-"` // JSON serialized *schemas.ResponsesMessage
+ EmbeddingOutput string `gorm:"type:text" json:"-"` // JSON serialized [][]float32
+ RerankOutput string `gorm:"type:text" json:"-"` // JSON serialized []schemas.RerankResult
+ Params string `gorm:"type:text" json:"-"` // JSON serialized *schemas.ModelParameters
+ Tools string `gorm:"type:text" json:"-"` // JSON serialized []schemas.Tool
+ ToolCalls string `gorm:"type:text" json:"-"` // JSON serialized []schemas.ToolCall (For backward compatibility, tool calls are now in the content)
+ SpeechInput string `gorm:"type:text" json:"-"` // JSON serialized *schemas.SpeechInput
+ TranscriptionInput string `gorm:"type:text" json:"-"` // JSON serialized *schemas.TranscriptionInput
+ ImageGenerationInput string `gorm:"type:text" json:"-"` // JSON serialized *schemas.ImageGenerationInput
+ VideoGenerationInput string `gorm:"type:text" json:"-"` // JSON serialized *schemas.VideoGenerationInput
+ SpeechOutput string `gorm:"type:text" json:"-"` // JSON serialized *schemas.BifrostSpeech
+ TranscriptionOutput string `gorm:"type:text" json:"-"` // JSON serialized *schemas.BifrostTranscribe
+ ImageGenerationOutput string `gorm:"type:text" json:"-"` // JSON serialized *schemas.BifrostImageGenerationResponse
+ ListModelsOutput string `gorm:"type:text" json:"-"` // JSON serialized []schemas.Model
+ VideoGenerationOutput string `gorm:"type:text" json:"-"` // JSON serialized *schemas.BifrostVideoGenerationResponse
+ VideoRetrieveOutput string `gorm:"type:text" json:"-"` // JSON serialized *schemas.BifrostVideoRetrieveResponse
+ VideoDownloadOutput string `gorm:"type:text" json:"-"` // JSON serialized *schemas.BifrostVideoDownloadResponse
+ VideoListOutput string `gorm:"type:text" json:"-"` // JSON serialized *schemas.BifrostVideoListResponse
+ VideoDeleteOutput string `gorm:"type:text" json:"-"` // JSON serialized *schemas.BifrostVideoDeleteResponse
+ CacheDebug string `gorm:"type:text" json:"-"` // JSON serialized *schemas.BifrostCacheDebug
+ Latency *float64 `gorm:"index:idx_logs_latency" json:"latency,omitempty"`
+ TokenUsage string `gorm:"type:text" json:"-"` // JSON serialized *schemas.LLMUsage
+ Cost *float64 `gorm:"index" json:"cost,omitempty"` // Cost in dollars (total cost of the request - includes cache lookup cost)
+ Status string `gorm:"type:varchar(50);index;index:idx_logs_ts_provider_status,priority:3;not null" json:"status"` // "processing", "success", or "error"
+ ErrorDetails string `gorm:"type:text" json:"-"` // JSON serialized *schemas.BifrostError
+ Stream bool `gorm:"default:false" json:"stream"` // true if this was a streaming response
+ ContentSummary string `gorm:"type:text" json:"-"`
+ RawRequest string `gorm:"type:text" json:"raw_request"` // Populated when `send-back-raw-request` is on
+ RawResponse string `gorm:"type:text" json:"raw_response"` // Populated when `send-back-raw-response` is on
PassthroughRequestBody string `gorm:"type:text" json:"passthrough_request_body,omitempty"` // Raw body for passthrough requests (UTF-8)
PassthroughResponseBody string `gorm:"type:text" json:"passthrough_response_body,omitempty"` // Raw body for passthrough responses (UTF-8)
- RoutingEngineLogs string `gorm:"type:text" json:"routing_engine_logs,omitempty"` // Formatted routing engine decision logs
- Metadata *string `gorm:"type:text" json:"-"` // JSON serialized map[string]interface{}
- IsLargePayloadRequest bool `gorm:"default:false" json:"is_large_payload_request"`
- IsLargePayloadResponse bool `gorm:"default:false" json:"is_large_payload_response"`
+ RoutingEngineLogs string `gorm:"type:text" json:"routing_engine_logs,omitempty"` // Formatted routing engine decision logs
+ Metadata *string `gorm:"type:text" json:"-"` // JSON serialized map[string]interface{}
+ IsLargePayloadRequest bool `gorm:"default:false" json:"is_large_payload_request"`
+ IsLargePayloadResponse bool `gorm:"default:false" json:"is_large_payload_response"`
// Denormalized token fields for easier querying
PromptTokens int `gorm:"default:0" json:"-"`
diff --git a/framework/modelcatalog/main.go b/framework/modelcatalog/main.go
index be25cb69c8..3409074220 100644
--- a/framework/modelcatalog/main.go
+++ b/framework/modelcatalog/main.go
@@ -38,10 +38,13 @@ type ModelCatalog struct {
pricingData map[string]configstoreTables.TableModelPricing
mu sync.RWMutex
- // Provider-level pricing overrides are maintained separately to avoid contention
- // with pricing cache rebuilds.
- compiledOverrides map[schemas.ModelProvider][]compiledProviderPricingOverride
- overridesMu sync.RWMutex
+ // rawOverrides is the canonical list of all active overrides. It exists solely
+ // to support incremental mutations: UpsertPricingOverrides and DeletePricingOverride
+ // iterate over it to rebuild the list, then derive customPricing from it.
+ // customPricing is the actual lookup structure used at query time.
+ rawOverrides []PricingOverride
+ customPricing *customPricingData
+ overridesMu sync.RWMutex
modelPool map[schemas.ModelProvider][]string
unfilteredModelPool map[schemas.ModelProvider][]string // model pool without allowed models filtering
@@ -61,10 +64,13 @@ type PricingEntry struct {
BaseModel string `json:"base_model,omitempty"`
Provider string `json:"provider"`
Mode string `json:"mode"`
+ PricingOptions
+}
+type PricingOptions struct {
// Costs - Text
- InputCostPerToken float64 `json:"input_cost_per_token"`
- OutputCostPerToken float64 `json:"output_cost_per_token"`
+ InputCostPerToken *float64 `json:"input_cost_per_token,omitempty"`
+ OutputCostPerToken *float64 `json:"output_cost_per_token,omitempty"`
InputCostPerTokenBatches *float64 `json:"input_cost_per_token_batches,omitempty"`
OutputCostPerTokenBatches *float64 `json:"output_cost_per_token_batches,omitempty"`
InputCostPerTokenPriority *float64 `json:"input_cost_per_token_priority,omitempty"`
@@ -194,7 +200,6 @@ func Init(ctx context.Context, config *Config, configStore configstore.ConfigSto
configStore: configStore,
logger: logger,
pricingData: make(map[string]configstoreTables.TableModelPricing),
- compiledOverrides: make(map[schemas.ModelProvider][]compiledProviderPricingOverride),
modelPool: make(map[schemas.ModelProvider][]string),
unfilteredModelPool: make(map[schemas.ModelProvider][]string),
baseModelIndex: make(map[string]string),
@@ -251,6 +256,10 @@ func Init(ctx context.Context, config *Config, configStore configstore.ConfigSto
// Populate model pool with normalized providers from pricing data
mc.populateModelPoolFromPricingData()
+ if err := mc.loadPricingOverridesFromStore(ctx); err != nil {
+ return nil, fmt.Errorf("failed to load pricing overrides: %w", err)
+ }
+
// Start background sync worker
mc.syncCtx, mc.syncCancel = context.WithCancel(ctx)
mc.startSyncWorker(mc.syncCtx)
@@ -321,6 +330,10 @@ func (mc *ModelCatalog) ForceReloadPricing(ctx context.Context) error {
// Rebuild model pool from updated pricing data
mc.populateModelPoolFromPricingData()
+ if err := mc.loadPricingOverridesFromStore(ctx); err != nil {
+ return fmt.Errorf("failed to load pricing overrides: %w", err)
+ }
+
// Also sync model parameters
if err := mc.syncModelParameters(ctx); err != nil {
mc.logger.Warn("failed to sync model parameters during force reload: %v", err)
@@ -784,6 +797,79 @@ func (mc *ModelCatalog) RefineModelForProvider(provider schemas.ModelProvider, m
return model, nil
}
+// SetPricingOverrides replaces the full in-memory pricing override set.
+func (mc *ModelCatalog) SetPricingOverrides(rows []configstoreTables.TablePricingOverride) error {
+ seen := make(map[string]int, len(rows))
+ overrides := make([]PricingOverride, 0, len(rows))
+ for i := range rows {
+ o, err := convertTablePricingOverrideToPricingOverride(&rows[i])
+ if err != nil {
+ return err
+ }
+ if idx, exists := seen[o.ID]; exists {
+ overrides[idx] = o // last entry wins for duplicate IDs
+ } else {
+ seen[o.ID] = len(overrides)
+ overrides = append(overrides, o)
+ }
+ }
+ mc.overridesMu.Lock()
+ mc.rawOverrides = overrides
+ mc.customPricing = buildCustomPricingData(overrides)
+ mc.overridesMu.Unlock()
+ return nil
+}
+
+// UpsertPricingOverrides inserts or replaces one or more pricing overrides in a single
+// operation, rebuilding the lookup map only once at the end.
+func (mc *ModelCatalog) UpsertPricingOverrides(rows ...*configstoreTables.TablePricingOverride) error {
+ // Deduplicate the input batch by ID (last entry wins) and build the
+ // incoming set for O(1) lookup when filtering existing rawOverrides.
+ seenIncoming := make(map[string]int, len(rows))
+ overrides := make([]PricingOverride, 0, len(rows))
+ for _, row := range rows {
+ o, err := convertTablePricingOverrideToPricingOverride(row)
+ if err != nil {
+ return err
+ }
+ if idx, exists := seenIncoming[o.ID]; exists {
+ overrides[idx] = o // last entry wins for duplicate IDs
+ } else {
+ seenIncoming[o.ID] = len(overrides)
+ overrides = append(overrides, o)
+ }
+ }
+
+ mc.overridesMu.Lock()
+ defer mc.overridesMu.Unlock()
+
+ updated := make([]PricingOverride, 0, len(mc.rawOverrides)+len(overrides))
+ for _, o := range mc.rawOverrides {
+ if _, replacing := seenIncoming[o.ID]; !replacing {
+ updated = append(updated, o)
+ }
+ }
+ updated = append(updated, overrides...)
+ mc.rawOverrides = updated
+ mc.customPricing = buildCustomPricingData(updated)
+ return nil
+}
+
+// DeletePricingOverride removes a pricing override by ID.
+func (mc *ModelCatalog) DeletePricingOverride(id string) {
+ mc.overridesMu.Lock()
+ defer mc.overridesMu.Unlock()
+
+ updated := make([]PricingOverride, 0, len(mc.rawOverrides))
+ for _, o := range mc.rawOverrides {
+ if o.ID != id {
+ updated = append(updated, o)
+ }
+ }
+ mc.rawOverrides = updated
+ mc.customPricing = buildCustomPricingData(updated)
+}
+
// IsTextCompletionSupported checks if a model supports text completion for the given provider.
// Returns true if the model has pricing data for text completion ("text_completion"),
// false otherwise. This is used by the litellmcompat plugin to determine whether to
@@ -878,7 +964,6 @@ func NewTestCatalog(baseModelIndex map[string]string) *ModelCatalog {
unfilteredModelPool: make(map[schemas.ModelProvider][]string),
baseModelIndex: baseModelIndex,
pricingData: make(map[string]configstoreTables.TableModelPricing),
- compiledOverrides: make(map[schemas.ModelProvider][]compiledProviderPricingOverride),
done: make(chan struct{}),
}
}
diff --git a/framework/modelcatalog/main_test.go b/framework/modelcatalog/main_test.go
index 6715989743..c406a951f1 100644
--- a/framework/modelcatalog/main_test.go
+++ b/framework/modelcatalog/main_test.go
@@ -17,10 +17,9 @@ func newTestCatalog(modelPool map[schemas.ModelProvider][]string, baseModelIndex
baseModelIndex = make(map[string]string)
}
return &ModelCatalog{
- modelPool: modelPool,
- baseModelIndex: baseModelIndex,
- pricingData: make(map[string]configstoreTables.TableModelPricing),
- compiledOverrides: make(map[schemas.ModelProvider][]compiledProviderPricingOverride),
+ modelPool: modelPool,
+ baseModelIndex: baseModelIndex,
+ pricingData: make(map[string]configstoreTables.TableModelPricing),
}
}
diff --git a/framework/modelcatalog/overrides.go b/framework/modelcatalog/overrides.go
index 6eef025a48..f284a80a8e 100644
--- a/framework/modelcatalog/overrides.go
+++ b/framework/modelcatalog/overrides.go
@@ -1,279 +1,456 @@
package modelcatalog
import (
+ "context"
"fmt"
- "regexp"
+ "sort"
"strings"
"github.com/maximhq/bifrost/core/schemas"
+ "github.com/maximhq/bifrost/framework/configstore"
configstoreTables "github.com/maximhq/bifrost/framework/configstore/tables"
)
-type compiledProviderPricingOverride struct {
- override schemas.ProviderPricingOverride
- regex *regexp.Regexp
- requestModes map[string]struct{}
- hasRequestFilter bool
- literalChars int
- order int
+// PricingLookupScopes carries the runtime identifiers used to resolve scoped
+// pricing overrides during cost calculation.
+type PricingLookupScopes struct {
+ VirtualKeyID string
+ SelectedKeyID string
+ Provider string
}
-func (mc *ModelCatalog) SetProviderPricingOverrides(provider schemas.ModelProvider, overrides []schemas.ProviderPricingOverride) error {
- compiled := make([]compiledProviderPricingOverride, 0, len(overrides))
- for i := range overrides {
- item, err := compileProviderPricingOverride(i, overrides[i])
- if err != nil {
- return fmt.Errorf("invalid pricing override for provider %s at index %d: %w", provider, i, err)
- }
- compiled = append(compiled, item)
- }
-
- mc.overridesMu.Lock()
- defer mc.overridesMu.Unlock()
- if len(compiled) == 0 {
- delete(mc.compiledOverrides, provider)
+// PricingLookupScopesFromContext builds a PricingLookupScopes from a BifrostContext.
+// It reads the governance virtual key ID (not the raw VK token) and the selected key ID.
+// provider should be the provider name string (e.g. "openai"), pass "" if unavailable.
+// Returns nil only when ctx is nil. An empty scopes value is still returned when all fields
+// are empty so that global-scope overrides are always evaluated.
+// DO NOT USE THIS FUNCTION IN A GO ROUTINE. This is because it reads from ctx which is cancelled when the request ends.
+// Better to call it in PostHooks synchronously and then pass the scopes object to the pricing manager.
+// Only use this in go routines when you know for sure that the request will not end before the go routine completes.
+func PricingLookupScopesFromContext(ctx *schemas.BifrostContext, provider string) *PricingLookupScopes {
+ if ctx == nil {
return nil
}
- mc.compiledOverrides[provider] = compiled
- return nil
+ virtualKeyID, _ := ctx.Value(schemas.BifrostContextKeyGovernanceVirtualKeyID).(string)
+ selectedKeyID, _ := ctx.Value(schemas.BifrostContextKeySelectedKeyID).(string)
+ return &PricingLookupScopes{
+ VirtualKeyID: virtualKeyID,
+ SelectedKeyID: selectedKeyID,
+ Provider: provider,
+ }
}
-func (mc *ModelCatalog) DeleteProviderPricingOverrides(provider schemas.ModelProvider) {
- mc.overridesMu.Lock()
- defer mc.overridesMu.Unlock()
- delete(mc.compiledOverrides, provider)
-}
+// ScopeKind identifies which governance scope an override applies to.
+type ScopeKind string
-func (mc *ModelCatalog) applyPricingOverrides(provider schemas.ModelProvider, model string, requestType schemas.RequestType, pricing configstoreTables.TableModelPricing) configstoreTables.TableModelPricing {
- mc.overridesMu.RLock()
- overrides := mc.compiledOverrides[provider]
- mc.overridesMu.RUnlock()
- if len(overrides) == 0 {
- return pricing
- }
+const (
+ ScopeKindGlobal ScopeKind = "global"
+ ScopeKindProvider ScopeKind = "provider"
+ ScopeKindProviderKey ScopeKind = "provider_key"
+ ScopeKindVirtualKey ScopeKind = "virtual_key"
+ ScopeKindVirtualKeyProvider ScopeKind = "virtual_key_provider"
+ ScopeKindVirtualKeyProviderKey ScopeKind = "virtual_key_provider_key"
+)
- modelCandidates := []string{model}
- mode := normalizeRequestType(requestType)
- best := selectBestOverride(overrides, modelCandidates, mode)
- if best == nil {
- return pricing
- }
+// MatchType controls how an override pattern is matched against model names.
+type MatchType string
- return patchPricing(pricing, best.override)
+const (
+ MatchTypeExact MatchType = "exact"
+ MatchTypeWildcard MatchType = "wildcard"
+)
+
+// PricingOverride describes a scoped pricing override shared across config storage,
+// model catalog compilation, and governance APIs.
+type PricingOverride struct {
+ ID string `json:"id"`
+ Name string `json:"name"`
+ ScopeKind ScopeKind `json:"scope_kind"`
+ VirtualKeyID *string `json:"virtual_key_id,omitempty"`
+ ProviderID *string `json:"provider_id,omitempty"`
+ ProviderKeyID *string `json:"provider_key_id,omitempty"`
+ MatchType MatchType `json:"match_type"`
+ Pattern string `json:"pattern"`
+ RequestTypes []schemas.RequestType `json:"request_types,omitempty"`
+ Options PricingOptions `json:"options"`
}
-func compileProviderPricingOverride(order int, override schemas.ProviderPricingOverride) (compiledProviderPricingOverride, error) {
- pattern := strings.TrimSpace(override.ModelPattern)
- if pattern == "" {
- return compiledProviderPricingOverride{}, fmt.Errorf("model_pattern cannot be empty")
- }
+// customPricingEntry is a single flattened override ready for lookup.
+type customPricingEntry struct {
+ id string
+ scopeKind ScopeKind
+ virtualKeyID string
+ providerID string
+ providerKeyID string
+ pattern string // exact model name, or wildcard prefix (trailing * stripped)
+ wildcard bool
+ requestModes map[string]struct{} // always non-nil for valid overrides
+ options PricingOptions
+}
- result := compiledProviderPricingOverride{
- override: override,
- requestModes: make(map[string]struct{}),
- order: order,
- }
- result.override.ModelPattern = pattern
+// customPricingData is the in-memory lookup structure for pricing overrides.
+// Exact matches are indexed by model name; wildcards are a flat slice.
+type customPricingData struct {
+ exact map[string][]customPricingEntry
+ wildcard []customPricingEntry
+}
- switch override.MatchType {
- case schemas.PricingOverrideMatchExact:
- result.literalChars = len(pattern)
- case schemas.PricingOverrideMatchWildcard:
- if !strings.Contains(pattern, "*") {
- return compiledProviderPricingOverride{}, fmt.Errorf("wildcard model_pattern must contain '*'")
+// IsValid validates the shared pricing override contract before persistence or runtime use.
+//
+// Input: override — the PricingOverride to validate (receiver).
+// Output: error — non-nil if any scope, pattern, or request-type constraint is violated.
+func (override *PricingOverride) IsValid() error {
+ if err := override.validateScopeKind(); err != nil {
+ return err
+ }
+ if err := override.validatePattern(); err != nil {
+ return err
+ }
+ return override.validateRequestTypes()
+}
+
+// validateScopeKind validates the scope identifiers required by override.ScopeKind.
+//
+// Input: override — receiver; ScopeKind and the three optional ID fields are inspected.
+// Output: error — non-nil when required identifiers are absent or forbidden ones are present.
+func (override *PricingOverride) validateScopeKind() error {
+ switch override.ScopeKind {
+ case ScopeKindGlobal:
+ if override.VirtualKeyID != nil || override.ProviderID != nil || override.ProviderKeyID != nil {
+ return fmt.Errorf("global scope_kind must not include scope identifiers")
}
- result.literalChars = len(strings.ReplaceAll(pattern, "*", ""))
- case schemas.PricingOverrideMatchRegex:
- re, err := regexp.Compile(pattern)
- if err != nil {
- return compiledProviderPricingOverride{}, fmt.Errorf("invalid regex model_pattern: %w", err)
+ case ScopeKindProvider:
+ if override.ProviderID == nil {
+ return fmt.Errorf("provider_id is required for provider scope_kind")
}
- result.regex = re
- result.literalChars = len(pattern)
- default:
- return compiledProviderPricingOverride{}, fmt.Errorf("unsupported match_type: %s", override.MatchType)
- }
-
- if len(override.RequestTypes) > 0 {
- result.hasRequestFilter = true
- for _, requestType := range override.RequestTypes {
- mode := normalizeRequestType(requestType)
- if mode == "unknown" {
- return compiledProviderPricingOverride{}, fmt.Errorf("unsupported request_type: %s", requestType)
- }
- result.requestModes[mode] = struct{}{}
+ if override.VirtualKeyID != nil || override.ProviderKeyID != nil {
+ return fmt.Errorf("provider scope_kind only supports provider_id")
+ }
+ case ScopeKindProviderKey:
+ if override.ProviderKeyID == nil {
+ return fmt.Errorf("provider_key_id is required for provider_key scope_kind")
+ }
+ if override.VirtualKeyID != nil || override.ProviderID != nil {
+ return fmt.Errorf("provider_key scope_kind only supports provider_key_id")
+ }
+ case ScopeKindVirtualKey:
+ if override.VirtualKeyID == nil {
+ return fmt.Errorf("virtual_key_id is required for virtual_key scope_kind")
+ }
+ if override.ProviderID != nil || override.ProviderKeyID != nil {
+ return fmt.Errorf("virtual_key scope_kind only supports virtual_key_id")
+ }
+ case ScopeKindVirtualKeyProvider:
+ if override.VirtualKeyID == nil || override.ProviderID == nil {
+ return fmt.Errorf("virtual_key_id and provider_id are required for virtual_key_provider scope_kind")
}
+ if override.ProviderKeyID != nil {
+ return fmt.Errorf("virtual_key_provider scope_kind does not support provider_key_id")
+ }
+ case ScopeKindVirtualKeyProviderKey:
+ if override.VirtualKeyID == nil || override.ProviderID == nil || override.ProviderKeyID == nil {
+ return fmt.Errorf("virtual_key_id, provider_id, and provider_key_id are required for virtual_key_provider_key scope_kind")
+ }
+ default:
+ return fmt.Errorf("unsupported scope_kind %q", override.ScopeKind)
}
-
- return result, nil
+ return nil
}
-func selectBestOverride(overrides []compiledProviderPricingOverride, modelCandidates []string, mode string) *compiledProviderPricingOverride {
- var best *compiledProviderPricingOverride
- for i := range overrides {
- candidate := &overrides[i]
- if candidate.hasRequestFilter {
- if _, ok := candidate.requestModes[mode]; !ok {
- continue
- }
+// validatePattern checks that Pattern is non-empty and consistent with MatchType.
+//
+// Input: override — receiver; Pattern and MatchType are inspected.
+// Output: error — non-nil when the pattern is empty, contains a wildcard for exact mode,
+//
+// or does not end with a single trailing "*" for wildcard mode.
+func (override *PricingOverride) validatePattern() error {
+ pattern := strings.TrimSpace(override.Pattern)
+ if pattern == "" {
+ return fmt.Errorf("pattern is required")
+ }
+ switch override.MatchType {
+ case MatchTypeExact:
+ if strings.Contains(pattern, "*") {
+ return fmt.Errorf("exact match pattern must not contain wildcards")
}
- if !matchesAnyModel(candidate, modelCandidates) {
- continue
+ case MatchTypeWildcard:
+ if !strings.HasSuffix(pattern, "*") {
+ return fmt.Errorf("wildcard pattern must end with *")
}
- if isBetterOverride(candidate, best) {
- best = candidate
+ if strings.Count(pattern, "*") != 1 {
+ return fmt.Errorf("wildcard pattern must contain exactly one trailing *")
}
+ default:
+ return fmt.Errorf("unsupported match_type %q", override.MatchType)
}
- return best
+ return nil
}
-func matchesAnyModel(override *compiledProviderPricingOverride, modelCandidates []string) bool {
- for _, model := range modelCandidates {
- if matchesModel(override, model) {
- return true
+// validateRequestTypes checks that RequestTypes is non-empty and that every entry is a
+// supported base request type. Stream variants (e.g. chat_completion_stream) are rejected —
+// the base type (chat_completion) already covers both streaming and non-streaming requests.
+//
+// Input: override — receiver; RequestTypes slice is inspected.
+// Output: error — non-nil if RequestTypes is empty, or contains an unsupported or stream variant.
+func (override *PricingOverride) validateRequestTypes() error {
+ if len(override.RequestTypes) == 0 {
+ return fmt.Errorf("request_types is required and must contain at least one value")
+ }
+ for _, rt := range override.RequestTypes {
+ if normalizeStreamRequestType(rt) != rt {
+ return fmt.Errorf("unsupported request_type %q: use the base type (e.g. %q covers both streaming and non-streaming)", rt, normalizeStreamRequestType(rt))
+ }
+ if normalizeRequestType(rt) == "unknown" {
+ return fmt.Errorf("unsupported request_type %q", rt)
}
}
- return false
+ return nil
}
-func matchesModel(override *compiledProviderPricingOverride, model string) bool {
- switch override.override.MatchType {
- case schemas.PricingOverrideMatchExact:
- return model == override.override.ModelPattern
- case schemas.PricingOverrideMatchWildcard:
- return wildcardMatch(override.override.ModelPattern, model)
- case schemas.PricingOverrideMatchRegex:
- return override.regex != nil && override.regex.MatchString(model)
- default:
- return false
+// matchesScope reports whether the entry's governance scope matches the runtime identifiers.
+//
+// Input: scopes — runtime VirtualKeyID, SelectedKeyID, and Provider to match against.
+// Output: bool — true when the entry's scope kind and stored IDs align with scopes.
+func (e *customPricingEntry) matchesScope(scopes PricingLookupScopes) bool {
+ switch e.scopeKind {
+ case ScopeKindGlobal:
+ return true
+ case ScopeKindProvider:
+ return e.providerID == scopes.Provider
+ case ScopeKindProviderKey:
+ return e.providerKeyID == scopes.SelectedKeyID
+ case ScopeKindVirtualKey:
+ return e.virtualKeyID == scopes.VirtualKeyID
+ case ScopeKindVirtualKeyProvider:
+ return e.virtualKeyID == scopes.VirtualKeyID && e.providerID == scopes.Provider
+ case ScopeKindVirtualKeyProviderKey:
+ return e.virtualKeyID == scopes.VirtualKeyID && e.providerID == scopes.Provider && e.providerKeyID == scopes.SelectedKeyID
}
+ return false
}
-func overridePriority(matchType schemas.PricingOverrideMatchType) int {
- switch matchType {
- case schemas.PricingOverrideMatchExact:
- return 0
- case schemas.PricingOverrideMatchWildcard:
- return 1
- case schemas.PricingOverrideMatchRegex:
- return 2
- default:
- return 3
- }
+// matchesMode reports whether the entry applies to the given normalized request mode.
+//
+// Input: mode — normalized request type string (e.g. "chat", "embedding").
+// Output: bool — true when requestModes contains mode.
+func (e *customPricingEntry) matchesMode(mode string) bool {
+ _, ok := e.requestModes[mode]
+ return ok
}
-func isBetterOverride(candidate, best *compiledProviderPricingOverride) bool {
- if best == nil {
- return true
- }
-
- candidatePriority := overridePriority(candidate.override.MatchType)
- bestPriority := overridePriority(best.override.MatchType)
- if candidatePriority != bestPriority {
- return candidatePriority < bestPriority
+// resolve walks the 6-scope priority hierarchy and returns the first matching
+// pricing patch for the given model, request mode, and runtime scopes.
+//
+// Input: model — exact model name being priced.
+//
+// mode — normalized request type string (e.g. "chat", "embedding").
+// scopes — runtime governance identifiers used to narrow the scope search.
+//
+// Output: *PricingOptions — pointer to the first matching override's options, or nil if none match.
+func (c *customPricingData) resolve(model, mode string, scopes PricingLookupScopes) *PricingOptions {
+ for _, scopeKind := range scopePriorityOrder(scopes) {
+ for i := range c.exact[model] {
+ e := &c.exact[model][i]
+ if e.scopeKind == scopeKind && e.matchesScope(scopes) && e.matchesMode(mode) {
+ return &e.options
+ }
+ }
+ for i := range c.wildcard {
+ e := &c.wildcard[i]
+ if e.scopeKind == scopeKind && e.matchesScope(scopes) && strings.HasPrefix(model, e.pattern) && e.matchesMode(mode) {
+ return &e.options
+ }
+ }
}
+ return nil
+}
- if candidate.hasRequestFilter != best.hasRequestFilter {
- return candidate.hasRequestFilter
- }
+// scopePriorityOrder returns scope kinds in most-specific-first order,
+// skipping scopes that can't match given the available runtime identifiers.
+//
+// Input: scopes — runtime governance identifiers; empty fields cause the corresponding scope kinds to be omitted.
+// Output: []ScopeKind — ordered list from most-specific (VirtualKeyProviderKey) to least-specific (Global).
+func scopePriorityOrder(scopes PricingLookupScopes) []ScopeKind {
+ order := make([]ScopeKind, 0, 6)
+ if scopes.VirtualKeyID != "" && scopes.Provider != "" && scopes.SelectedKeyID != "" {
+ order = append(order, ScopeKindVirtualKeyProviderKey)
+ }
+ if scopes.VirtualKeyID != "" && scopes.Provider != "" {
+ order = append(order, ScopeKindVirtualKeyProvider)
+ }
+ if scopes.VirtualKeyID != "" {
+ order = append(order, ScopeKindVirtualKey)
+ }
+ if scopes.SelectedKeyID != "" {
+ order = append(order, ScopeKindProviderKey)
+ }
+ if scopes.Provider != "" {
+ order = append(order, ScopeKindProvider)
+ }
+ order = append(order, ScopeKindGlobal)
+ return order
+}
- if candidate.literalChars != best.literalChars {
- return candidate.literalChars > best.literalChars
+// buildCustomPricingData constructs a customPricingData lookup structure from a raw override slice.
+//
+// Input: overrides — slice of validated PricingOverride records loaded from the config store.
+// Output: *customPricingData — ready-to-query structure with exact and wildcard indexes populated.
+func buildCustomPricingData(overrides []PricingOverride) *customPricingData {
+ data := &customPricingData{
+ exact: make(map[string][]customPricingEntry, len(overrides)),
+ }
+ for _, o := range overrides {
+ entry := customPricingEntry{
+ id: o.ID,
+ scopeKind: o.ScopeKind,
+ options: o.Options,
+ }
+ if o.VirtualKeyID != nil {
+ entry.virtualKeyID = *o.VirtualKeyID
+ }
+ if o.ProviderID != nil {
+ entry.providerID = *o.ProviderID
+ }
+ if o.ProviderKeyID != nil {
+ entry.providerKeyID = *o.ProviderKeyID
+ }
+ entry.requestModes = make(map[string]struct{}, len(o.RequestTypes))
+ for _, rt := range o.RequestTypes {
+ entry.requestModes[normalizeRequestType(rt)] = struct{}{}
+ }
+ pattern := strings.TrimSpace(o.Pattern)
+ switch o.MatchType {
+ case MatchTypeExact:
+ entry.pattern = pattern
+ data.exact[pattern] = append(data.exact[pattern], entry)
+ case MatchTypeWildcard:
+ entry.pattern = strings.TrimSuffix(pattern, "*")
+ entry.wildcard = true
+ data.wildcard = append(data.wildcard, entry)
+ }
}
-
- return candidate.order < best.order
+ // Sort wildcards by descending prefix length so more-specific patterns (e.g. "gpt-4*")
+ // are checked before broader ones (e.g. "gpt-*"), making precedence deterministic.
+ sort.Slice(data.wildcard, func(i, j int) bool {
+ return len(data.wildcard[i].pattern) > len(data.wildcard[j].pattern)
+ })
+ return data
}
-func wildcardMatch(pattern, model string) bool {
- parts := strings.Split(pattern, "*")
- if len(parts) == 1 {
- return model == pattern
- }
+// applyPricingOverrides resolves any active scoped pricing override for the given model
+// and request type, then patches the catalog base pricing with the override values.
+// It returns the original pricing unchanged when no custom pricing tree is loaded or
+// when the request type cannot be mapped to a known pricing mode.
+//
+// Input: model — exact model name being priced.
+//
+// requestType — the request type used to derive the pricing mode.
+// pricing — base pricing row from the catalog to patch.
+// scopes — runtime governance identifiers used to narrow the override scope.
+//
+// Output: TableModelPricing — patched pricing row, or pricing unchanged if no override matches.
+// bool — true when an override was applied, false otherwise.
+func (mc *ModelCatalog) applyPricingOverrides(model string, requestType schemas.RequestType, pricing configstoreTables.TableModelPricing, scopes PricingLookupScopes) (configstoreTables.TableModelPricing, bool) {
+ mc.overridesMu.RLock()
+ custom := mc.customPricing
+ mc.overridesMu.RUnlock()
- remaining := model
- if parts[0] != "" {
- if !strings.HasPrefix(remaining, parts[0]) {
- return false
- }
- remaining = remaining[len(parts[0]):]
+ if custom == nil {
+ return pricing, false
}
- for i := 1; i < len(parts)-1; i++ {
- part := parts[i]
- if part == "" {
- continue
- }
- index := strings.Index(remaining, part)
- if index < 0 {
- return false
- }
- remaining = remaining[index+len(part):]
+ mode := normalizeRequestType(requestType)
+ if mode == "unknown" {
+ return pricing, false
}
- last := parts[len(parts)-1]
- if last == "" {
- return true
+ if patch := custom.resolve(model, mode, scopes); patch != nil {
+ return patchPricing(pricing, *patch), true
}
- return strings.HasSuffix(remaining, last)
+ return pricing, false
}
-func patchPricing(pricing configstoreTables.TableModelPricing, override schemas.ProviderPricingOverride) configstoreTables.TableModelPricing {
+// patchPricing applies override values onto a copy of the base pricing row.
+// For all fields, a non-nil override pointer replaces the corresponding destination value;
+// a nil override leaves the base value intact.
+// The original pricing row is never modified; a patched copy is always returned.
+//
+// Input: pricing — base pricing row from the catalog.
+//
+// override — pricing options sourced from the matched override entry.
+//
+// Output: TableModelPricing — shallow copy of pricing with override fields applied.
+func patchPricing(pricing configstoreTables.TableModelPricing, override PricingOptions) configstoreTables.TableModelPricing {
patched := pricing
- if override.InputCostPerToken != nil {
- patched.InputCostPerToken = *override.InputCostPerToken
- }
- if override.OutputCostPerToken != nil {
- patched.OutputCostPerToken = *override.OutputCostPerToken
- }
- if override.InputCostPerVideoPerSecond != nil {
- patched.InputCostPerVideoPerSecond = override.InputCostPerVideoPerSecond
- }
- if override.InputCostPerAudioPerSecond != nil {
- patched.InputCostPerAudioPerSecond = override.InputCostPerAudioPerSecond
- }
- if override.InputCostPerTokenAbove200kTokens != nil {
- patched.InputCostPerTokenAbove200kTokens = override.InputCostPerTokenAbove200kTokens
- }
- if override.OutputCostPerTokenAbove200kTokens != nil {
- patched.OutputCostPerTokenAbove200kTokens = override.OutputCostPerTokenAbove200kTokens
- }
- if override.CacheCreationInputTokenCostAbove200kTokens != nil {
- patched.CacheCreationInputTokenCostAbove200kTokens = override.CacheCreationInputTokenCostAbove200kTokens
- }
- if override.CacheReadInputTokenCostAbove200kTokens != nil {
- patched.CacheReadInputTokenCostAbove200kTokens = override.CacheReadInputTokenCostAbove200kTokens
- }
- if override.CacheReadInputTokenCost != nil {
- patched.CacheReadInputTokenCost = override.CacheReadInputTokenCost
- }
- if override.CacheCreationInputTokenCost != nil {
- patched.CacheCreationInputTokenCost = override.CacheCreationInputTokenCost
- }
- if override.InputCostPerTokenBatches != nil {
- patched.InputCostPerTokenBatches = override.InputCostPerTokenBatches
- }
- if override.OutputCostPerTokenBatches != nil {
- patched.OutputCostPerTokenBatches = override.OutputCostPerTokenBatches
- }
- if override.InputCostPerImage != nil {
- patched.InputCostPerImage = override.InputCostPerImage
- }
- if override.OutputCostPerImage != nil {
- patched.OutputCostPerImage = override.OutputCostPerImage
- }
- if override.OutputCostPerImageLowQuality != nil {
- patched.OutputCostPerImageLowQuality = override.OutputCostPerImageLowQuality
- }
- if override.OutputCostPerImageMediumQuality != nil {
- patched.OutputCostPerImageMediumQuality = override.OutputCostPerImageMediumQuality
+ for _, field := range []struct {
+ dst **float64
+ src *float64
+ }{
+ {dst: &patched.InputCostPerToken, src: override.InputCostPerToken},
+ {dst: &patched.OutputCostPerToken, src: override.OutputCostPerToken},
+ {dst: &patched.InputCostPerTokenPriority, src: override.InputCostPerTokenPriority},
+ {dst: &patched.OutputCostPerTokenPriority, src: override.OutputCostPerTokenPriority},
+ {dst: &patched.InputCostPerVideoPerSecond, src: override.InputCostPerVideoPerSecond},
+ {dst: &patched.OutputCostPerVideoPerSecond, src: override.OutputCostPerVideoPerSecond},
+ {dst: &patched.OutputCostPerSecond, src: override.OutputCostPerSecond},
+ {dst: &patched.InputCostPerAudioPerSecond, src: override.InputCostPerAudioPerSecond},
+ {dst: &patched.InputCostPerSecond, src: override.InputCostPerSecond},
+ {dst: &patched.InputCostPerAudioToken, src: override.InputCostPerAudioToken},
+ {dst: &patched.OutputCostPerAudioToken, src: override.OutputCostPerAudioToken},
+ {dst: &patched.InputCostPerCharacter, src: override.InputCostPerCharacter},
+ {dst: &patched.InputCostPerTokenAbove128kTokens, src: override.InputCostPerTokenAbove128kTokens},
+ {dst: &patched.InputCostPerImageAbove128kTokens, src: override.InputCostPerImageAbove128kTokens},
+ {dst: &patched.InputCostPerVideoPerSecondAbove128kTokens, src: override.InputCostPerVideoPerSecondAbove128kTokens},
+ {dst: &patched.InputCostPerAudioPerSecondAbove128kTokens, src: override.InputCostPerAudioPerSecondAbove128kTokens},
+ {dst: &patched.OutputCostPerTokenAbove128kTokens, src: override.OutputCostPerTokenAbove128kTokens},
+ {dst: &patched.InputCostPerTokenAbove200kTokens, src: override.InputCostPerTokenAbove200kTokens},
+ {dst: &patched.OutputCostPerTokenAbove200kTokens, src: override.OutputCostPerTokenAbove200kTokens},
+ {dst: &patched.CacheCreationInputTokenCostAbove200kTokens, src: override.CacheCreationInputTokenCostAbove200kTokens},
+ {dst: &patched.CacheReadInputTokenCostAbove200kTokens, src: override.CacheReadInputTokenCostAbove200kTokens},
+ {dst: &patched.CacheReadInputTokenCost, src: override.CacheReadInputTokenCost},
+ {dst: &patched.CacheCreationInputTokenCost, src: override.CacheCreationInputTokenCost},
+ {dst: &patched.CacheCreationInputTokenCostAbove1hr, src: override.CacheCreationInputTokenCostAbove1hr},
+ {dst: &patched.CacheCreationInputTokenCostAbove1hrAbove200kTokens, src: override.CacheCreationInputTokenCostAbove1hrAbove200kTokens},
+ {dst: &patched.CacheCreationInputAudioTokenCost, src: override.CacheCreationInputAudioTokenCost},
+ {dst: &patched.CacheReadInputTokenCostPriority, src: override.CacheReadInputTokenCostPriority},
+ {dst: &patched.InputCostPerTokenBatches, src: override.InputCostPerTokenBatches},
+ {dst: &patched.OutputCostPerTokenBatches, src: override.OutputCostPerTokenBatches},
+ {dst: &patched.InputCostPerImageToken, src: override.InputCostPerImageToken},
+ {dst: &patched.OutputCostPerImageToken, src: override.OutputCostPerImageToken},
+ {dst: &patched.InputCostPerImage, src: override.InputCostPerImage},
+ {dst: &patched.OutputCostPerImage, src: override.OutputCostPerImage},
+ {dst: &patched.InputCostPerPixel, src: override.InputCostPerPixel},
+ {dst: &patched.OutputCostPerPixel, src: override.OutputCostPerPixel},
+ {dst: &patched.OutputCostPerImagePremiumImage, src: override.OutputCostPerImagePremiumImage},
+ {dst: &patched.OutputCostPerImageAbove512x512Pixels, src: override.OutputCostPerImageAbove512x512Pixels},
+ {dst: &patched.OutputCostPerImageAbove512x512PixelsPremium, src: override.OutputCostPerImageAbove512x512PixelsPremium},
+ {dst: &patched.OutputCostPerImageAbove1024x1024Pixels, src: override.OutputCostPerImageAbove1024x1024Pixels},
+ {dst: &patched.OutputCostPerImageAbove1024x1024PixelsPremium, src: override.OutputCostPerImageAbove1024x1024PixelsPremium},
+ {dst: &patched.OutputCostPerImageAbove2048x2048Pixels, src: override.OutputCostPerImageAbove2048x2048Pixels},
+ {dst: &patched.OutputCostPerImageAbove4096x4096Pixels, src: override.OutputCostPerImageAbove4096x4096Pixels},
+ {dst: &patched.CacheReadInputImageTokenCost, src: override.CacheReadInputImageTokenCost},
+ {dst: &patched.SearchContextCostPerQuery, src: override.SearchContextCostPerQuery},
+ {dst: &patched.CodeInterpreterCostPerSession, src: override.CodeInterpreterCostPerSession},
+ {dst: &patched.OutputCostPerImageLowQuality, src: override.OutputCostPerImageLowQuality},
+ {dst: &patched.OutputCostPerImageMediumQuality, src: override.OutputCostPerImageMediumQuality},
+ {dst: &patched.OutputCostPerImageHighQuality, src: override.OutputCostPerImageHighQuality},
+ {dst: &patched.OutputCostPerImageAutoQuality, src: override.OutputCostPerImageAutoQuality},
+ } {
+ if field.src != nil {
+ *field.dst = field.src
+ }
}
- if override.OutputCostPerImageHighQuality != nil {
- patched.OutputCostPerImageHighQuality = override.OutputCostPerImageHighQuality
+ return patched
+}
+
+func (mc *ModelCatalog) loadPricingOverridesFromStore(ctx context.Context) error {
+ if mc.configStore == nil {
+ return nil
}
- if override.OutputCostPerImageAutoQuality != nil {
- patched.OutputCostPerImageAutoQuality = override.OutputCostPerImageAutoQuality
+ rows, err := mc.configStore.GetPricingOverrides(ctx, configstore.PricingOverrideFilters{})
+ if err != nil {
+ return err
}
-
- return patched
+ return mc.SetPricingOverrides(rows)
}
diff --git a/framework/modelcatalog/overrides_test.go b/framework/modelcatalog/overrides_test.go
index 5f2ae1df49..f073dd65c8 100644
--- a/framework/modelcatalog/overrides_test.go
+++ b/framework/modelcatalog/overrides_test.go
@@ -3,6 +3,7 @@ package modelcatalog
import (
"testing"
+ bifrost "github.com/maximhq/bifrost/core"
"github.com/maximhq/bifrost/core/schemas"
configstoreTables "github.com/maximhq/bifrost/framework/configstore/tables"
"github.com/stretchr/testify/assert"
@@ -22,150 +23,180 @@ func (noOpLogger) LogHTTPRequest(schemas.LogLevel, string) schemas.LogEventBuild
return schemas.NoopLogEvent
}
-func TestSetProviderPricingOverrides_InvalidRegex(t *testing.T) {
- t.Skip()
- mc := newTestCatalog(nil, nil)
- err := mc.SetProviderPricingOverrides(schemas.OpenAI, []schemas.ProviderPricingOverride{
- {
- ModelPattern: "[",
- MatchType: schemas.PricingOverrideMatchRegex,
- },
- })
- require.Error(t, err)
-}
-
-func TestGetPricing_OverridePrecedenceExactWildcardRegex(t *testing.T) {
- t.Skip()
+func TestGetPricing_OverridePrecedenceExactWildcard(t *testing.T) {
mc := newTestCatalog(nil, nil)
mc.logger = noOpLogger{}
mc.pricingData[makeKey("gpt-4o", "openai", "chat")] = configstoreTables.TableModelPricing{
Model: "gpt-4o",
Provider: "openai",
Mode: "chat",
- InputCostPerToken: 1,
- OutputCostPerToken: 2,
+ InputCostPerToken: bifrost.Ptr(1.0),
+ OutputCostPerToken: bifrost.Ptr(2.0),
}
- exact := 20.0
- wildcard := 10.0
- regex := 30.0
- require.NoError(t, mc.SetProviderPricingOverrides(schemas.OpenAI, []schemas.ProviderPricingOverride{
- {
- ModelPattern: "gpt-*",
- MatchType: schemas.PricingOverrideMatchWildcard,
- InputCostPerToken: &wildcard,
- },
+ providerID := "openai"
+ require.NoError(t, mc.SetPricingOverrides([]configstoreTables.TablePricingOverride{
{
- ModelPattern: "^gpt-.*$",
- MatchType: schemas.PricingOverrideMatchRegex,
- InputCostPerToken: ®ex,
+ ID: "openai-override-0",
+ ScopeKind: string(ScopeKindProvider),
+ ProviderID: &providerID,
+ MatchType: string(MatchTypeWildcard),
+ Pattern: "gpt-*",
+ RequestTypes: []schemas.RequestType{schemas.ChatCompletionRequest},
+ PricingPatchJSON: `{"input_cost_per_token":10}`,
},
{
- ModelPattern: "gpt-4o",
- MatchType: schemas.PricingOverrideMatchExact,
- InputCostPerToken: &exact,
+ ID: "openai-override-1",
+ ScopeKind: string(ScopeKindProvider),
+ ProviderID: &providerID,
+ MatchType: string(MatchTypeExact),
+ Pattern: "gpt-4o",
+ RequestTypes: []schemas.RequestType{schemas.ChatCompletionRequest},
+ PricingPatchJSON: `{"input_cost_per_token":20}`,
},
}))
- pricing, ok := mc.getPricing("gpt-4o", "openai", schemas.ChatCompletionRequest)
- require.True(t, ok)
+ pricing := mc.resolvePricing("openai", "gpt-4o", "", schemas.ChatCompletionRequest, PricingLookupScopes{Provider: "openai"})
require.NotNil(t, pricing)
- assert.Equal(t, 20.0, pricing.InputCostPerToken)
- assert.Equal(t, 2.0, pricing.OutputCostPerToken)
+ require.NotNil(t, pricing.InputCostPerToken)
+ assert.Equal(t, 20.0, *pricing.InputCostPerToken)
}
-func TestGetPricing_WildcardBeatsRegex(t *testing.T) {
+func TestGetPricing_RequestTypeSpecificOverrideBeatsGeneric(t *testing.T) {
t.Skip()
mc := newTestCatalog(nil, nil)
mc.logger = noOpLogger{}
- mc.pricingData[makeKey("gpt-4o-mini", "openai", "chat")] = configstoreTables.TableModelPricing{
- Model: "gpt-4o-mini",
+ mc.pricingData[makeKey("gpt-4o", "openai", "responses")] = configstoreTables.TableModelPricing{
+ Model: "gpt-4o",
Provider: "openai",
- Mode: "chat",
- InputCostPerToken: 1,
- OutputCostPerToken: 2,
+ Mode: "responses",
+ InputCostPerToken: bifrost.Ptr(1.0),
+ OutputCostPerToken: bifrost.Ptr(2.0),
}
- wildcard := 11.0
- regex := 12.0
- require.NoError(t, mc.SetProviderPricingOverrides(schemas.OpenAI, []schemas.ProviderPricingOverride{
+ providerID := "openai"
+ require.NoError(t, mc.SetPricingOverrides([]configstoreTables.TablePricingOverride{
{
- ModelPattern: "^gpt-4o.*$",
- MatchType: schemas.PricingOverrideMatchRegex,
- InputCostPerToken: ®ex,
+ ID: "openai-generic",
+ ScopeKind: string(ScopeKindProvider),
+ ProviderID: &providerID,
+ MatchType: string(MatchTypeExact),
+ Pattern: "gpt-4o",
+ PricingPatchJSON: `{"input_cost_per_token":9}`,
},
{
- ModelPattern: "gpt-4o*",
- MatchType: schemas.PricingOverrideMatchWildcard,
- InputCostPerToken: &wildcard,
+ ID: "openai-specific",
+ ScopeKind: string(ScopeKindProvider),
+ ProviderID: &providerID,
+ MatchType: string(MatchTypeExact),
+ Pattern: "gpt-4o",
+ RequestTypes: []schemas.RequestType{schemas.ResponsesRequest},
+ PricingPatchJSON: `{"input_cost_per_token":15}`,
},
}))
- pricing, ok := mc.getPricing("gpt-4o-mini", "openai", schemas.ChatCompletionRequest)
- require.True(t, ok)
+ pricing := mc.resolvePricing("openai", "gpt-4o", "", schemas.ResponsesRequest, PricingLookupScopes{Provider: "openai"})
require.NotNil(t, pricing)
- assert.Equal(t, 11.0, pricing.InputCostPerToken)
+ assert.Equal(t, 15.0, pricing.InputCostPerToken)
}
-func TestGetPricing_RequestTypeSpecificOverrideBeatsGeneric(t *testing.T) {
+func TestGetPricing_AppliesOverrideAfterFallbackResolution(t *testing.T) {
t.Skip()
mc := newTestCatalog(nil, nil)
mc.logger = noOpLogger{}
- mc.pricingData[makeKey("gpt-4o", "openai", "responses")] = configstoreTables.TableModelPricing{
+ mc.pricingData[makeKey("gpt-4o", "vertex", "chat")] = configstoreTables.TableModelPricing{
Model: "gpt-4o",
- Provider: "openai",
- Mode: "responses",
- InputCostPerToken: 1,
- OutputCostPerToken: 2,
+ Provider: "vertex",
+ Mode: "chat",
+ InputCostPerToken: bifrost.Ptr(1.0),
+ OutputCostPerToken: bifrost.Ptr(2.0),
}
- specific := 15.0
- generic := 9.0
- require.NoError(t, mc.SetProviderPricingOverrides(schemas.OpenAI, []schemas.ProviderPricingOverride{
+ geminiProviderID := "gemini"
+ require.NoError(t, mc.SetPricingOverrides([]configstoreTables.TablePricingOverride{
{
- ModelPattern: "gpt-4o",
- MatchType: schemas.PricingOverrideMatchExact,
- InputCostPerToken: &generic,
+ ID: "gemini-override",
+ ScopeKind: string(ScopeKindProvider),
+ ProviderID: &geminiProviderID,
+ MatchType: string(MatchTypeExact),
+ Pattern: "gpt-4o",
+ PricingPatchJSON: `{"input_cost_per_token":7}`,
},
+ }))
+
+ pricing := mc.resolvePricing("gemini", "gpt-4o", "", schemas.ChatCompletionRequest, PricingLookupScopes{Provider: "gemini"})
+ require.NotNil(t, pricing)
+ assert.Equal(t, 7.0, pricing.InputCostPerToken)
+}
+
+func TestGetPricing_DeploymentLookupUsesRequestedModelForOverrideMatching(t *testing.T) {
+ mc := newTestCatalog(nil, nil)
+ mc.logger = noOpLogger{}
+ mc.pricingData[makeKey("dep-gpt4o", "openai", "chat")] = configstoreTables.TableModelPricing{
+ Model: "dep-gpt4o",
+ Provider: "openai",
+ Mode: "chat",
+ InputCostPerToken: bifrost.Ptr(1.0),
+ OutputCostPerToken: bifrost.Ptr(2.0),
+ }
+
+ providerID := "openai"
+ require.NoError(t, mc.SetPricingOverrides([]configstoreTables.TablePricingOverride{
{
- ModelPattern: "gpt-4o",
- MatchType: schemas.PricingOverrideMatchExact,
- RequestTypes: []schemas.RequestType{schemas.ResponsesRequest},
- InputCostPerToken: &specific,
+ ID: "requested-model-override",
+ ScopeKind: string(ScopeKindProvider),
+ ProviderID: &providerID,
+ MatchType: string(MatchTypeExact),
+ Pattern: "gpt-4o",
+ RequestTypes: []schemas.RequestType{schemas.ChatCompletionRequest},
+ PricingPatchJSON: `{"input_cost_per_token":7}`,
},
}))
- pricing, ok := mc.getPricing("gpt-4o", "openai", schemas.ResponsesRequest)
- require.True(t, ok)
+ pricing := mc.resolvePricing("openai", "gpt-4o", "dep-gpt4o", schemas.ChatCompletionRequest, PricingLookupScopes{Provider: "openai"})
require.NotNil(t, pricing)
- assert.Equal(t, 15.0, pricing.InputCostPerToken)
+ require.NotNil(t, pricing.InputCostPerToken)
+ assert.Equal(t, 7.0, *pricing.InputCostPerToken)
}
-func TestGetPricing_AppliesOverrideAfterFallbackResolution(t *testing.T) {
- t.Skip()
+func TestGetPricing_FallbackUsesRequestedProviderForScopeMatching(t *testing.T) {
mc := newTestCatalog(nil, nil)
mc.logger = noOpLogger{}
mc.pricingData[makeKey("gpt-4o", "vertex", "chat")] = configstoreTables.TableModelPricing{
Model: "gpt-4o",
Provider: "vertex",
Mode: "chat",
- InputCostPerToken: 1,
- OutputCostPerToken: 2,
+ InputCostPerToken: bifrost.Ptr(1.0),
+ OutputCostPerToken: bifrost.Ptr(2.0),
}
- override := 7.0
- require.NoError(t, mc.SetProviderPricingOverrides(schemas.Gemini, []schemas.ProviderPricingOverride{
+ geminiProviderID := "gemini"
+ vertexProviderID := "vertex"
+ require.NoError(t, mc.SetPricingOverrides([]configstoreTables.TablePricingOverride{
+ {
+ ID: "gemini-provider-override",
+ ScopeKind: string(ScopeKindProvider),
+ ProviderID: &geminiProviderID,
+ MatchType: string(MatchTypeExact),
+ Pattern: "gpt-4o",
+ RequestTypes: []schemas.RequestType{schemas.ChatCompletionRequest},
+ PricingPatchJSON: `{"input_cost_per_token":5}`,
+ },
{
- ModelPattern: "gpt-4o",
- MatchType: schemas.PricingOverrideMatchExact,
- InputCostPerToken: &override,
+ ID: "vertex-provider-override",
+ ScopeKind: string(ScopeKindProvider),
+ ProviderID: &vertexProviderID,
+ MatchType: string(MatchTypeExact),
+ Pattern: "gpt-4o",
+ RequestTypes: []schemas.RequestType{schemas.ChatCompletionRequest},
+ PricingPatchJSON: `{"input_cost_per_token":9}`,
},
}))
- pricing, ok := mc.getPricing("gpt-4o", "gemini", schemas.ChatCompletionRequest)
- require.True(t, ok)
+ pricing := mc.resolvePricing("gemini", "gpt-4o", "", schemas.ChatCompletionRequest, PricingLookupScopes{Provider: "gemini"})
require.NotNil(t, pricing)
- assert.Equal(t, 7.0, pricing.InputCostPerToken)
+ require.NotNil(t, pricing.InputCostPerToken)
+ assert.Equal(t, 5.0, *pricing.InputCostPerToken)
}
func TestGetPricing_ExactOverrideDoesNotMatchProviderPrefixedModel(t *testing.T) {
@@ -176,21 +207,23 @@ func TestGetPricing_ExactOverrideDoesNotMatchProviderPrefixedModel(t *testing.T)
Model: "openai/gpt-4o",
Provider: "openai",
Mode: "chat",
- InputCostPerToken: 1,
- OutputCostPerToken: 2,
+ InputCostPerToken: bifrost.Ptr(1.0),
+ OutputCostPerToken: bifrost.Ptr(2.0),
}
- override := 19.0
- require.NoError(t, mc.SetProviderPricingOverrides(schemas.OpenAI, []schemas.ProviderPricingOverride{
+ providerID := "openai"
+ require.NoError(t, mc.SetPricingOverrides([]configstoreTables.TablePricingOverride{
{
- ModelPattern: "gpt-4o",
- MatchType: schemas.PricingOverrideMatchExact,
- InputCostPerToken: &override,
+ ID: "openai-override-0",
+ ScopeKind: string(ScopeKindProvider),
+ ProviderID: &providerID,
+ MatchType: string(MatchTypeExact),
+ Pattern: "gpt-4o",
+ PricingPatchJSON: `{"input_cost_per_token":19}`,
},
}))
- pricing, ok := mc.getPricing("openai/gpt-4o", "openai", schemas.ChatCompletionRequest)
- require.True(t, ok)
+ pricing := mc.resolvePricing("openai", "openai/gpt-4o", "", schemas.ChatCompletionRequest, PricingLookupScopes{Provider: "openai"})
require.NotNil(t, pricing)
assert.Equal(t, 1.0, pricing.InputCostPerToken)
}
@@ -204,22 +237,24 @@ func TestGetPricing_NoMatchingOverrideLeavesPricingUnchanged(t *testing.T) {
Model: "gpt-4o",
Provider: "openai",
Mode: "chat",
- InputCostPerToken: 1,
- OutputCostPerToken: 2,
+ InputCostPerToken: bifrost.Ptr(1.0),
+ OutputCostPerToken: bifrost.Ptr(2.0),
CacheReadInputTokenCost: &baseCacheRead,
}
- override := 9.0
- require.NoError(t, mc.SetProviderPricingOverrides(schemas.OpenAI, []schemas.ProviderPricingOverride{
+ providerID := "openai"
+ require.NoError(t, mc.SetPricingOverrides([]configstoreTables.TablePricingOverride{
{
- ModelPattern: "claude-*",
- MatchType: schemas.PricingOverrideMatchWildcard,
- InputCostPerToken: &override,
+ ID: "openai-override-0",
+ ScopeKind: string(ScopeKindProvider),
+ ProviderID: &providerID,
+ MatchType: string(MatchTypeWildcard),
+ Pattern: "claude-*",
+ PricingPatchJSON: `{"input_cost_per_token":9}`,
},
}))
- pricing, ok := mc.getPricing("gpt-4o", "openai", schemas.ChatCompletionRequest)
- require.True(t, ok)
+ pricing := mc.resolvePricing("openai", "gpt-4o", "", schemas.ChatCompletionRequest, PricingLookupScopes{Provider: "openai"})
require.NotNil(t, pricing)
assert.Equal(t, 1.0, pricing.InputCostPerToken)
assert.Equal(t, 2.0, pricing.OutputCostPerToken)
@@ -235,28 +270,29 @@ func TestDeleteProviderPricingOverrides_StopsApplying(t *testing.T) {
Model: "gpt-4o",
Provider: "openai",
Mode: "chat",
- InputCostPerToken: 1,
- OutputCostPerToken: 2,
+ InputCostPerToken: bifrost.Ptr(1.0),
+ OutputCostPerToken: bifrost.Ptr(2.0),
}
- override := 11.0
- require.NoError(t, mc.SetProviderPricingOverrides(schemas.OpenAI, []schemas.ProviderPricingOverride{
+ providerID := "openai"
+ require.NoError(t, mc.SetPricingOverrides([]configstoreTables.TablePricingOverride{
{
- ModelPattern: "gpt-4o",
- MatchType: schemas.PricingOverrideMatchExact,
- InputCostPerToken: &override,
+ ID: "openai-override-0",
+ ScopeKind: string(ScopeKindProvider),
+ ProviderID: &providerID,
+ MatchType: string(MatchTypeExact),
+ Pattern: "gpt-4o",
+ PricingPatchJSON: `{"input_cost_per_token":11}`,
},
}))
- pricing, ok := mc.getPricing("gpt-4o", "openai", schemas.ChatCompletionRequest)
- require.True(t, ok)
+ pricing := mc.resolvePricing("openai", "gpt-4o", "", schemas.ChatCompletionRequest, PricingLookupScopes{Provider: "openai"})
require.NotNil(t, pricing)
assert.Equal(t, 11.0, pricing.InputCostPerToken)
- mc.DeleteProviderPricingOverrides(schemas.OpenAI)
+ require.NoError(t, mc.SetPricingOverrides(nil))
- pricing, ok = mc.getPricing("gpt-4o", "openai", schemas.ChatCompletionRequest)
- require.True(t, ok)
+ pricing = mc.resolvePricing("openai", "gpt-4o", "", schemas.ChatCompletionRequest, PricingLookupScopes{Provider: "openai"})
require.NotNil(t, pricing)
assert.Equal(t, 1.0, pricing.InputCostPerToken)
}
@@ -269,62 +305,74 @@ func TestGetPricing_WildcardSpecificityLongerLiteralWins(t *testing.T) {
Model: "gpt-4o-mini",
Provider: "openai",
Mode: "chat",
- InputCostPerToken: 1,
- OutputCostPerToken: 2,
+ InputCostPerToken: bifrost.Ptr(1.0),
+ OutputCostPerToken: bifrost.Ptr(2.0),
}
- generic := 5.0
- specific := 6.0
- require.NoError(t, mc.SetProviderPricingOverrides(schemas.OpenAI, []schemas.ProviderPricingOverride{
+ providerID := "openai"
+ require.NoError(t, mc.SetPricingOverrides([]configstoreTables.TablePricingOverride{
{
- ModelPattern: "gpt-*",
- MatchType: schemas.PricingOverrideMatchWildcard,
- InputCostPerToken: &generic,
+ ID: "openai-override-0",
+ ScopeKind: string(ScopeKindProvider),
+ ProviderID: &providerID,
+ MatchType: string(MatchTypeWildcard),
+ Pattern: "gpt-*",
+ PricingPatchJSON: `{"input_cost_per_token":5}`,
},
{
- ModelPattern: "gpt-4o*",
- MatchType: schemas.PricingOverrideMatchWildcard,
- InputCostPerToken: &specific,
+ ID: "openai-override-1",
+ ScopeKind: string(ScopeKindProvider),
+ ProviderID: &providerID,
+ MatchType: string(MatchTypeWildcard),
+ Pattern: "gpt-4o*",
+ PricingPatchJSON: `{"input_cost_per_token":6}`,
},
}))
- pricing, ok := mc.getPricing("gpt-4o-mini", "openai", schemas.ChatCompletionRequest)
- require.True(t, ok)
+ pricing := mc.resolvePricing("openai", "gpt-4o-mini", "", schemas.ChatCompletionRequest, PricingLookupScopes{Provider: "openai"})
require.NotNil(t, pricing)
assert.Equal(t, 6.0, pricing.InputCostPerToken)
}
-func TestGetPricing_ConfigOrderTiebreakFirstWinsWhenEqual(t *testing.T) {
- t.Skip()
+// TestGetPricing_FirstInsertionWinsOnTie verifies that when multiple wildcard overrides
+// match the same model and scope, the first one inserted takes precedence.
+func TestGetPricing_FirstInsertionWinsOnTie(t *testing.T) {
mc := newTestCatalog(nil, nil)
mc.logger = noOpLogger{}
mc.pricingData[makeKey("gpt-4o-mini", "openai", "chat")] = configstoreTables.TableModelPricing{
Model: "gpt-4o-mini",
Provider: "openai",
Mode: "chat",
- InputCostPerToken: 1,
- OutputCostPerToken: 2,
+ InputCostPerToken: bifrost.Ptr(1.0),
+ OutputCostPerToken: bifrost.Ptr(2.0),
}
- first := 8.0
- second := 9.0
- require.NoError(t, mc.SetProviderPricingOverrides(schemas.OpenAI, []schemas.ProviderPricingOverride{
+ providerID := "openai"
+ require.NoError(t, mc.SetPricingOverrides([]configstoreTables.TablePricingOverride{
{
- ModelPattern: "gpt-4o*",
- MatchType: schemas.PricingOverrideMatchWildcard,
- InputCostPerToken: &first,
+ ID: "a-override",
+ ScopeKind: string(ScopeKindProvider),
+ ProviderID: &providerID,
+ MatchType: string(MatchTypeWildcard),
+ Pattern: "gpt-4o*",
+ RequestTypes: []schemas.RequestType{schemas.ChatCompletionRequest},
+ PricingPatchJSON: `{"input_cost_per_token":8}`,
},
{
- ModelPattern: "gpt-4o*",
- MatchType: schemas.PricingOverrideMatchWildcard,
- InputCostPerToken: &second,
+ ID: "b-override",
+ ScopeKind: string(ScopeKindProvider),
+ ProviderID: &providerID,
+ MatchType: string(MatchTypeWildcard),
+ Pattern: "gpt-4o*",
+ RequestTypes: []schemas.RequestType{schemas.ChatCompletionRequest},
+ PricingPatchJSON: `{"input_cost_per_token":9}`,
},
}))
- pricing, ok := mc.getPricing("gpt-4o-mini", "openai", schemas.ChatCompletionRequest)
- require.True(t, ok)
+ pricing := mc.resolvePricing("openai", "gpt-4o-mini", "", schemas.ChatCompletionRequest, PricingLookupScopes{Provider: "openai"})
require.NotNil(t, pricing)
- assert.Equal(t, 8.0, pricing.InputCostPerToken)
+ require.NotNil(t, pricing.InputCostPerToken)
+ assert.Equal(t, 8.0, *pricing.InputCostPerToken)
}
func TestPatchPricing_PartialPatchOnlyChangesSpecifiedFields(t *testing.T) {
@@ -335,26 +383,122 @@ func TestPatchPricing_PartialPatchOnlyChangesSpecifiedFields(t *testing.T) {
Model: "gpt-4o",
Provider: "openai",
Mode: "chat",
- InputCostPerToken: 1,
- OutputCostPerToken: 2,
+ InputCostPerToken: bifrost.Ptr(1.0),
+ OutputCostPerToken: bifrost.Ptr(2.0),
CacheReadInputTokenCost: &baseCacheRead,
InputCostPerImage: &baseInputImage,
}
- patched := patchPricing(base, schemas.ProviderPricingOverride{
- ModelPattern: "gpt-4o",
- MatchType: schemas.PricingOverrideMatchExact,
- InputCostPerToken: schemas.Ptr(3.0),
- CacheReadInputTokenCost: schemas.Ptr(0.9),
+ cacheRead := 0.9
+ patched := patchPricing(base, PricingOptions{
+ InputCostPerToken: bifrost.Ptr(3.0),
+ CacheReadInputTokenCost: &cacheRead,
})
- // Changed fields
assert.Equal(t, 3.0, patched.InputCostPerToken)
require.NotNil(t, patched.CacheReadInputTokenCost)
assert.Equal(t, 0.9, *patched.CacheReadInputTokenCost)
- // Unchanged fields
assert.Equal(t, 2.0, patched.OutputCostPerToken)
require.NotNil(t, patched.InputCostPerImage)
assert.Equal(t, 0.7, *patched.InputCostPerImage)
}
+
+func TestApplyScopedPricingOverrides_ScopePrecedence(t *testing.T) {
+ mc := newTestCatalog(nil, nil)
+ mc.logger = noOpLogger{}
+
+ providerScopeID := "openai"
+ providerKeyScopeID := "provider-key-1"
+ virtualKeyScopeID := "virtual-key-1"
+
+ require.NoError(t, mc.SetPricingOverrides([]configstoreTables.TablePricingOverride{
+ {
+ ID: "global",
+ ScopeKind: string(ScopeKindGlobal),
+ MatchType: string(MatchTypeExact),
+ Pattern: "gpt-5-nano",
+ RequestTypes: []schemas.RequestType{schemas.ChatCompletionRequest},
+ PricingPatchJSON: `{"input_cost_per_token":2}`,
+ },
+ {
+ ID: "provider",
+ ScopeKind: string(ScopeKindProvider),
+ ProviderID: &providerScopeID,
+ MatchType: string(MatchTypeExact),
+ Pattern: "gpt-5-nano",
+ RequestTypes: []schemas.RequestType{schemas.ChatCompletionRequest},
+ PricingPatchJSON: `{"input_cost_per_token":3}`,
+ },
+ {
+ ID: "provider-key",
+ ScopeKind: string(ScopeKindProviderKey),
+ ProviderKeyID: &providerKeyScopeID,
+ MatchType: string(MatchTypeExact),
+ Pattern: "gpt-5-nano",
+ RequestTypes: []schemas.RequestType{schemas.ChatCompletionRequest},
+ PricingPatchJSON: `{"input_cost_per_token":4}`,
+ },
+ {
+ ID: "virtual-key",
+ ScopeKind: string(ScopeKindVirtualKey),
+ VirtualKeyID: &virtualKeyScopeID,
+ MatchType: string(MatchTypeExact),
+ Pattern: "gpt-5-nano",
+ RequestTypes: []schemas.RequestType{schemas.ChatCompletionRequest},
+ PricingPatchJSON: `{"input_cost_per_token":5}`,
+ },
+ }))
+
+ base := configstoreTables.TableModelPricing{
+ Model: "gpt-5-nano",
+ Provider: "openai",
+ Mode: "chat",
+ InputCostPerToken: bifrost.Ptr(1.0),
+ OutputCostPerToken: bifrost.Ptr(2.0),
+ }
+
+ tests := []struct {
+ name string
+ scopes PricingLookupScopes
+ expected float64
+ }{
+ {
+ name: "virtual key wins over provider key, provider and global",
+ scopes: PricingLookupScopes{
+ VirtualKeyID: virtualKeyScopeID,
+ SelectedKeyID: providerKeyScopeID,
+ Provider: providerScopeID,
+ },
+ expected: 5.0,
+ },
+ {
+ name: "provider key wins over provider and global",
+ scopes: PricingLookupScopes{
+ SelectedKeyID: providerKeyScopeID,
+ Provider: providerScopeID,
+ },
+ expected: 4.0,
+ },
+ {
+ name: "provider wins over global",
+ scopes: PricingLookupScopes{
+ Provider: providerScopeID,
+ },
+ expected: 3.0,
+ },
+ {
+ name: "global applies when no narrower scope is provided",
+ scopes: PricingLookupScopes{},
+ expected: 2.0,
+ },
+ }
+
+ for _, tc := range tests {
+ t.Run(tc.name, func(t *testing.T) {
+ patched, applied := mc.applyPricingOverrides("gpt-5-nano", schemas.ChatCompletionRequest, base, tc.scopes)
+ require.True(t, applied)
+ assert.Equal(t, tc.expected, patched.InputCostPerToken)
+ })
+ }
+}
diff --git a/framework/modelcatalog/pricing.go b/framework/modelcatalog/pricing.go
index decb3e78ea..b9d7525f2f 100644
--- a/framework/modelcatalog/pricing.go
+++ b/framework/modelcatalog/pricing.go
@@ -23,22 +23,29 @@ type costInput struct {
// CalculateCost calculates the cost of a Bifrost response.
// It handles all request types, cache debug billing, and tiered pricing.
-func (mc *ModelCatalog) CalculateCost(result *schemas.BifrostResponse) float64 {
+// If scopes is nil, an empty PricingLookupScopes is used; global and provider-scoped
+// overrides may still apply since the provider is derived from the response.
+func (mc *ModelCatalog) CalculateCost(result *schemas.BifrostResponse, scopes *PricingLookupScopes) float64 {
if result == nil {
return 0
}
+ var s PricingLookupScopes
+ if scopes != nil {
+ s = *scopes
+ }
+
// Handle semantic cache billing
cacheDebug := result.GetExtraFields().CacheDebug
if cacheDebug != nil {
- return mc.calculateCostWithCache(result, cacheDebug)
+ return mc.calculateCostWithCache(result, cacheDebug, s)
}
- return mc.calculateBaseCost(result)
+ return mc.calculateBaseCost(result, s)
}
// calculateCostWithCache handles cost calculation when semantic cache debug info is present.
-func (mc *ModelCatalog) calculateCostWithCache(result *schemas.BifrostResponse, cacheDebug *schemas.BifrostCacheDebug) float64 {
+func (mc *ModelCatalog) calculateCostWithCache(result *schemas.BifrostResponse, cacheDebug *schemas.BifrostCacheDebug, scopes PricingLookupScopes) float64 {
if cacheDebug.CacheHit {
// Direct cache hit — no LLM call, no cost
if cacheDebug.HitType != nil && *cacheDebug.HitType == "direct" {
@@ -46,31 +53,34 @@ func (mc *ModelCatalog) calculateCostWithCache(result *schemas.BifrostResponse,
}
// Semantic cache hit — only the embedding lookup cost
if cacheDebug.ProviderUsed != nil && cacheDebug.ModelUsed != nil && cacheDebug.InputTokens != nil {
- return mc.computeCacheEmbeddingCost(cacheDebug)
+ return mc.computeCacheEmbeddingCost(cacheDebug, scopes)
}
return 0
}
// Cache miss — full LLM cost + embedding lookup cost
- baseCost := mc.calculateBaseCost(result)
- embeddingCost := mc.computeCacheEmbeddingCost(cacheDebug)
+ baseCost := mc.calculateBaseCost(result, scopes)
+ embeddingCost := mc.computeCacheEmbeddingCost(cacheDebug, scopes)
return baseCost + embeddingCost
}
// computeCacheEmbeddingCost calculates the embedding cost for a semantic cache lookup.
-func (mc *ModelCatalog) computeCacheEmbeddingCost(cacheDebug *schemas.BifrostCacheDebug) float64 {
+func (mc *ModelCatalog) computeCacheEmbeddingCost(cacheDebug *schemas.BifrostCacheDebug, scopes PricingLookupScopes) float64 {
if cacheDebug == nil || cacheDebug.ProviderUsed == nil || cacheDebug.ModelUsed == nil || cacheDebug.InputTokens == nil {
return 0
}
- pricing, exists := mc.getPricing(*cacheDebug.ModelUsed, *cacheDebug.ProviderUsed, schemas.EmbeddingRequest)
- if !exists {
+ if scopes.Provider == "" {
+ scopes.Provider = *cacheDebug.ProviderUsed
+ }
+ pricing := mc.resolvePricing(*cacheDebug.ProviderUsed, *cacheDebug.ModelUsed, "", schemas.EmbeddingRequest, scopes)
+ if pricing == nil {
return 0
}
return float64(*cacheDebug.InputTokens) * tieredInputRate(pricing, *cacheDebug.InputTokens)
}
// calculateBaseCost extracts usage from the response and routes to the appropriate compute function.
-func (mc *ModelCatalog) calculateBaseCost(result *schemas.BifrostResponse) float64 {
+func (mc *ModelCatalog) calculateBaseCost(result *schemas.BifrostResponse, scopes PricingLookupScopes) float64 {
extraFields := result.GetExtraFields()
if extraFields == nil {
return 0
@@ -98,7 +108,7 @@ func (mc *ModelCatalog) calculateBaseCost(result *schemas.BifrostResponse) float
requestType = normalizeStreamRequestType(requestType)
// Resolve pricing entry with deployment fallback
- pricing := mc.resolvePricing(provider, model, deployment, requestType)
+ pricing := mc.resolvePricing(provider, model, deployment, requestType, scopes)
if pricing == nil {
return 0
}
@@ -598,7 +608,10 @@ func tieredInputRate(pricing *configstoreTables.TableModelPricing, totalTokens i
if totalTokens > TokenTierAbove128K && pricing.InputCostPerTokenAbove128kTokens != nil {
return *pricing.InputCostPerTokenAbove128kTokens
}
- return pricing.InputCostPerToken
+ if pricing.InputCostPerToken != nil {
+ return *pricing.InputCostPerToken
+ }
+ return 0
}
// tieredOutputRate returns the effective per-token output rate based on total token count.
@@ -609,7 +622,10 @@ func tieredOutputRate(pricing *configstoreTables.TableModelPricing, totalTokens
if totalTokens > TokenTierAbove128K && pricing.OutputCostPerTokenAbove128kTokens != nil {
return *pricing.OutputCostPerTokenAbove128kTokens
}
- return pricing.OutputCostPerToken
+ if pricing.OutputCostPerToken != nil {
+ return *pricing.OutputCostPerToken
+ }
+ return 0
}
// tieredImageInputRate returns the effective rate for image tokens on the input side.
@@ -743,28 +759,60 @@ func populateOutputImageCount(imageUsage *schemas.ImageUsage, dataLen int) {
// ---------------------------------------------------------------------------
// resolvePricing resolves the pricing entry for a model, trying deployment as fallback.
-func (mc *ModelCatalog) resolvePricing(provider, model, deployment string, requestType schemas.RequestType) *configstoreTables.TableModelPricing {
+func (mc *ModelCatalog) resolvePricing(provider, model, deployment string, requestType schemas.RequestType, scopes PricingLookupScopes) *configstoreTables.TableModelPricing {
mc.logger.Debug("looking up pricing for model %s and provider %s of request type %s", model, provider, normalizeRequestType(requestType))
- pricing, exists := mc.getPricing(model, provider, requestType)
- if exists {
- return pricing
+ if scopes.Provider == "" {
+ scopes.Provider = provider
+ }
+
+ base, exists := mc.getBasePricing(model, provider, requestType)
+ if exists && base != nil {
+ result, _ := mc.applyPricingOverrides(model, requestType, *base, scopes)
+ return &result
}
if deployment != "" {
mc.logger.Debug("pricing not found for model %s, trying deployment %s", model, deployment)
- pricing, exists = mc.getPricing(deployment, provider, requestType)
- if exists {
- return pricing
+ base, exists = mc.getBasePricing(deployment, provider, requestType)
+ if exists && base != nil {
+ // Apply overrides using the requested model name, not the deployment name
+ result, _ := mc.applyPricingOverrides(model, requestType, *base, scopes)
+ return &result
}
}
- mc.logger.Debug("pricing not found for model %s and provider %s, skipping cost calculation", model, provider)
+ // No base catalog entry found; still try overrides in case the user defined
+ // override-only pricing for a model not in the built-in catalog.
+ mc.logger.Debug("pricing not found for model %s and provider %s, trying override-only pricing", model, provider)
+ result, applied := mc.applyPricingOverrides(model, requestType, configstoreTables.TableModelPricing{}, scopes)
+ if applied {
+ return &result
+ }
+ mc.logger.Debug("no pricing found for model %s and provider %s, skipping cost calculation", model, provider)
return nil
}
-// getPricing returns pricing information for a model (thread-safe)
-func (mc *ModelCatalog) getPricing(model, provider string, requestType schemas.RequestType) (*configstoreTables.TableModelPricing, bool) {
+// getBasePricing looks up catalog pricing for the given model, provider, and request type.
+// It applies a provider-specific fallback chain when an exact match is not found:
+//
+// - Gemini: retries under the "vertex" provider, then falls back to chat mode for Responses requests.
+// - Vertex: strips the "provider/model" prefix and retries, then falls back to chat mode for Responses requests.
+// - Bedrock: prepends the "anthropic." namespace for Claude models, then falls back to chat mode for Responses requests.
+// - All providers: for Responses/ResponsesStream requests, retries the lookup in chat mode.
+// - All providers: for ImageEdit/ImageVariation requests, retries the lookup in image-generation mode.
+//
+// The method acquires a read lock for the duration of the lookup.
+//
+// Input: model — exact model name to look up.
+//
+// provider — provider identifier (e.g. "openai", "anthropic").
+// requestType — the request type used to derive the pricing mode.
+//
+// Output: TableModelPricing — the matched pricing row (zero value when not found).
+//
+// bool — true when a pricing entry was found, false otherwise.
+func (mc *ModelCatalog) getBasePricing(model, provider string, requestType schemas.RequestType) (*configstoreTables.TableModelPricing, bool) {
mc.mu.RLock()
defer mc.mu.RUnlock()
diff --git a/framework/modelcatalog/pricing_test.go b/framework/modelcatalog/pricing_test.go
index 1433e0035f..d69301b4a7 100644
--- a/framework/modelcatalog/pricing_test.go
+++ b/framework/modelcatalog/pricing_test.go
@@ -3,6 +3,7 @@ package modelcatalog
import (
"testing"
+ bifrost "github.com/maximhq/bifrost/core"
"github.com/maximhq/bifrost/core/schemas"
configstoreTables "github.com/maximhq/bifrost/framework/configstore/tables"
"github.com/stretchr/testify/assert"
@@ -13,17 +14,14 @@ import (
// helpers
// ---------------------------------------------------------------------------
-func ptr(v float64) *float64 { return &v }
-func intPtr(v int) *int { return &v }
-
// chatPricing returns a TableModelPricing with the given per-token rates.
func chatPricing(input, output float64) configstoreTables.TableModelPricing {
return configstoreTables.TableModelPricing{
Model: "test-model",
Provider: "test-provider",
Mode: "chat",
- InputCostPerToken: input,
- OutputCostPerToken: output,
+ InputCostPerToken: bifrost.Ptr(input),
+ OutputCostPerToken: bifrost.Ptr(output),
}
}
@@ -93,6 +91,13 @@ func makeImageResponse(provider schemas.ModelProvider, model string, usage *sche
}
}
+func derefF(f *float64) float64 {
+ if f == nil {
+ return 0
+ }
+ return *f
+}
+
// =========================================================================
// 1. computeTextCost — unit tests (pure function, no catalog)
// =========================================================================
@@ -124,8 +129,8 @@ func TestComputeTextCost_ZeroTokens(t *testing.T) {
func TestComputeTextCost_WithCachedPromptTokens(t *testing.T) {
// Claude 3.5 Sonnet (Bedrock): input=$3/M, output=$15/M, cache_read=$0.3/M, cache_creation=$3.75/M
p := chatPricing(0.000003, 0.000015)
- p.CacheReadInputTokenCost = ptr(0.0000003)
- p.CacheCreationInputTokenCost = ptr(0.00000375)
+ p.CacheReadInputTokenCost = bifrost.Ptr(0.0000003)
+ p.CacheCreationInputTokenCost = bifrost.Ptr(0.00000375)
usage := &schemas.BifrostLLMUsage{
PromptTokens: 2000,
@@ -149,8 +154,8 @@ func TestComputeTextCost_WithCachedPromptTokens(t *testing.T) {
func TestComputeTextCost_Tiered200k(t *testing.T) {
// Claude 3.5 Sonnet Bedrock 200k tier: input=$6/M, output=$30/M
p := chatPricing(0.000003, 0.000015)
- p.InputCostPerTokenAbove200kTokens = ptr(0.000006)
- p.OutputCostPerTokenAbove200kTokens = ptr(0.00003)
+ p.InputCostPerTokenAbove200kTokens = bifrost.Ptr(0.000006)
+ p.OutputCostPerTokenAbove200kTokens = bifrost.Ptr(0.00003)
usage := &schemas.BifrostLLMUsage{
PromptTokens: 180000,
@@ -167,8 +172,8 @@ func TestComputeTextCost_Tiered200k(t *testing.T) {
func TestComputeTextCost_Below200kUsesBaseRate(t *testing.T) {
p := chatPricing(0.000003, 0.000015)
- p.InputCostPerTokenAbove200kTokens = ptr(0.000006)
- p.OutputCostPerTokenAbove200kTokens = ptr(0.00003)
+ p.InputCostPerTokenAbove200kTokens = bifrost.Ptr(0.000006)
+ p.OutputCostPerTokenAbove200kTokens = bifrost.Ptr(0.00003)
usage := &schemas.BifrostLLMUsage{
PromptTokens: 1000,
@@ -185,7 +190,7 @@ func TestComputeTextCost_Below200kUsesBaseRate(t *testing.T) {
func TestComputeTextCost_SearchQueryCost(t *testing.T) {
p := chatPricing(0.000003, 0.000015)
- p.SearchContextCostPerQuery = ptr(0.01) // $0.01 per search query
+ p.SearchContextCostPerQuery = bifrost.Ptr(0.01) // $0.01 per search query
numQueries := 3
usage := &schemas.BifrostLLMUsage{
@@ -232,8 +237,8 @@ func TestComputeTextCost_NoCacheRateFallsBackToBaseInputRate(t *testing.T) {
func TestComputeEmbeddingCost_Basic(t *testing.T) {
// Titan Embed Text v1: $0.1/M input
p := configstoreTables.TableModelPricing{
- InputCostPerToken: 0.0000001,
- OutputCostPerToken: 0,
+ InputCostPerToken: bifrost.Ptr(0.0000001),
+ OutputCostPerToken: bifrost.Ptr(0.0),
}
usage := &schemas.BifrostLLMUsage{
PromptTokens: 5000,
@@ -245,7 +250,7 @@ func TestComputeEmbeddingCost_Basic(t *testing.T) {
}
func TestComputeEmbeddingCost_NilUsage(t *testing.T) {
- p := configstoreTables.TableModelPricing{InputCostPerToken: 0.0000001}
+ p := configstoreTables.TableModelPricing{InputCostPerToken: bifrost.Ptr(0.0000001)}
assert.Equal(t, 0.0, computeEmbeddingCost(&p, nil))
}
@@ -255,8 +260,8 @@ func TestComputeEmbeddingCost_NilUsage(t *testing.T) {
func TestComputeRerankCost_Basic(t *testing.T) {
p := configstoreTables.TableModelPricing{
- InputCostPerToken: 0.000001,
- OutputCostPerToken: 0.000002,
+ InputCostPerToken: bifrost.Ptr(0.000001),
+ OutputCostPerToken: bifrost.Ptr(0.000002),
}
usage := &schemas.BifrostLLMUsage{
PromptTokens: 2000,
@@ -270,9 +275,9 @@ func TestComputeRerankCost_Basic(t *testing.T) {
func TestComputeRerankCost_WithSearchCost(t *testing.T) {
p := configstoreTables.TableModelPricing{
- InputCostPerToken: 0,
- OutputCostPerToken: 0,
- SearchContextCostPerQuery: ptr(0.001),
+ InputCostPerToken: bifrost.Ptr(0.0),
+ OutputCostPerToken: bifrost.Ptr(0.0),
+ SearchContextCostPerQuery: bifrost.Ptr(0.001),
}
numQueries := 5
usage := &schemas.BifrostLLMUsage{
@@ -285,7 +290,7 @@ func TestComputeRerankCost_WithSearchCost(t *testing.T) {
}
func TestComputeRerankCost_NilUsage(t *testing.T) {
- p := configstoreTables.TableModelPricing{InputCostPerToken: 0.001}
+ p := configstoreTables.TableModelPricing{InputCostPerToken: bifrost.Ptr(0.001)}
assert.Equal(t, 0.0, computeRerankCost(&p, nil))
}
@@ -296,9 +301,9 @@ func TestComputeRerankCost_NilUsage(t *testing.T) {
func TestComputeSpeechCost_TokensPreferredOverDuration(t *testing.T) {
// TTS: input=text tokens, output=audio tokens (preferred over per-second)
p := configstoreTables.TableModelPricing{
- InputCostPerToken: 0.0000025,
- OutputCostPerToken: 0.00001,
- OutputCostPerSecond: ptr(0.00025),
+ InputCostPerToken: bifrost.Ptr(0.0000025),
+ OutputCostPerToken: bifrost.Ptr(0.00001),
+ OutputCostPerSecond: bifrost.Ptr(0.00025),
}
seconds := 60
usage := &schemas.BifrostLLMUsage{
@@ -317,9 +322,9 @@ func TestComputeSpeechCost_TokensPreferredOverDuration(t *testing.T) {
func TestComputeSpeechCost_OutputFallsBackToPerSecond(t *testing.T) {
// TTS: no output tokens → falls back to per-second output pricing
p := configstoreTables.TableModelPricing{
- InputCostPerToken: 0.000001,
- OutputCostPerToken: 0.000002,
- OutputCostPerSecond: ptr(0.0001),
+ InputCostPerToken: bifrost.Ptr(0.000001),
+ OutputCostPerToken: bifrost.Ptr(0.000002),
+ OutputCostPerSecond: bifrost.Ptr(0.0001),
}
seconds := 120
usage := &schemas.BifrostLLMUsage{PromptTokens: 500}
@@ -333,9 +338,9 @@ func TestComputeSpeechCost_OutputFallsBackToPerSecond(t *testing.T) {
func TestComputeSpeechCost_OutputAudioTokenRate(t *testing.T) {
// TTS: output uses OutputCostPerAudioToken when available
p := configstoreTables.TableModelPricing{
- InputCostPerToken: 0.000001,
- OutputCostPerToken: 0.000002,
- OutputCostPerAudioToken: ptr(0.00005),
+ InputCostPerToken: bifrost.Ptr(0.000001),
+ OutputCostPerToken: bifrost.Ptr(0.000002),
+ OutputCostPerAudioToken: bifrost.Ptr(0.00005),
}
usage := &schemas.BifrostLLMUsage{
PromptTokens: 200,
@@ -373,9 +378,9 @@ func TestComputeSpeechCost_NilUsageNilSeconds(t *testing.T) {
func TestComputeTranscriptionCost_DurationBased(t *testing.T) {
// assemblyai/nano: input_cost_per_second=0.00010278
p := configstoreTables.TableModelPricing{
- InputCostPerToken: 0,
- OutputCostPerToken: 0,
- InputCostPerSecond: ptr(0.00010278),
+ InputCostPerToken: bifrost.Ptr(0.0),
+ OutputCostPerToken: bifrost.Ptr(0.0),
+ InputCostPerSecond: bifrost.Ptr(0.00010278),
}
seconds := 300 // 5 minutes
cost := computeTranscriptionCost(&p, nil, &seconds, nil)
@@ -385,9 +390,9 @@ func TestComputeTranscriptionCost_DurationBased(t *testing.T) {
func TestComputeTranscriptionCost_AudioTokenDetails(t *testing.T) {
p := configstoreTables.TableModelPricing{
- InputCostPerToken: 0.000005,
- OutputCostPerToken: 0.000015,
- InputCostPerAudioToken: ptr(0.00001),
+ InputCostPerToken: bifrost.Ptr(0.000005),
+ OutputCostPerToken: bifrost.Ptr(0.000015),
+ InputCostPerAudioToken: bifrost.Ptr(0.00001),
}
usage := &schemas.BifrostLLMUsage{
PromptTokens: 2000,
@@ -421,10 +426,10 @@ func TestComputeTranscriptionCost_TokenFallback(t *testing.T) {
func TestComputeTranscriptionCost_TokenDetailsPreferredOverDuration(t *testing.T) {
// STT: audio token details present → uses tokens, not per-second
p := configstoreTables.TableModelPricing{
- InputCostPerToken: 0.000005,
- OutputCostPerToken: 0,
- InputCostPerAudioPerSecond: ptr(0.0001),
- InputCostPerAudioToken: ptr(0.00001),
+ InputCostPerToken: bifrost.Ptr(0.000005),
+ OutputCostPerToken: bifrost.Ptr(0.0),
+ InputCostPerAudioPerSecond: bifrost.Ptr(0.0001),
+ InputCostPerAudioToken: bifrost.Ptr(0.00001),
}
seconds := 60
audioDetails := &schemas.TranscriptionUsageInputTokenDetails{
@@ -443,9 +448,9 @@ func TestComputeTranscriptionCost_TokenDetailsPreferredOverDuration(t *testing.T
func TestComputeTranscriptionCost_DurationFallbackWhenNoTokens(t *testing.T) {
// STT: no audio token details, no prompt tokens → falls back to per-second
p := configstoreTables.TableModelPricing{
- InputCostPerToken: 0.000005,
- OutputCostPerToken: 0.000015,
- InputCostPerAudioPerSecond: ptr(0.0001),
+ InputCostPerToken: bifrost.Ptr(0.000005),
+ OutputCostPerToken: bifrost.Ptr(0.000015),
+ InputCostPerAudioPerSecond: bifrost.Ptr(0.0001),
}
seconds := 60
usage := &schemas.BifrostLLMUsage{
@@ -466,9 +471,9 @@ func TestComputeTranscriptionCost_DurationFallbackWhenNoTokens(t *testing.T) {
func TestComputeImageCost_PerImage(t *testing.T) {
// dall-e-3 (aiml): output_cost_per_image=$0.052
p := configstoreTables.TableModelPricing{
- InputCostPerToken: 0,
- OutputCostPerToken: 0,
- OutputCostPerImage: ptr(0.052),
+ InputCostPerToken: bifrost.Ptr(0.0),
+ OutputCostPerToken: bifrost.Ptr(0.0),
+ OutputCostPerImage: bifrost.Ptr(0.052),
}
usage := &schemas.ImageUsage{
OutputTokensDetails: &schemas.ImageTokenDetails{
@@ -482,7 +487,7 @@ func TestComputeImageCost_PerImage(t *testing.T) {
func TestComputeImageCost_PerImageDefaultsToOne(t *testing.T) {
p := configstoreTables.TableModelPricing{
- OutputCostPerImage: ptr(0.052),
+ OutputCostPerImage: bifrost.Ptr(0.052),
}
usage := &schemas.ImageUsage{} // No token details → defaults to 1 image
cost := computeImageCost(&p, usage, "", "")
@@ -491,8 +496,8 @@ func TestComputeImageCost_PerImageDefaultsToOne(t *testing.T) {
func TestComputeImageCost_TokenBased(t *testing.T) {
p := configstoreTables.TableModelPricing{
- InputCostPerToken: 0.000005,
- OutputCostPerToken: 0.000015,
+ InputCostPerToken: bifrost.Ptr(0.000005),
+ OutputCostPerToken: bifrost.Ptr(0.000015),
}
usage := &schemas.ImageUsage{
InputTokens: 1000,
@@ -506,8 +511,8 @@ func TestComputeImageCost_TokenBased(t *testing.T) {
func TestComputeImageCost_TokenBasedWithDetails(t *testing.T) {
p := configstoreTables.TableModelPricing{
- InputCostPerToken: 0.000005,
- OutputCostPerToken: 0.000015,
+ InputCostPerToken: bifrost.Ptr(0.000005),
+ OutputCostPerToken: bifrost.Ptr(0.000015),
}
usage := &schemas.ImageUsage{
InputTokens: 2000,
@@ -530,14 +535,14 @@ func TestComputeImageCost_TokenBasedWithDetails(t *testing.T) {
}
func TestComputeImageCost_NilUsage(t *testing.T) {
- p := configstoreTables.TableModelPricing{OutputCostPerImage: ptr(0.05)}
+ p := configstoreTables.TableModelPricing{OutputCostPerImage: bifrost.Ptr(0.05)}
assert.Equal(t, 0.0, computeImageCost(&p, nil, "", ""))
}
func TestComputeImageCost_InputAndOutputPerImage(t *testing.T) {
p := configstoreTables.TableModelPricing{
- InputCostPerImage: ptr(0.01),
- OutputCostPerImage: ptr(0.05),
+ InputCostPerImage: bifrost.Ptr(0.01),
+ OutputCostPerImage: bifrost.Ptr(0.05),
}
usage := &schemas.ImageUsage{
NumInputImages: 3,
@@ -550,7 +555,7 @@ func TestComputeImageCost_InputAndOutputPerImage(t *testing.T) {
func TestComputeImageCost_PerPixelOutput(t *testing.T) {
p := configstoreTables.TableModelPricing{
- OutputCostPerPixel: ptr(0.000000019), // ~$0.02 for 1024x1024
+ OutputCostPerPixel: bifrost.Ptr(0.000000019), // ~$0.02 for 1024x1024
}
usage := &schemas.ImageUsage{
OutputTokensDetails: &schemas.ImageTokenDetails{NImages: 1},
@@ -562,8 +567,8 @@ func TestComputeImageCost_PerPixelOutput(t *testing.T) {
func TestComputeImageCost_PerPixelInputAndOutput(t *testing.T) {
p := configstoreTables.TableModelPricing{
- InputCostPerPixel: ptr(0.00000001),
- OutputCostPerPixel: ptr(0.00000002),
+ InputCostPerPixel: bifrost.Ptr(0.00000001),
+ OutputCostPerPixel: bifrost.Ptr(0.00000002),
}
usage := &schemas.ImageUsage{
NumInputImages: 2,
@@ -579,10 +584,10 @@ func TestComputeImageCost_PerPixelInputAndOutput(t *testing.T) {
func TestComputeImageCost_TokensPreferredOverPixels(t *testing.T) {
p := configstoreTables.TableModelPricing{
- InputCostPerToken: 0.000005,
- OutputCostPerToken: 0.000015,
- InputCostPerPixel: ptr(0.00000001),
- OutputCostPerPixel: ptr(0.00000002),
+ InputCostPerToken: bifrost.Ptr(0.000005),
+ OutputCostPerToken: bifrost.Ptr(0.000015),
+ InputCostPerPixel: bifrost.Ptr(0.00000001),
+ OutputCostPerPixel: bifrost.Ptr(0.00000002),
}
usage := &schemas.ImageUsage{
InputTokens: 1000,
@@ -596,8 +601,8 @@ func TestComputeImageCost_TokensPreferredOverPixels(t *testing.T) {
func TestComputeImageCost_PixelsPreferredOverPerImage(t *testing.T) {
p := configstoreTables.TableModelPricing{
- OutputCostPerPixel: ptr(0.00000002),
- OutputCostPerImage: ptr(999.0), // should not be used
+ OutputCostPerPixel: bifrost.Ptr(0.00000002),
+ OutputCostPerImage: bifrost.Ptr(999.0), // should not be used
}
usage := &schemas.ImageUsage{
OutputTokensDetails: &schemas.ImageTokenDetails{NImages: 1},
@@ -609,8 +614,8 @@ func TestComputeImageCost_PixelsPreferredOverPerImage(t *testing.T) {
func TestComputeImageCost_PerPixelFallsBackToPerImage_WhenNoSize(t *testing.T) {
p := configstoreTables.TableModelPricing{
- OutputCostPerPixel: ptr(0.00000002),
- OutputCostPerImage: ptr(0.05),
+ OutputCostPerPixel: bifrost.Ptr(0.00000002),
+ OutputCostPerImage: bifrost.Ptr(0.05),
}
usage := &schemas.ImageUsage{
OutputTokensDetails: &schemas.ImageTokenDetails{NImages: 2},
@@ -626,11 +631,11 @@ func TestComputeImageCost_QualityBasedRates(t *testing.T) {
}
// Quality-specific rates take precedence over base/size-tier
p := configstoreTables.TableModelPricing{
- OutputCostPerImage: ptr(0.01),
- OutputCostPerImageLowQuality: ptr(0.02),
- OutputCostPerImageMediumQuality: ptr(0.03),
- OutputCostPerImageHighQuality: ptr(0.04),
- OutputCostPerImageAutoQuality: ptr(0.05),
+ OutputCostPerImage: bifrost.Ptr(0.01),
+ OutputCostPerImageLowQuality: bifrost.Ptr(0.02),
+ OutputCostPerImageMediumQuality: bifrost.Ptr(0.03),
+ OutputCostPerImageHighQuality: bifrost.Ptr(0.04),
+ OutputCostPerImageAutoQuality: bifrost.Ptr(0.05),
}
assert.InDelta(t, 0.02, computeImageCost(&p, usage, "", "low"), 1e-12)
assert.InDelta(t, 0.03, computeImageCost(&p, usage, "", "medium"), 1e-12)
@@ -659,9 +664,9 @@ func TestParseImagePixels(t *testing.T) {
func TestComputeVideoCost_DurationBased(t *testing.T) {
p := configstoreTables.TableModelPricing{
- InputCostPerToken: 0.000001,
- OutputCostPerToken: 0,
- OutputCostPerVideoPerSecond: ptr(0.001),
+ InputCostPerToken: bifrost.Ptr(0.000001),
+ OutputCostPerToken: bifrost.Ptr(0.0),
+ OutputCostPerVideoPerSecond: bifrost.Ptr(0.001),
}
seconds := 30
usage := &schemas.BifrostLLMUsage{PromptTokens: 500, TotalTokens: 500}
@@ -674,9 +679,9 @@ func TestComputeVideoCost_DurationBased(t *testing.T) {
func TestComputeVideoCost_OutputCostPerSecondFallback(t *testing.T) {
p := configstoreTables.TableModelPricing{
- InputCostPerToken: 0,
- OutputCostPerToken: 0,
- OutputCostPerSecond: ptr(0.002),
+ InputCostPerToken: bifrost.Ptr(0.0),
+ OutputCostPerToken: bifrost.Ptr(0.0),
+ OutputCostPerSecond: bifrost.Ptr(0.002),
}
seconds := 10
cost := computeVideoCost(&p, nil, &seconds)
@@ -685,8 +690,8 @@ func TestComputeVideoCost_OutputCostPerSecondFallback(t *testing.T) {
func TestComputeVideoCost_NilSeconds(t *testing.T) {
p := configstoreTables.TableModelPricing{
- InputCostPerToken: 0.000001,
- OutputCostPerVideoPerSecond: ptr(0.001),
+ InputCostPerToken: bifrost.Ptr(0.000001),
+ OutputCostPerVideoPerSecond: bifrost.Ptr(0.001),
}
usage := &schemas.BifrostLLMUsage{PromptTokens: 1000}
cost := computeVideoCost(&p, usage, nil)
@@ -700,23 +705,23 @@ func TestComputeVideoCost_NilSeconds(t *testing.T) {
func TestTieredInputRate_BelowThreshold(t *testing.T) {
p := configstoreTables.TableModelPricing{
- InputCostPerToken: 0.000003,
- InputCostPerTokenAbove200kTokens: ptr(0.000006),
+ InputCostPerToken: bifrost.Ptr(0.000003),
+ InputCostPerTokenAbove200kTokens: bifrost.Ptr(0.000006),
}
assert.Equal(t, 0.000003, tieredInputRate(&p, 100000))
}
func TestTieredInputRate_AboveThreshold(t *testing.T) {
p := configstoreTables.TableModelPricing{
- InputCostPerToken: 0.000003,
- InputCostPerTokenAbove200kTokens: ptr(0.000006),
+ InputCostPerToken: bifrost.Ptr(0.000003),
+ InputCostPerTokenAbove200kTokens: bifrost.Ptr(0.000006),
}
assert.Equal(t, 0.000006, tieredInputRate(&p, 210000))
}
func TestTieredInputRate_AboveThresholdNoTieredRate(t *testing.T) {
p := configstoreTables.TableModelPricing{
- InputCostPerToken: 0.000003,
+ InputCostPerToken: bifrost.Ptr(0.000003),
}
// Falls back to base rate when tiered field is nil
assert.Equal(t, 0.000003, tieredInputRate(&p, 300000))
@@ -724,8 +729,8 @@ func TestTieredInputRate_AboveThresholdNoTieredRate(t *testing.T) {
func TestTieredOutputRate_AboveThreshold(t *testing.T) {
p := configstoreTables.TableModelPricing{
- OutputCostPerToken: 0.000015,
- OutputCostPerTokenAbove200kTokens: ptr(0.00003),
+ OutputCostPerToken: bifrost.Ptr(0.000015),
+ OutputCostPerTokenAbove200kTokens: bifrost.Ptr(0.00003),
}
assert.Equal(t, 0.00003, tieredOutputRate(&p, 250000))
}
@@ -772,9 +777,9 @@ func TestExtractCostInput_TranscriptionWithSeconds(t *testing.T) {
TranscriptionResponse: &schemas.BifrostTranscriptionResponse{
Usage: &schemas.TranscriptionUsage{
Seconds: &sec,
- InputTokens: intPtr(1000),
- OutputTokens: intPtr(200),
- TotalTokens: intPtr(1200),
+ InputTokens: bifrost.Ptr(1000),
+ OutputTokens: bifrost.Ptr(200),
+ TotalTokens: bifrost.Ptr(1200),
},
},
}
@@ -833,7 +838,7 @@ func TestCalculateCost_SemanticCacheDirectHit(t *testing.T) {
mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
makeKey("gpt-4o", "openai", "chat"): {
Model: "gpt-4o", Provider: "openai", Mode: "chat",
- InputCostPerToken: 0.000005, OutputCostPerToken: 0.000015,
+ InputCostPerToken: bifrost.Ptr(0.000005), OutputCostPerToken: bifrost.Ptr(0.000015),
},
})
@@ -853,7 +858,7 @@ func TestCalculateCost_SemanticCacheDirectHit(t *testing.T) {
},
}
- cost := mc.CalculateCost(resp)
+ cost := mc.CalculateCost(resp, nil)
assert.Equal(t, 0.0, cost)
}
@@ -865,11 +870,11 @@ func TestCalculateCost_SemanticCacheSemanticHit(t *testing.T) {
mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
makeKey("gpt-4o", "openai", "chat"): {
Model: "gpt-4o", Provider: "openai", Mode: "chat",
- InputCostPerToken: 0.000005, OutputCostPerToken: 0.000015,
+ InputCostPerToken: bifrost.Ptr(0.000005), OutputCostPerToken: bifrost.Ptr(0.000015),
},
makeKey("text-embedding-3-small", "openai", "embedding"): {
Model: "text-embedding-3-small", Provider: "openai", Mode: "embedding",
- InputCostPerToken: 0.00000002,
+ InputCostPerToken: bifrost.Ptr(0.00000002),
},
})
@@ -892,7 +897,7 @@ func TestCalculateCost_SemanticCacheSemanticHit(t *testing.T) {
},
}
- cost := mc.CalculateCost(resp)
+ cost := mc.CalculateCost(resp, nil)
// Only embedding cost: 500 * 0.00000002 = 0.00001
assert.InDelta(t, 0.00001, cost, 1e-12)
}
@@ -905,11 +910,11 @@ func TestCalculateCost_SemanticCacheMiss(t *testing.T) {
mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
makeKey("gpt-4o", "openai", "chat"): {
Model: "gpt-4o", Provider: "openai", Mode: "chat",
- InputCostPerToken: 0.000005, OutputCostPerToken: 0.000015,
+ InputCostPerToken: bifrost.Ptr(0.000005), OutputCostPerToken: bifrost.Ptr(0.000015),
},
makeKey("text-embedding-3-small", "openai", "embedding"): {
Model: "text-embedding-3-small", Provider: "openai", Mode: "embedding",
- InputCostPerToken: 0.00000002,
+ InputCostPerToken: bifrost.Ptr(0.00000002),
},
})
@@ -930,7 +935,7 @@ func TestCalculateCost_SemanticCacheMiss(t *testing.T) {
},
}
- cost := mc.CalculateCost(resp)
+ cost := mc.CalculateCost(resp, nil)
// Base cost: 1000*0.000005 + 500*0.000015 = 0.005 + 0.0075 = 0.0125
// Embedding cost: 500 * 0.00000002 = 0.00001
// Total: 0.01251
@@ -951,7 +956,7 @@ func TestCalculateCost_SemanticCacheHitNoEmbeddingInfo(t *testing.T) {
},
}
- cost := mc.CalculateCost(resp)
+ cost := mc.CalculateCost(resp, nil)
assert.Equal(t, 0.0, cost)
}
@@ -961,7 +966,7 @@ func TestCalculateCost_SemanticCacheHitNoEmbeddingInfo(t *testing.T) {
func TestCalculateCost_NilResponse(t *testing.T) {
mc := testCatalogWithPricing(nil)
- assert.Equal(t, 0.0, mc.CalculateCost(nil))
+ assert.Equal(t, 0.0, mc.CalculateCost(nil, nil))
}
func TestCalculateCost_ProviderComputedCostPassthrough(t *testing.T) {
@@ -978,7 +983,7 @@ func TestCalculateCost_ProviderComputedCostPassthrough(t *testing.T) {
},
})
- cost := mc.CalculateCost(resp)
+ cost := mc.CalculateCost(resp, nil)
assert.Equal(t, 0.99, cost)
}
@@ -988,7 +993,7 @@ func TestCalculateCost_NoUsageData(t *testing.T) {
})
resp := makeChatResponse(schemas.OpenAI, "gpt-4o", nil)
- cost := mc.CalculateCost(resp)
+ cost := mc.CalculateCost(resp, nil)
assert.Equal(t, 0.0, cost)
}
@@ -997,9 +1002,9 @@ func TestCalculateCost_ChatCompletion_GPT4o(t *testing.T) {
mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
makeKey("gpt-4o", "openai", "chat"): {
Model: "gpt-4o", Provider: "openai", Mode: "chat",
- InputCostPerToken: 0.000005,
- OutputCostPerToken: 0.000015,
- CacheReadInputTokenCost: ptr(0.0000005),
+ InputCostPerToken: bifrost.Ptr(0.000005),
+ OutputCostPerToken: bifrost.Ptr(0.000015),
+ CacheReadInputTokenCost: bifrost.Ptr(0.0000005),
},
})
@@ -1009,7 +1014,7 @@ func TestCalculateCost_ChatCompletion_GPT4o(t *testing.T) {
TotalTokens: 12000,
})
- cost := mc.CalculateCost(resp)
+ cost := mc.CalculateCost(resp, nil)
// 10000*0.000005 + 2000*0.000015 = 0.05 + 0.03 = 0.08
assert.InDelta(t, 0.08, cost, 1e-12)
}
@@ -1019,12 +1024,12 @@ func TestCalculateCost_ChatCompletion_Claude35Sonnet_WithCache(t *testing.T) {
mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
makeKey("anthropic.claude-3-5-sonnet-20241022-v2:0", "bedrock", "chat"): {
Model: "anthropic.claude-3-5-sonnet-20241022-v2:0", Provider: "bedrock", Mode: "chat",
- InputCostPerToken: 0.000003,
- OutputCostPerToken: 0.000015,
- CacheReadInputTokenCost: ptr(0.0000003),
- CacheCreationInputTokenCost: ptr(0.00000375),
- InputCostPerTokenAbove200kTokens: ptr(0.000006),
- OutputCostPerTokenAbove200kTokens: ptr(0.00003),
+ InputCostPerToken: bifrost.Ptr(0.000003),
+ OutputCostPerToken: bifrost.Ptr(0.000015),
+ CacheReadInputTokenCost: bifrost.Ptr(0.0000003),
+ CacheCreationInputTokenCost: bifrost.Ptr(0.00000375),
+ InputCostPerTokenAbove200kTokens: bifrost.Ptr(0.000006),
+ OutputCostPerTokenAbove200kTokens: bifrost.Ptr(0.00003),
},
})
@@ -1038,7 +1043,7 @@ func TestCalculateCost_ChatCompletion_Claude35Sonnet_WithCache(t *testing.T) {
},
})
- cost := mc.CalculateCost(resp)
+ cost := mc.CalculateCost(resp, nil)
// Both cached read and write tokens are input-side deductions from promptTokens.
// Input: (5000-3000-500)*0.000003 + 3000*0.0000003 + 500*0.00000375 = 0.0045 + 0.0009 + 0.001875 = 0.007275
// Output: 1000*0.000015 = 0.015
@@ -1051,8 +1056,8 @@ func TestCalculateCost_Embedding(t *testing.T) {
mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
makeKey("amazon.titan-embed-text-v1", "bedrock", "embedding"): {
Model: "amazon.titan-embed-text-v1", Provider: "bedrock", Mode: "embedding",
- InputCostPerToken: 0.0000001,
- OutputCostPerToken: 0,
+ InputCostPerToken: bifrost.Ptr(0.0000001),
+ OutputCostPerToken: bifrost.Ptr(0.0),
},
})
@@ -1061,7 +1066,7 @@ func TestCalculateCost_Embedding(t *testing.T) {
TotalTokens: 10000,
})
- cost := mc.CalculateCost(resp)
+ cost := mc.CalculateCost(resp, nil)
// 10000 * 0.0000001 = 0.001
assert.InDelta(t, 0.001, cost, 1e-12)
}
@@ -1070,8 +1075,8 @@ func TestCalculateCost_Rerank(t *testing.T) {
mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
makeKey("amazon.rerank-v1:0", "bedrock", "rerank"): {
Model: "amazon.rerank-v1:0", Provider: "bedrock", Mode: "rerank",
- InputCostPerToken: 0,
- OutputCostPerToken: 0,
+ InputCostPerToken: bifrost.Ptr(0.0),
+ OutputCostPerToken: bifrost.Ptr(0.0),
},
})
@@ -1080,7 +1085,7 @@ func TestCalculateCost_Rerank(t *testing.T) {
TotalTokens: 500,
})
- cost := mc.CalculateCost(resp)
+ cost := mc.CalculateCost(resp, nil)
assert.Equal(t, 0.0, cost)
}
@@ -1089,7 +1094,7 @@ func TestCalculateCost_ImageGeneration(t *testing.T) {
mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
makeKey("dall-e-3", "aiml", "image_generation"): {
Model: "dall-e-3", Provider: "aiml", Mode: "image_generation",
- OutputCostPerImage: ptr(0.052),
+ OutputCostPerImage: bifrost.Ptr(0.052),
},
})
@@ -1097,7 +1102,7 @@ func TestCalculateCost_ImageGeneration(t *testing.T) {
OutputTokensDetails: &schemas.ImageTokenDetails{NImages: 3},
})
- cost := mc.CalculateCost(resp)
+ cost := mc.CalculateCost(resp, nil)
// 3 * 0.052 = 0.156
assert.InDelta(t, 0.156, cost, 1e-12)
}
@@ -1119,7 +1124,7 @@ func TestCalculateCost_StreamRequestTypeNormalized(t *testing.T) {
},
}
- cost := mc.CalculateCost(resp)
+ cost := mc.CalculateCost(resp, nil)
assert.InDelta(t, 0.0125, cost, 1e-12)
}
@@ -1128,7 +1133,7 @@ func TestCalculateCost_NoPricingData(t *testing.T) {
resp := makeChatResponse(schemas.OpenAI, "unknown-model", &schemas.BifrostLLMUsage{
PromptTokens: 1000, CompletionTokens: 500, TotalTokens: 1500,
})
- cost := mc.CalculateCost(resp)
+ cost := mc.CalculateCost(resp, nil)
assert.Equal(t, 0.0, cost)
}
@@ -1140,57 +1145,51 @@ func TestGetPricing_DirectLookup(t *testing.T) {
mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
makeKey("gpt-4o", "openai", "chat"): chatPricing(0.000005, 0.000015),
})
- p, ok := mc.getPricing("gpt-4o", "openai", schemas.ChatCompletionRequest)
- require.True(t, ok)
- assert.Equal(t, 0.000005, p.InputCostPerToken)
+ p := mc.resolvePricing("openai", "gpt-4o", "", schemas.ChatCompletionRequest, PricingLookupScopes{Provider: "openai"})
+ assert.Equal(t, 0.000005, derefF(p.InputCostPerToken))
}
func TestGetPricing_GeminiFallsBackToVertex(t *testing.T) {
mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
makeKey("gemini-2.0-flash", "vertex", "chat"): {
Model: "gemini-2.0-flash", Provider: "vertex", Mode: "chat",
- InputCostPerToken: 0.0000001, OutputCostPerToken: 0.0000004,
+ InputCostPerToken: bifrost.Ptr(0.0000001), OutputCostPerToken: bifrost.Ptr(0.0000004),
},
})
- p, ok := mc.getPricing("gemini-2.0-flash", "gemini", schemas.ChatCompletionRequest)
- require.True(t, ok)
- assert.Equal(t, 0.0000001, p.InputCostPerToken)
+ p := mc.resolvePricing("gemini", "gemini-2.0-flash", "", schemas.ChatCompletionRequest, PricingLookupScopes{Provider: "gemini"})
+ assert.Equal(t, 0.0000001, derefF(p.InputCostPerToken))
}
func TestGetPricing_VertexStripsProviderPrefix(t *testing.T) {
mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
makeKey("gemini-2.0-flash", "vertex", "chat"): chatPricing(0.0000001, 0.0000004),
})
- p, ok := mc.getPricing("google/gemini-2.0-flash", "vertex", schemas.ChatCompletionRequest)
- require.True(t, ok)
- assert.Equal(t, 0.0000001, p.InputCostPerToken)
+ p := mc.resolvePricing("vertex", "google/gemini-2.0-flash", "", schemas.ChatCompletionRequest, PricingLookupScopes{Provider: "vertex"})
+ assert.Equal(t, 0.0000001, derefF(p.InputCostPerToken))
}
func TestGetPricing_BedrockAddsAnthropicPrefix(t *testing.T) {
mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
makeKey("anthropic.claude-3-5-sonnet-20241022-v2:0", "bedrock", "chat"): chatPricing(0.000003, 0.000015),
})
- p, ok := mc.getPricing("claude-3-5-sonnet-20241022-v2:0", "bedrock", schemas.ChatCompletionRequest)
- require.True(t, ok)
- assert.Equal(t, 0.000003, p.InputCostPerToken)
+ p := mc.resolvePricing("bedrock", "claude-3-5-sonnet-20241022-v2:0", "", schemas.ChatCompletionRequest, PricingLookupScopes{Provider: "bedrock"})
+ assert.Equal(t, 0.000003, derefF(p.InputCostPerToken))
}
func TestGetPricing_ResponsesFallsBackToChat(t *testing.T) {
mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
makeKey("gpt-4o", "openai", "chat"): chatPricing(0.000005, 0.000015),
})
- p, ok := mc.getPricing("gpt-4o", "openai", schemas.ResponsesRequest)
- require.True(t, ok)
- assert.Equal(t, 0.000005, p.InputCostPerToken)
+ p := mc.resolvePricing("openai", "gpt-4o", "", schemas.ResponsesRequest, PricingLookupScopes{Provider: "openai"})
+ assert.Equal(t, 0.000005, derefF(p.InputCostPerToken))
}
func TestGetPricing_ResponsesStreamFallsBackToChat(t *testing.T) {
mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
makeKey("gpt-4o", "openai", "chat"): chatPricing(0.000005, 0.000015),
})
- p, ok := mc.getPricing("gpt-4o", "openai", schemas.ResponsesStreamRequest)
- require.True(t, ok)
- assert.Equal(t, 0.000005, p.InputCostPerToken)
+ p := mc.resolvePricing("openai", "gpt-4o", "", schemas.ResponsesStreamRequest, PricingLookupScopes{Provider: "openai"})
+ assert.Equal(t, 0.000005, derefF(p.InputCostPerToken))
}
func TestGetPricing_GeminiResponsesFallsBackToVertexChat(t *testing.T) {
@@ -1198,15 +1197,14 @@ func TestGetPricing_GeminiResponsesFallsBackToVertexChat(t *testing.T) {
makeKey("gemini-2.0-flash", "vertex", "chat"): chatPricing(0.0000001, 0.0000004),
})
// gemini provider + responses request → try vertex + responses → try vertex + chat
- p, ok := mc.getPricing("gemini-2.0-flash", "gemini", schemas.ResponsesRequest)
- require.True(t, ok)
- assert.Equal(t, 0.0000001, p.InputCostPerToken)
+ p := mc.resolvePricing("gemini", "gemini-2.0-flash", "", schemas.ResponsesRequest, PricingLookupScopes{Provider: "gemini"})
+ assert.Equal(t, 0.0000001, derefF(p.InputCostPerToken))
}
func TestGetPricing_NotFound(t *testing.T) {
mc := testCatalogWithPricing(nil)
- _, ok := mc.getPricing("nonexistent", "openai", schemas.ChatCompletionRequest)
- assert.False(t, ok)
+ p := mc.resolvePricing("openai", "nonexistent", "", schemas.ChatCompletionRequest, PricingLookupScopes{Provider: "openai"})
+ assert.Nil(t, p)
}
// =========================================================================
@@ -1219,9 +1217,9 @@ func TestResolvePricing_DeploymentFallback(t *testing.T) {
})
// Model not found directly, but deployment matches
- p := mc.resolvePricing("openai", "gpt-4o-custom", "my-deployment", schemas.ChatCompletionRequest)
+ p := mc.resolvePricing("openai", "gpt-4o-custom", "my-deployment", schemas.ChatCompletionRequest, PricingLookupScopes{})
require.NotNil(t, p)
- assert.Equal(t, 0.000005, p.InputCostPerToken)
+ assert.Equal(t, 0.000005, derefF(p.InputCostPerToken))
}
func TestResolvePricing_ModelFoundDirectly(t *testing.T) {
@@ -1231,14 +1229,14 @@ func TestResolvePricing_ModelFoundDirectly(t *testing.T) {
})
// Model found directly — doesn't fall back to deployment
- p := mc.resolvePricing("openai", "gpt-4o", "my-deployment", schemas.ChatCompletionRequest)
+ p := mc.resolvePricing("openai", "gpt-4o", "my-deployment", schemas.ChatCompletionRequest, PricingLookupScopes{})
require.NotNil(t, p)
- assert.Equal(t, 0.000005, p.InputCostPerToken)
+ assert.Equal(t, 0.000005, derefF(p.InputCostPerToken))
}
func TestResolvePricing_NothingFound(t *testing.T) {
mc := testCatalogWithPricing(nil)
- p := mc.resolvePricing("openai", "unknown", "", schemas.ChatCompletionRequest)
+ p := mc.resolvePricing("openai", "unknown", "", schemas.ChatCompletionRequest, PricingLookupScopes{})
assert.Nil(t, p)
}
@@ -1327,14 +1325,14 @@ func TestCalculateCost_200kTier_EndToEnd(t *testing.T) {
mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
makeKey("anthropic.claude-3-5-sonnet-20240620-v1:0", "bedrock", "chat"): {
Model: "anthropic.claude-3-5-sonnet-20240620-v1:0", Provider: "bedrock", Mode: "chat",
- InputCostPerToken: 0.000003,
- OutputCostPerToken: 0.000015,
- InputCostPerTokenAbove200kTokens: ptr(0.000006),
- OutputCostPerTokenAbove200kTokens: ptr(0.00003),
- CacheReadInputTokenCost: ptr(0.0000003),
- CacheCreationInputTokenCost: ptr(0.00000375),
- CacheReadInputTokenCostAbove200kTokens: ptr(0.0000006),
- CacheCreationInputTokenCostAbove200kTokens: ptr(0.0000075),
+ InputCostPerToken: bifrost.Ptr(0.000003),
+ OutputCostPerToken: bifrost.Ptr(0.000015),
+ InputCostPerTokenAbove200kTokens: bifrost.Ptr(0.000006),
+ OutputCostPerTokenAbove200kTokens: bifrost.Ptr(0.00003),
+ CacheReadInputTokenCost: bifrost.Ptr(0.0000003),
+ CacheCreationInputTokenCost: bifrost.Ptr(0.00000375),
+ CacheReadInputTokenCostAbove200kTokens: bifrost.Ptr(0.0000006),
+ CacheCreationInputTokenCostAbove200kTokens: bifrost.Ptr(0.0000075),
},
})
@@ -1344,7 +1342,7 @@ func TestCalculateCost_200kTier_EndToEnd(t *testing.T) {
TotalTokens: 210000, // Above 200k
})
- cost := mc.CalculateCost(resp)
+ cost := mc.CalculateCost(resp, nil)
// Tiered rate: input=0.000006, output=0.00003
// 190000*0.000006 + 20000*0.00003 = 1.14 + 0.6 = 1.74
assert.InDelta(t, 1.74, cost, 1e-9)
@@ -1365,14 +1363,14 @@ func TestCalculateCost_ProviderCostZeroTotalStillCalculates(t *testing.T) {
},
})
- cost := mc.CalculateCost(resp)
+ cost := mc.CalculateCost(resp, nil)
assert.InDelta(t, 0.0125, cost, 1e-12)
}
func TestCalculateCost_AllCachedTokens(t *testing.T) {
// All prompt tokens are from cache
p := chatPricing(0.000005, 0.000015)
- p.CacheReadInputTokenCost = ptr(0.0000005)
+ p.CacheReadInputTokenCost = bifrost.Ptr(0.0000005)
usage := &schemas.BifrostLLMUsage{
PromptTokens: 1000,
@@ -1398,8 +1396,8 @@ func TestCalculateCost_ImageGeneration_NilUsage_PerImagePricing(t *testing.T) {
Model: "dall-e-3",
Provider: "openai",
Mode: "image_generation",
- InputCostPerToken: 0,
- OutputCostPerImage: ptr(0.04),
+ InputCostPerToken: bifrost.Ptr(0.0),
+ OutputCostPerImage: bifrost.Ptr(0.04),
}
mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
@@ -1407,7 +1405,7 @@ func TestCalculateCost_ImageGeneration_NilUsage_PerImagePricing(t *testing.T) {
})
resp := makeImageResponse("openai", "dall-e-3", nil)
- cost := mc.CalculateCost(resp)
+ cost := mc.CalculateCost(resp, nil)
// 1 image * $0.04 = $0.04
assert.InDelta(t, 0.04, cost, 1e-12)
}
@@ -1418,8 +1416,8 @@ func TestCalculateCost_ImageGeneration_NilUsage_InputAndOutputPerImage(t *testin
Model: "test-image-model",
Provider: "test",
Mode: "image_generation",
- InputCostPerImage: ptr(0.01),
- OutputCostPerImage: ptr(0.04),
+ InputCostPerImage: bifrost.Ptr(0.01),
+ OutputCostPerImage: bifrost.Ptr(0.04),
}
mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
@@ -1427,7 +1425,7 @@ func TestCalculateCost_ImageGeneration_NilUsage_InputAndOutputPerImage(t *testin
})
resp := makeImageResponse("test", "test-image-model", nil)
- cost := mc.CalculateCost(resp)
+ cost := mc.CalculateCost(resp, nil)
// NumInputImages is 0 (not populated from request), so only output pricing applies
// 1 output image * $0.04 = $0.04
assert.InDelta(t, 0.04, cost, 1e-12)
@@ -1439,8 +1437,8 @@ func TestCalculateCost_ImageGeneration_WithInputImages(t *testing.T) {
Model: "gpt-image-1",
Provider: "openai",
Mode: "image_generation",
- InputCostPerImage: ptr(0.01),
- OutputCostPerImage: ptr(0.04),
+ InputCostPerImage: bifrost.Ptr(0.01),
+ OutputCostPerImage: bifrost.Ptr(0.04),
}
mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
@@ -1450,7 +1448,7 @@ func TestCalculateCost_ImageGeneration_WithInputImages(t *testing.T) {
resp := makeImageResponse("openai", "gpt-image-1", &schemas.ImageUsage{
NumInputImages: 2,
})
- cost := mc.CalculateCost(resp)
+ cost := mc.CalculateCost(resp, nil)
// 2 input images * $0.01 + 1 output image * $0.04 = $0.06
assert.InDelta(t, 0.06, cost, 1e-12)
}
@@ -1461,7 +1459,7 @@ func TestCalculateCost_ImageGeneration_OutputCountFromData(t *testing.T) {
Model: "dall-e-3",
Provider: "openai",
Mode: "image_generation",
- OutputCostPerImage: ptr(0.04),
+ OutputCostPerImage: bifrost.Ptr(0.04),
}
mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
@@ -1482,7 +1480,7 @@ func TestCalculateCost_ImageGeneration_OutputCountFromData(t *testing.T) {
},
},
}
- cost := mc.CalculateCost(resp)
+ cost := mc.CalculateCost(resp, nil)
// 3 output images * $0.04 = $0.12
assert.InDelta(t, 0.12, cost, 1e-12)
}
@@ -1493,8 +1491,8 @@ func TestCalculateCost_ImageGeneration_NilUsage_NoPerImagePricing(t *testing.T)
Model: "token-only-model",
Provider: "test",
Mode: "image_generation",
- InputCostPerToken: 0.000001,
- OutputCostPerToken: 0.000002,
+ InputCostPerToken: bifrost.Ptr(0.000001),
+ OutputCostPerToken: bifrost.Ptr(0.000002),
}
mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
@@ -1502,7 +1500,7 @@ func TestCalculateCost_ImageGeneration_NilUsage_NoPerImagePricing(t *testing.T)
})
resp := makeImageResponse("test", "token-only-model", nil)
- cost := mc.CalculateCost(resp)
+ cost := mc.CalculateCost(resp, nil)
// No per-image pricing and all tokens are zero → 0
assert.InDelta(t, 0.0, cost, 1e-12)
}
@@ -1513,7 +1511,7 @@ func TestCalculateCost_ImageGeneration_EmptyUsage_PerImagePricing(t *testing.T)
Model: "dall-e-3",
Provider: "openai",
Mode: "image_generation",
- OutputCostPerImage: ptr(0.04),
+ OutputCostPerImage: bifrost.Ptr(0.04),
}
mc := testCatalogWithPricing(map[string]configstoreTables.TableModelPricing{
@@ -1521,16 +1519,16 @@ func TestCalculateCost_ImageGeneration_EmptyUsage_PerImagePricing(t *testing.T)
})
resp := makeImageResponse("openai", "dall-e-3", &schemas.ImageUsage{})
- cost := mc.CalculateCost(resp)
+ cost := mc.CalculateCost(resp, nil)
assert.InDelta(t, 0.04, cost, 1e-12)
}
func TestComputeImageCost_MixedInputTokensOutputPerImage(t *testing.T) {
// Input has tokens (text prompt), output has no tokens but per-image pricing
p := configstoreTables.TableModelPricing{
- InputCostPerToken: 0.000005,
- OutputCostPerToken: 0.000015,
- OutputCostPerImage: ptr(0.04),
+ InputCostPerToken: bifrost.Ptr(0.000005),
+ OutputCostPerToken: bifrost.Ptr(0.000015),
+ OutputCostPerImage: bifrost.Ptr(0.04),
}
usage := &schemas.ImageUsage{
InputTokens: 500,
@@ -1545,9 +1543,9 @@ func TestComputeImageCost_MixedInputTokensOutputPerImage(t *testing.T) {
func TestComputeImageCost_MixedInputPerImageOutputTokens(t *testing.T) {
// Input has no tokens but per-image count, output has tokens
p := configstoreTables.TableModelPricing{
- InputCostPerToken: 0.000005,
- OutputCostPerToken: 0.000015,
- InputCostPerImage: ptr(0.01),
+ InputCostPerToken: bifrost.Ptr(0.000005),
+ OutputCostPerToken: bifrost.Ptr(0.000015),
+ InputCostPerImage: bifrost.Ptr(0.01),
}
usage := &schemas.ImageUsage{
NumInputImages: 3,
@@ -1562,10 +1560,10 @@ func TestComputeImageCost_MixedInputPerImageOutputTokens(t *testing.T) {
func TestComputeImageCost_BothHaveTokens_IgnoresPerImage(t *testing.T) {
// Both sides have tokens — per-image pricing is ignored
p := configstoreTables.TableModelPricing{
- InputCostPerToken: 0.000005,
- OutputCostPerToken: 0.000015,
- InputCostPerImage: ptr(0.01),
- OutputCostPerImage: ptr(0.04),
+ InputCostPerToken: bifrost.Ptr(0.000005),
+ OutputCostPerToken: bifrost.Ptr(0.000015),
+ InputCostPerImage: bifrost.Ptr(0.01),
+ OutputCostPerImage: bifrost.Ptr(0.04),
}
usage := &schemas.ImageUsage{
InputTokens: 200,
diff --git a/framework/modelcatalog/utils.go b/framework/modelcatalog/utils.go
index c477696c6a..4808ee844d 100644
--- a/framework/modelcatalog/utils.go
+++ b/framework/modelcatalog/utils.go
@@ -3,6 +3,7 @@ package modelcatalog
import (
"strings"
+ "github.com/bytedance/sonic"
"github.com/maximhq/bifrost/core/schemas"
configstoreTables "github.com/maximhq/bifrost/framework/configstore/tables"
)
@@ -163,11 +164,7 @@ func convertPricingDataToTableModelPricing(modelKey string, entry PricingEntry)
// convertTableModelPricingToPricingData converts the TableModelPricing struct to a PricingEntry struct
func convertTableModelPricingToPricingData(pricing *configstoreTables.TableModelPricing) *PricingEntry {
- return &PricingEntry{
- BaseModel: pricing.BaseModel,
- Provider: pricing.Provider,
- Mode: pricing.Mode,
-
+ options := PricingOptions{
// Costs - Text
InputCostPerToken: pricing.InputCostPerToken,
OutputCostPerToken: pricing.OutputCostPerToken,
@@ -230,4 +227,30 @@ func convertTableModelPricingToPricingData(pricing *configstoreTables.TableModel
SearchContextCostPerQuery: pricing.SearchContextCostPerQuery,
CodeInterpreterCostPerSession: pricing.CodeInterpreterCostPerSession,
}
+ return &PricingEntry{
+ BaseModel: pricing.BaseModel,
+ Provider: pricing.Provider,
+ Mode: pricing.Mode,
+ PricingOptions: options,
+ }
+}
+
+// convertTablePricingOverrideToPricingOverride converts a TablePricingOverride to a PricingOverride.
+func convertTablePricingOverrideToPricingOverride(override *configstoreTables.TablePricingOverride) (PricingOverride, error) {
+ var options PricingOptions
+ if err := sonic.Unmarshal([]byte(override.PricingPatchJSON), &options); err != nil {
+ return PricingOverride{}, err
+ }
+ return PricingOverride{
+ ID: override.ID,
+ Name: override.Name,
+ ScopeKind: ScopeKind(override.ScopeKind),
+ VirtualKeyID: override.VirtualKeyID,
+ ProviderID: override.ProviderID,
+ ProviderKeyID: override.ProviderKeyID,
+ MatchType: MatchType(override.MatchType),
+ Pattern: override.Pattern,
+ RequestTypes: override.RequestTypes,
+ Options: options,
+ }, nil
}
diff --git a/framework/streaming/audio.go b/framework/streaming/audio.go
index d36fb47d36..9cc2aa6924 100644
--- a/framework/streaming/audio.go
+++ b/framework/streaming/audio.go
@@ -8,6 +8,7 @@ import (
bifrost "github.com/maximhq/bifrost/core"
schemas "github.com/maximhq/bifrost/core/schemas"
+ "github.com/maximhq/bifrost/framework/modelcatalog"
)
// buildCompleteMessageFromAudioStreamChunks builds a complete message from accumulated audio chunks
@@ -145,7 +146,7 @@ func (a *Accumulator) processAudioStreamingResponse(ctx *schemas.BifrostContext,
chunk.ChunkIndex = result.SpeechStreamResponse.ExtraFields.ChunkIndex
if isFinalChunk {
if a.pricingManager != nil {
- cost := a.pricingManager.CalculateCost(result)
+ cost := a.pricingManager.CalculateCost(result, modelcatalog.PricingLookupScopesFromContext(ctx, string(result.GetExtraFields().Provider)))
chunk.Cost = bifrost.Ptr(cost)
}
chunk.SemanticCacheDebug = result.GetExtraFields().CacheDebug
diff --git a/framework/streaming/chat.go b/framework/streaming/chat.go
index dafd170902..1d87106913 100644
--- a/framework/streaming/chat.go
+++ b/framework/streaming/chat.go
@@ -8,6 +8,7 @@ import (
bifrost "github.com/maximhq/bifrost/core"
"github.com/maximhq/bifrost/core/schemas"
+ "github.com/maximhq/bifrost/framework/modelcatalog"
)
// deepCopyChatStreamDelta creates a deep copy of ChatStreamResponseChoiceDelta
@@ -497,7 +498,7 @@ func (a *Accumulator) processChatStreamingResponse(ctx *schemas.BifrostContext,
chunk.ChunkIndex = result.TextCompletionResponse.ExtraFields.ChunkIndex
if isFinalChunk {
if a.pricingManager != nil {
- cost := a.pricingManager.CalculateCost(result)
+ cost := a.pricingManager.CalculateCost(result, modelcatalog.PricingLookupScopesFromContext(ctx, string(result.GetExtraFields().Provider)))
chunk.Cost = bifrost.Ptr(cost)
}
chunk.SemanticCacheDebug = result.GetExtraFields().CacheDebug
@@ -523,7 +524,7 @@ func (a *Accumulator) processChatStreamingResponse(ctx *schemas.BifrostContext,
}
if isFinalChunk {
if a.pricingManager != nil {
- cost := a.pricingManager.CalculateCost(result)
+ cost := a.pricingManager.CalculateCost(result, modelcatalog.PricingLookupScopesFromContext(ctx, string(result.GetExtraFields().Provider)))
chunk.Cost = bifrost.Ptr(cost)
}
chunk.SemanticCacheDebug = result.GetExtraFields().CacheDebug
diff --git a/framework/streaming/images.go b/framework/streaming/images.go
index 23b2dd8f5c..446d1ca3b3 100644
--- a/framework/streaming/images.go
+++ b/framework/streaming/images.go
@@ -8,6 +8,7 @@ import (
bifrost "github.com/maximhq/bifrost/core"
schemas "github.com/maximhq/bifrost/core/schemas"
+ "github.com/maximhq/bifrost/framework/modelcatalog"
)
// buildCompleteImageFromImageStreamChunks builds a complete image generation response from accumulated chunks
@@ -273,7 +274,7 @@ func (a *Accumulator) processImageStreamingResponse(ctx *schemas.BifrostContext,
if isFinalChunk {
if a.pricingManager != nil {
- cost := a.pricingManager.CalculateCost(result)
+ cost := a.pricingManager.CalculateCost(result, modelcatalog.PricingLookupScopesFromContext(ctx, string(result.GetExtraFields().Provider)))
chunk.Cost = bifrost.Ptr(cost)
}
chunk.SemanticCacheDebug = result.GetExtraFields().CacheDebug
diff --git a/framework/streaming/responses.go b/framework/streaming/responses.go
index 62a4739c6d..aa38248167 100644
--- a/framework/streaming/responses.go
+++ b/framework/streaming/responses.go
@@ -8,6 +8,7 @@ import (
bifrost "github.com/maximhq/bifrost/core"
"github.com/maximhq/bifrost/core/schemas"
+ "github.com/maximhq/bifrost/framework/modelcatalog"
)
// deepCopyResponsesStreamResponse creates a deep copy of BifrostResponsesStreamResponse
@@ -917,7 +918,7 @@ func (a *Accumulator) processResponsesStreamingResponse(ctx *schemas.BifrostCont
chunk.ChunkIndex = result.ResponsesStreamResponse.ExtraFields.ChunkIndex
if isFinalChunk {
if a.pricingManager != nil {
- cost := a.pricingManager.CalculateCost(result)
+ cost := a.pricingManager.CalculateCost(result, modelcatalog.PricingLookupScopesFromContext(ctx, string(result.GetExtraFields().Provider)))
chunk.Cost = bifrost.Ptr(cost)
}
chunk.SemanticCacheDebug = result.GetExtraFields().CacheDebug
diff --git a/framework/streaming/transcription.go b/framework/streaming/transcription.go
index 593c7f80b2..56fb3e477c 100644
--- a/framework/streaming/transcription.go
+++ b/framework/streaming/transcription.go
@@ -8,6 +8,7 @@ import (
bifrost "github.com/maximhq/bifrost/core"
"github.com/maximhq/bifrost/core/schemas"
+ "github.com/maximhq/bifrost/framework/modelcatalog"
)
// buildCompleteMessageFromTranscriptionStreamChunks builds a complete message from accumulated transcription chunks
@@ -162,7 +163,7 @@ func (a *Accumulator) processTranscriptionStreamingResponse(ctx *schemas.Bifrost
}
if isFinalChunk {
if a.pricingManager != nil {
- cost := a.pricingManager.CalculateCost(result)
+ cost := a.pricingManager.CalculateCost(result, modelcatalog.PricingLookupScopesFromContext(ctx, string(result.GetExtraFields().Provider)))
chunk.Cost = bifrost.Ptr(cost)
}
chunk.SemanticCacheDebug = result.GetExtraFields().CacheDebug
diff --git a/framework/tracing/tracer.go b/framework/tracing/tracer.go
index 3d55ca2ff5..c5088b17ed 100644
--- a/framework/tracing/tracer.go
+++ b/framework/tracing/tracer.go
@@ -164,7 +164,7 @@ func (t *Tracer) PopulateLLMRequestAttributes(handle schemas.SpanHandle, req *sc
}
// PopulateLLMResponseAttributes populates all LLM-specific response attributes on the span.
-func (t *Tracer) PopulateLLMResponseAttributes(handle schemas.SpanHandle, resp *schemas.BifrostResponse, err *schemas.BifrostError) {
+func (t *Tracer) PopulateLLMResponseAttributes(ctx *schemas.BifrostContext, handle schemas.SpanHandle, resp *schemas.BifrostResponse, err *schemas.BifrostError) {
h, ok := handle.(*spanHandle)
if !ok || h == nil {
return
@@ -185,7 +185,7 @@ func (t *Tracer) PopulateLLMResponseAttributes(handle schemas.SpanHandle, resp *
}
// Populate cost attribute using pricing manager
if t.pricingManager != nil && resp != nil {
- cost := t.pricingManager.CalculateCost(resp)
+ cost := t.pricingManager.CalculateCost(resp, modelcatalog.PricingLookupScopesFromContext(ctx, string(resp.GetExtraFields().Provider)))
span.SetAttribute(schemas.AttrUsageCost, cost)
}
}
diff --git a/plugins/governance/main.go b/plugins/governance/main.go
index 200afe1237..0f6e8dd796 100644
--- a/plugins/governance/main.go
+++ b/plugins/governance/main.go
@@ -1223,6 +1223,9 @@ func (p *GovernancePlugin) PostLLMHook(ctx *schemas.BifrostContext, result *sche
isFinalChunk := bifrost.IsFinalChunk(ctx)
+ // Build pricing scopes from context using the governance VK ID (not the raw VK token)
+ pricingScopes := modelcatalog.PricingLookupScopesFromContext(ctx, string(provider))
+
// Always process usage tracking (with or without virtual key)
// When user auth is present, skip VK usage tracking to avoid double-counting
effectiveVK := virtualKey
@@ -1235,7 +1238,7 @@ func (p *GovernancePlugin) PostLLMHook(ctx *schemas.BifrostContext, result *sche
p.wg.Add(1)
go func() {
defer p.wg.Done()
- p.postHookWorker(result, provider, model, requestType, effectiveVK, requestID, userID, isCacheRead, isBatch, isFinalChunk)
+ p.postHookWorker(result, provider, model, requestType, effectiveVK, requestID, userID, isCacheRead, isBatch, isFinalChunk, pricingScopes)
}()
}
@@ -1419,13 +1422,15 @@ func (p *GovernancePlugin) Cleanup() error {
// - provider: The provider of the request
// - model: The model of the request
// - requestType: The type of the request
-// - virtualKey: The virtual key of the request (empty string if not present)
+// - virtualKey: The raw virtual key token of the request (empty string if not present)
+// - selectedKeyID: The selected provider key ID used for scoped pricing overrides
// - requestID: The request ID
// - userID: The user ID for enterprise user-level governance (empty string if not present)
// - isCacheRead: Whether the request is a cache read
// - isBatch: Whether the request is a batch request
// - isFinalChunk: Whether the request is the final chunk
-func (p *GovernancePlugin) postHookWorker(result *schemas.BifrostResponse, provider schemas.ModelProvider, model string, requestType schemas.RequestType, virtualKey, requestID, userID string, _, _, isFinalChunk bool) {
+// - pricingScopes: Prebuilt pricing lookup scopes using governance VK ID (nil if not applicable)
+func (p *GovernancePlugin) postHookWorker(result *schemas.BifrostResponse, provider schemas.ModelProvider, model string, requestType schemas.RequestType, virtualKey, requestID, userID string, _, _, isFinalChunk bool, pricingScopes *modelcatalog.PricingLookupScopes) {
// Determine if request was successful
success := (result != nil)
@@ -1435,7 +1440,7 @@ func (p *GovernancePlugin) postHookWorker(result *schemas.BifrostResponse, provi
if !isStreaming || (isStreaming && isFinalChunk) {
var cost float64
if p.modelCatalog != nil && result != nil {
- cost = p.modelCatalog.CalculateCost(result)
+ cost = p.modelCatalog.CalculateCost(result, pricingScopes)
}
tokensUsed := 0
if result != nil {
diff --git a/plugins/logging/main.go b/plugins/logging/main.go
index 3a2e501b69..160b67d064 100644
--- a/plugins/logging/main.go
+++ b/plugins/logging/main.go
@@ -226,7 +226,7 @@ type LoggerPlugin struct {
pendingLogs sync.Map // Maps requestID -> *PendingLogData (PreLLMHook input data awaiting PostLLMHook)
writeQueue chan *writeQueueEntry // Buffered channel for batch write queue
closed atomic.Bool // Set during cleanup to prevent sends on closed writeQueue
- deferredUsageSem chan struct{} // Limits concurrent deferred usage DB updates
+ deferredUsageSem chan struct{} // Limits concurrent deferred usage DB updates
}
// Init creates new logger plugin with given log store
@@ -778,7 +778,8 @@ func (p *LoggerPlugin) PostLLMHook(ctx *schemas.BifrostContext, result *schemas.
}
entry.CacheDebugParsed = cacheDebug
if p.pricingManager != nil {
- if cost := p.pricingManager.CalculateCost(result); cost > 0 {
+ pricingScopes := modelcatalog.PricingLookupScopesFromContext(ctx, string(entry.Provider))
+ if cost := p.pricingManager.CalculateCost(result, pricingScopes); cost > 0 {
entry.Cost = &cost
}
}
diff --git a/plugins/logging/operations.go b/plugins/logging/operations.go
index 59d61987d9..9e41a6322b 100644
--- a/plugins/logging/operations.go
+++ b/plugins/logging/operations.go
@@ -9,6 +9,7 @@ import (
"github.com/bytedance/sonic"
"github.com/maximhq/bifrost/core/schemas"
"github.com/maximhq/bifrost/framework/logstore"
+ "github.com/maximhq/bifrost/framework/modelcatalog"
"github.com/maximhq/bifrost/framework/streaming"
)
@@ -1019,7 +1020,8 @@ func (p *LoggerPlugin) calculateCostForLog(logEntry *logstore.Log) (float64, err
resp.SpeechResponse.Usage = logEntry.SpeechOutputParsed.Usage
}
- return p.pricingManager.CalculateCost(resp), nil
+ scopes := pricingScopesForLog(logEntry)
+ return p.pricingManager.CalculateCost(resp, &scopes), nil
}
// buildResponseForRequestType wraps BifrostLLMUsage into the correct response
@@ -1067,19 +1069,19 @@ func buildResponseForRequestType(requestType schemas.RequestType, usage *schemas
CachedWriteTokens: usage.PromptTokensDetails.CachedWriteTokens,
}
}
- if usage.CompletionTokensDetails != nil {
- respUsage.OutputTokensDetails = &schemas.ResponsesResponseOutputTokens{
- TextTokens: usage.CompletionTokensDetails.TextTokens,
- AcceptedPredictionTokens: usage.CompletionTokensDetails.AcceptedPredictionTokens,
- AudioTokens: usage.CompletionTokensDetails.AudioTokens,
- ImageTokens: usage.CompletionTokensDetails.ImageTokens,
- ReasoningTokens: usage.CompletionTokensDetails.ReasoningTokens,
- RejectedPredictionTokens: usage.CompletionTokensDetails.RejectedPredictionTokens,
- CitationTokens: usage.CompletionTokensDetails.CitationTokens,
- NumSearchQueries: usage.CompletionTokensDetails.NumSearchQueries,
+ if usage.CompletionTokensDetails != nil {
+ respUsage.OutputTokensDetails = &schemas.ResponsesResponseOutputTokens{
+ TextTokens: usage.CompletionTokensDetails.TextTokens,
+ AcceptedPredictionTokens: usage.CompletionTokensDetails.AcceptedPredictionTokens,
+ AudioTokens: usage.CompletionTokensDetails.AudioTokens,
+ ImageTokens: usage.CompletionTokensDetails.ImageTokens,
+ ReasoningTokens: usage.CompletionTokensDetails.ReasoningTokens,
+ RejectedPredictionTokens: usage.CompletionTokensDetails.RejectedPredictionTokens,
+ CitationTokens: usage.CompletionTokensDetails.CitationTokens,
+ NumSearchQueries: usage.CompletionTokensDetails.NumSearchQueries,
+ }
}
}
- }
return &schemas.BifrostResponse{
ResponsesResponse: &schemas.BifrostResponsesResponse{
Usage: respUsage,
@@ -1151,3 +1153,20 @@ func buildResponseForRequestType(requestType schemas.RequestType, usage *schemas
}
}
}
+
+func pricingScopesForLog(logEntry *logstore.Log) modelcatalog.PricingLookupScopes {
+ if logEntry == nil {
+ return modelcatalog.PricingLookupScopes{}
+ }
+
+ virtualKeyID := ""
+ if logEntry.VirtualKeyID != nil {
+ virtualKeyID = *logEntry.VirtualKeyID
+ }
+
+ return modelcatalog.PricingLookupScopes{
+ Provider: logEntry.Provider,
+ SelectedKeyID: logEntry.SelectedKeyID,
+ VirtualKeyID: virtualKeyID,
+ }
+}
diff --git a/plugins/telemetry/main.go b/plugins/telemetry/main.go
index f8c2efe319..58d0bb07ed 100644
--- a/plugins/telemetry/main.go
+++ b/plugins/telemetry/main.go
@@ -425,6 +425,8 @@ func (p *PrometheusPlugin) PostLLMHook(ctx *schemas.BifrostContext, result *sche
streamEndIndicatorValue := ctx.Value(schemas.BifrostContextKeyStreamEndIndicator)
isFinalChunk, hasFinalChunkIndicator := streamEndIndicatorValue.(bool)
+ pricingScopes := modelcatalog.PricingLookupScopesFromContext(ctx, string(provider))
+
// Calculate cost and record metrics in a separate goroutine to avoid blocking the main thread
go func() {
// For streaming requests, handle per-token metrics for intermediate chunks
@@ -447,7 +449,7 @@ func (p *PrometheusPlugin) PostLLMHook(ctx *schemas.BifrostContext, result *sche
cost := 0.0
if p.pricingManager != nil && result != nil {
- cost = p.pricingManager.CalculateCost(result)
+ cost = p.pricingManager.CalculateCost(result, pricingScopes)
}
p.UpstreamRequestsTotal.WithLabelValues(promLabelValues...).Inc()
diff --git a/transports/bifrost-http/handlers/governance.go b/transports/bifrost-http/handlers/governance.go
index 731fd3b4b9..a0ee441bbf 100644
--- a/transports/bifrost-http/handlers/governance.go
+++ b/transports/bifrost-http/handlers/governance.go
@@ -19,6 +19,7 @@ import (
"github.com/maximhq/bifrost/core/schemas"
"github.com/maximhq/bifrost/framework/configstore"
configstoreTables "github.com/maximhq/bifrost/framework/configstore/tables"
+ "github.com/maximhq/bifrost/framework/modelcatalog"
"github.com/maximhq/bifrost/plugins/governance"
"github.com/maximhq/bifrost/transports/bifrost-http/lib"
"github.com/valyala/fasthttp"
@@ -40,6 +41,8 @@ type GovernanceManager interface {
RemoveProvider(ctx context.Context, provider schemas.ModelProvider) error
ReloadRoutingRule(ctx context.Context, id string) error
RemoveRoutingRule(ctx context.Context, id string) error
+ UpsertPricingOverride(ctx context.Context, override *configstoreTables.TablePricingOverride) error
+ DeletePricingOverride(ctx context.Context, id string) error
}
// GovernanceHandler manages HTTP requests for governance operations
@@ -296,6 +299,12 @@ func (h *GovernanceHandler) RegisterRoutes(r *router.Router, middlewares ...sche
r.GET("/api/governance/providers", lib.ChainMiddlewares(h.getProviderGovernance, middlewares...))
r.PUT("/api/governance/providers/{provider_name}", lib.ChainMiddlewares(h.updateProviderGovernance, middlewares...))
r.DELETE("/api/governance/providers/{provider_name}", lib.ChainMiddlewares(h.deleteProviderGovernance, middlewares...))
+
+ // Pricing override operations
+ r.GET("/api/governance/pricing-overrides", lib.ChainMiddlewares(h.getPricingOverrides, middlewares...))
+ r.POST("/api/governance/pricing-overrides", lib.ChainMiddlewares(h.createPricingOverride, middlewares...))
+ r.PUT("/api/governance/pricing-overrides/{id}", lib.ChainMiddlewares(h.updatePricingOverride, middlewares...))
+ r.DELETE("/api/governance/pricing-overrides/{id}", lib.ChainMiddlewares(h.deletePricingOverride, middlewares...))
}
// Virtual Key CRUD Operations
@@ -3244,6 +3253,376 @@ func (h *GovernanceHandler) deleteRoutingRule(ctx *fasthttp.RequestCtx) {
})
}
+// ---------------------------------------------------------------------------
+// Pricing Override Operations
+// ---------------------------------------------------------------------------
+
+// CreatePricingOverrideRequest is the request payload for creating a governance
+// pricing override.
+type CreatePricingOverrideRequest struct {
+ Name string `json:"name"`
+ ScopeKind modelcatalog.ScopeKind `json:"scope_kind"`
+ VirtualKeyID *string `json:"virtual_key_id,omitempty"`
+ ProviderID *string `json:"provider_id,omitempty"`
+ ProviderKeyID *string `json:"provider_key_id,omitempty"`
+ MatchType modelcatalog.MatchType `json:"match_type"`
+ Pattern string `json:"pattern"`
+ RequestTypes []schemas.RequestType `json:"request_types,omitempty"`
+ Patch modelcatalog.PricingOptions `json:"patch,omitempty"`
+}
+
+// nullableString tracks whether a JSON string field was explicitly present in
+// the request body (even as null), so the merge logic can distinguish "omitted"
+// (leave existing value) from "set to null" (clear the value).
+type nullableString struct {
+ Value *string
+ Set bool
+}
+
+func (n *nullableString) UnmarshalJSON(b []byte) error {
+ n.Set = true
+ if string(b) == "null" {
+ n.Value = nil
+ return nil
+ }
+ var s string
+ if err := json.Unmarshal(b, &s); err != nil {
+ return err
+ }
+ n.Value = &s
+ return nil
+}
+
+// UpdatePricingOverrideRequest is the request payload for updating a governance
+// pricing override. All fields except Patch are optional — omitted fields are
+// merged from the existing record. Patch is always replaced in full.
+type UpdatePricingOverrideRequest struct {
+ Name *string `json:"name,omitempty"`
+ ScopeKind *modelcatalog.ScopeKind `json:"scope_kind,omitempty"`
+ VirtualKeyID nullableString `json:"virtual_key_id"`
+ ProviderID nullableString `json:"provider_id"`
+ ProviderKeyID nullableString `json:"provider_key_id"`
+ MatchType *modelcatalog.MatchType `json:"match_type,omitempty"`
+ Pattern *string `json:"pattern,omitempty"`
+ RequestTypes []schemas.RequestType `json:"request_types,omitempty"`
+ Patch *modelcatalog.PricingOptions `json:"patch,omitempty"`
+}
+
+func (h *GovernanceHandler) getPricingOverrides(ctx *fasthttp.RequestCtx) {
+ // Parse filter parameters
+ var scopeKind, virtualKeyID, providerID, providerKeyID *string
+ if v := strings.TrimSpace(string(ctx.QueryArgs().Peek("scope_kind"))); v != "" {
+ scopeKind = &v
+ }
+ if v := strings.TrimSpace(string(ctx.QueryArgs().Peek("virtual_key_id"))); v != "" {
+ virtualKeyID = &v
+ }
+ if v := strings.TrimSpace(string(ctx.QueryArgs().Peek("provider_id"))); v != "" {
+ providerID = &v
+ }
+ if v := strings.TrimSpace(string(ctx.QueryArgs().Peek("provider_key_id"))); v != "" {
+ providerKeyID = &v
+ }
+
+ // Check for pagination parameters
+ limitStr := string(ctx.QueryArgs().Peek("limit"))
+ offsetStr := string(ctx.QueryArgs().Peek("offset"))
+ search := string(ctx.QueryArgs().Peek("search"))
+
+ if limitStr != "" || offsetStr != "" || search != "" {
+ params := configstore.PricingOverridesQueryParams{
+ Search: search,
+ ScopeKind: scopeKind,
+ VirtualKeyID: virtualKeyID,
+ ProviderID: providerID,
+ ProviderKeyID: providerKeyID,
+ }
+ if limitStr != "" {
+ n, err := strconv.Atoi(limitStr)
+ if err != nil {
+ SendError(ctx, 400, "Invalid limit parameter: must be a number")
+ return
+ }
+ if n < 0 {
+ SendError(ctx, 400, "Invalid limit parameter: must be non-negative")
+ return
+ }
+ params.Limit = n
+ }
+ if offsetStr != "" {
+ n, err := strconv.Atoi(offsetStr)
+ if err != nil {
+ SendError(ctx, 400, "Invalid offset parameter: must be a number")
+ return
+ }
+ if n < 0 {
+ SendError(ctx, 400, "Invalid offset parameter: must be non-negative")
+ return
+ }
+ params.Offset = n
+ }
+
+ params.Limit, params.Offset = ClampPaginationParams(params.Limit, params.Offset)
+ overrides, totalCount, err := h.configStore.GetPricingOverridesPaginated(ctx, params)
+ if err != nil {
+ logger.Error("failed to retrieve pricing overrides: %v", err)
+ SendError(ctx, fasthttp.StatusInternalServerError, "Failed to retrieve pricing overrides")
+ return
+ }
+ SendJSON(ctx, map[string]interface{}{
+ "pricing_overrides": overrides,
+ "count": len(overrides),
+ "total_count": totalCount,
+ "limit": params.Limit,
+ "offset": params.Offset,
+ })
+ return
+ }
+
+ // Non-paginated path: return all matching overrides (backward compatible)
+ filters := configstore.PricingOverrideFilters{
+ ScopeKind: scopeKind,
+ VirtualKeyID: virtualKeyID,
+ ProviderID: providerID,
+ ProviderKeyID: providerKeyID,
+ }
+ overrides, err := h.configStore.GetPricingOverrides(ctx, filters)
+ if err != nil {
+ logger.Error("failed to retrieve pricing overrides: %v", err)
+ SendError(ctx, fasthttp.StatusInternalServerError, "Failed to retrieve pricing overrides")
+ return
+ }
+
+ SendJSON(ctx, map[string]interface{}{
+ "pricing_overrides": overrides,
+ "count": len(overrides),
+ "total_count": len(overrides),
+ "limit": len(overrides),
+ "offset": 0,
+ })
+}
+
+func (h *GovernanceHandler) createPricingOverride(ctx *fasthttp.RequestCtx) {
+ var req CreatePricingOverrideRequest
+ if err := json.Unmarshal(ctx.PostBody(), &req); err != nil {
+ SendError(ctx, fasthttp.StatusBadRequest, "Invalid JSON")
+ return
+ }
+
+ name, err := normalizeAndValidatePricingOverrideName(req.Name)
+ if err != nil {
+ SendError(ctx, fasthttp.StatusBadRequest, err.Error())
+ return
+ }
+
+ shape := modelcatalog.PricingOverride{
+ ScopeKind: req.ScopeKind,
+ VirtualKeyID: req.VirtualKeyID,
+ ProviderID: req.ProviderID,
+ ProviderKeyID: req.ProviderKeyID,
+ MatchType: req.MatchType,
+ Pattern: req.Pattern,
+ RequestTypes: req.RequestTypes,
+ }
+ if err := shape.IsValid(); err != nil {
+ SendError(ctx, fasthttp.StatusBadRequest, err.Error())
+ return
+ }
+
+ patchJSON, err := sonic.Marshal(req.Patch)
+ if err != nil {
+ SendError(ctx, fasthttp.StatusBadRequest, "Invalid patch")
+ return
+ }
+
+ now := time.Now()
+ override := configstoreTables.TablePricingOverride{
+ ID: uuid.NewString(),
+ Name: name,
+ ScopeKind: string(req.ScopeKind),
+ VirtualKeyID: normalizeOptionalString(req.VirtualKeyID),
+ ProviderID: normalizeOptionalString(req.ProviderID),
+ ProviderKeyID: normalizeOptionalString(req.ProviderKeyID),
+ MatchType: string(req.MatchType),
+ Pattern: strings.TrimSpace(req.Pattern),
+ RequestTypes: req.RequestTypes,
+ PricingPatchJSON: string(patchJSON),
+ ConfigHash: "",
+ CreatedAt: now,
+ UpdatedAt: now,
+ }
+
+ if err := h.configStore.CreatePricingOverride(ctx, &override); err != nil {
+ logger.Error("failed to create pricing override: %v", err)
+ SendError(ctx, fasthttp.StatusInternalServerError, "Failed to create pricing override")
+ return
+ }
+
+ if err := h.governanceManager.UpsertPricingOverride(ctx, &override); err != nil {
+ logger.Error("failed to upsert pricing override: %v", err)
+ SendError(ctx, fasthttp.StatusInternalServerError, "Failed to upsert pricing override")
+ return
+ }
+ SendJSONWithStatus(ctx, map[string]interface{}{
+ "message": "Pricing override created successfully",
+ "pricing_override": override,
+ }, fasthttp.StatusCreated)
+}
+
+func (h *GovernanceHandler) updatePricingOverride(ctx *fasthttp.RequestCtx) {
+ id := ctx.UserValue("id").(string)
+
+ var req UpdatePricingOverrideRequest
+ if err := json.Unmarshal(ctx.PostBody(), &req); err != nil {
+ SendError(ctx, fasthttp.StatusBadRequest, "Invalid JSON")
+ return
+ }
+
+ existing, err := h.configStore.GetPricingOverrideByID(ctx, id)
+ if err != nil {
+ if errors.Is(err, configstore.ErrNotFound) {
+ SendError(ctx, fasthttp.StatusNotFound, "Pricing override not found")
+ return
+ }
+ SendError(ctx, fasthttp.StatusInternalServerError, fmt.Sprintf("Failed to retrieve pricing override: %v", err))
+ return
+ }
+
+ // Merge request fields onto the existing record; omitted fields keep their current values.
+ merged := modelcatalog.PricingOverride{
+ ScopeKind: modelcatalog.ScopeKind(existing.ScopeKind),
+ VirtualKeyID: existing.VirtualKeyID,
+ ProviderID: existing.ProviderID,
+ ProviderKeyID: existing.ProviderKeyID,
+ MatchType: modelcatalog.MatchType(existing.MatchType),
+ Pattern: existing.Pattern,
+ RequestTypes: existing.RequestTypes,
+ }
+ if req.ScopeKind != nil {
+ merged.ScopeKind = *req.ScopeKind
+ // Changing scope_kind resets all scope IDs; only what the request
+ // explicitly provides will be kept.
+ merged.VirtualKeyID = nil
+ merged.ProviderID = nil
+ merged.ProviderKeyID = nil
+ }
+ if req.VirtualKeyID.Set {
+ merged.VirtualKeyID = req.VirtualKeyID.Value
+ }
+ if req.ProviderID.Set {
+ merged.ProviderID = req.ProviderID.Value
+ }
+ if req.ProviderKeyID.Set {
+ merged.ProviderKeyID = req.ProviderKeyID.Value
+ }
+ if req.MatchType != nil {
+ merged.MatchType = *req.MatchType
+ }
+ if req.Pattern != nil {
+ merged.Pattern = *req.Pattern
+ }
+ if req.RequestTypes != nil {
+ merged.RequestTypes = req.RequestTypes
+ }
+
+ if err := merged.IsValid(); err != nil {
+ SendError(ctx, fasthttp.StatusBadRequest, err.Error())
+ return
+ }
+
+ // Resolve name: use provided value or fall back to existing.
+ nameStr := existing.Name
+ if req.Name != nil {
+ nameStr, err = normalizeAndValidatePricingOverrideName(*req.Name)
+ if err != nil {
+ SendError(ctx, fasthttp.StatusBadRequest, err.Error())
+ return
+ }
+ }
+
+ // Patch JSON: always replace in full with whatever is provided (or keep existing if omitted).
+ pricingPatchJSON := existing.PricingPatchJSON
+ if req.Patch != nil {
+ b, err := sonic.Marshal(req.Patch)
+ if err != nil {
+ SendError(ctx, fasthttp.StatusBadRequest, "Invalid patch")
+ return
+ }
+ pricingPatchJSON = string(b)
+ }
+
+ override := configstoreTables.TablePricingOverride{
+ ID: id,
+ Name: nameStr,
+ ScopeKind: string(merged.ScopeKind),
+ VirtualKeyID: normalizeOptionalString(merged.VirtualKeyID),
+ ProviderID: normalizeOptionalString(merged.ProviderID),
+ ProviderKeyID: normalizeOptionalString(merged.ProviderKeyID),
+ MatchType: string(merged.MatchType),
+ Pattern: strings.TrimSpace(merged.Pattern),
+ RequestTypes: merged.RequestTypes,
+ PricingPatchJSON: pricingPatchJSON,
+ ConfigHash: existing.ConfigHash,
+ CreatedAt: existing.CreatedAt,
+ UpdatedAt: time.Now(),
+ }
+
+ if err := h.configStore.UpdatePricingOverride(ctx, &override); err != nil {
+ logger.Error("failed to update pricing override: %v", err)
+ SendError(ctx, fasthttp.StatusInternalServerError, "Failed to update pricing override")
+ return
+ }
+
+ if err := h.governanceManager.UpsertPricingOverride(ctx, &override); err != nil {
+ logger.Error("failed to upsert pricing override: %v", err)
+ SendError(ctx, fasthttp.StatusInternalServerError, "Failed to upsert pricing override")
+ return
+ }
+ SendJSON(ctx, map[string]interface{}{
+ "message": "Pricing override updated successfully",
+ "pricing_override": override,
+ })
+}
+
+func (h *GovernanceHandler) deletePricingOverride(ctx *fasthttp.RequestCtx) {
+ id := ctx.UserValue("id").(string)
+ if err := h.configStore.DeletePricingOverride(ctx, id); err != nil {
+ if errors.Is(err, configstore.ErrNotFound) {
+ SendError(ctx, fasthttp.StatusNotFound, "Pricing override not found")
+ return
+ }
+ logger.Error("failed to delete pricing override: %v", err)
+ SendError(ctx, fasthttp.StatusInternalServerError, "Failed to delete pricing override")
+ return
+ }
+
+ if err := h.governanceManager.DeletePricingOverride(ctx, id); err != nil {
+ logger.Warn("failed to delete pricing override from memory: %v", err)
+ }
+ SendJSON(ctx, map[string]interface{}{
+ "message": "Pricing override deleted successfully",
+ })
+}
+
+func normalizeAndValidatePricingOverrideName(name string) (string, error) {
+ trimmed := strings.TrimSpace(name)
+ if trimmed == "" {
+ return "", errors.New("name is required")
+ }
+ return trimmed, nil
+}
+
+func normalizeOptionalString(value *string) *string {
+ if value == nil {
+ return nil
+ }
+ trimmed := strings.TrimSpace(*value)
+ if trimmed == "" {
+ return nil
+ }
+ return &trimmed
+}
+
// validRoutingScopes contains the allowed scope values for routing rules
var validRoutingScopes = map[string]bool{
"global": true,
diff --git a/transports/bifrost-http/handlers/inference.go b/transports/bifrost-http/handlers/inference.go
index 326ba7d342..e4a9ee9028 100644
--- a/transports/bifrost-http/handlers/inference.go
+++ b/transports/bifrost-http/handlers/inference.go
@@ -745,9 +745,12 @@ func (h *CompletionHandler) listModels(ctx *fasthttp.RequestCtx) {
pricingEntry = h.config.ModelCatalog.GetPricingEntryForModel(*modelEntry.Deployment, provider)
}
if pricingEntry != nil && modelEntry.Pricing == nil {
- pricing := &schemas.Pricing{
- Prompt: bifrost.Ptr(fmt.Sprintf("%.10f", pricingEntry.InputCostPerToken)),
- Completion: bifrost.Ptr(fmt.Sprintf("%.10f", pricingEntry.OutputCostPerToken)),
+ pricing := &schemas.Pricing{}
+ if pricingEntry.InputCostPerToken != nil {
+ pricing.Prompt = bifrost.Ptr(fmt.Sprintf("%.10f", *pricingEntry.InputCostPerToken))
+ }
+ if pricingEntry.OutputCostPerToken != nil {
+ pricing.Completion = bifrost.Ptr(fmt.Sprintf("%.10f", *pricingEntry.OutputCostPerToken))
}
if pricingEntry.InputCostPerImage != nil {
pricing.Image = bifrost.Ptr(fmt.Sprintf("%.10f", *pricingEntry.InputCostPerImage))
@@ -755,6 +758,9 @@ func (h *CompletionHandler) listModels(ctx *fasthttp.RequestCtx) {
if pricingEntry.CacheReadInputTokenCost != nil {
pricing.InputCacheRead = bifrost.Ptr(fmt.Sprintf("%.10f", *pricingEntry.CacheReadInputTokenCost))
}
+ if pricingEntry.CacheCreationInputTokenCost != nil {
+ pricing.InputCacheWrite = bifrost.Ptr(fmt.Sprintf("%.10f", *pricingEntry.CacheCreationInputTokenCost))
+ }
resp.Data[i].Pricing = pricing
}
}
diff --git a/transports/bifrost-http/handlers/pricing_override_test.go b/transports/bifrost-http/handlers/pricing_override_test.go
new file mode 100644
index 0000000000..4d19d0541e
--- /dev/null
+++ b/transports/bifrost-http/handlers/pricing_override_test.go
@@ -0,0 +1,149 @@
+package handlers
+
+import (
+ "context"
+ "encoding/json"
+ "net"
+ "os"
+ "testing"
+ "time"
+
+ "github.com/maximhq/bifrost/core/schemas"
+ "github.com/maximhq/bifrost/framework/configstore"
+ configstoreTables "github.com/maximhq/bifrost/framework/configstore/tables"
+ "github.com/maximhq/bifrost/framework/modelcatalog"
+ "github.com/maximhq/bifrost/plugins/governance"
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+ "github.com/valyala/fasthttp"
+)
+
+type pricingOverrideTestGovernanceManager struct{}
+
+func (pricingOverrideTestGovernanceManager) GetGovernanceData() *governance.GovernanceData {
+ return nil
+}
+func (pricingOverrideTestGovernanceManager) ReloadVirtualKey(context.Context, string) (*configstoreTables.TableVirtualKey, error) {
+ return nil, nil
+}
+func (pricingOverrideTestGovernanceManager) RemoveVirtualKey(context.Context, string) error {
+ return nil
+}
+func (pricingOverrideTestGovernanceManager) ReloadTeam(context.Context, string) (*configstoreTables.TableTeam, error) {
+ return nil, nil
+}
+func (pricingOverrideTestGovernanceManager) RemoveTeam(context.Context, string) error {
+ return nil
+}
+func (pricingOverrideTestGovernanceManager) ReloadCustomer(context.Context, string) (*configstoreTables.TableCustomer, error) {
+ return nil, nil
+}
+func (pricingOverrideTestGovernanceManager) RemoveCustomer(context.Context, string) error {
+ return nil
+}
+func (pricingOverrideTestGovernanceManager) ReloadModelConfig(context.Context, string) (*configstoreTables.TableModelConfig, error) {
+ return nil, nil
+}
+func (pricingOverrideTestGovernanceManager) RemoveModelConfig(context.Context, string) error {
+ return nil
+}
+func (pricingOverrideTestGovernanceManager) ReloadProvider(context.Context, schemas.ModelProvider) (*configstoreTables.TableProvider, error) {
+ return nil, nil
+}
+func (pricingOverrideTestGovernanceManager) RemoveProvider(context.Context, schemas.ModelProvider) error {
+ return nil
+}
+func (pricingOverrideTestGovernanceManager) ReloadRoutingRule(context.Context, string) error {
+ return nil
+}
+func (pricingOverrideTestGovernanceManager) RemoveRoutingRule(context.Context, string) error {
+ return nil
+}
+func (pricingOverrideTestGovernanceManager) UpsertPricingOverride(context.Context, *configstoreTables.TablePricingOverride) error {
+ return nil
+}
+func (pricingOverrideTestGovernanceManager) DeletePricingOverride(context.Context, string) error {
+ return nil
+}
+
+func setupPricingOverrideHandlerStore(t *testing.T) configstore.ConfigStore {
+ t.Helper()
+
+ dbPath := t.TempDir() + "/config.db"
+ store, err := configstore.NewConfigStore(context.Background(), &configstore.Config{
+ Enabled: true,
+ Type: configstore.ConfigStoreTypeSQLite,
+ Config: &configstore.SQLiteConfig{
+ Path: dbPath,
+ },
+ }, &mockLogger{})
+ require.NoError(t, err)
+
+ t.Cleanup(func() {
+ _ = os.Remove(dbPath)
+ })
+ return store
+}
+
+func newTestRequestCtx(body string) *fasthttp.RequestCtx {
+ var req fasthttp.Request
+ req.SetBodyString(body)
+
+ ctx := &fasthttp.RequestCtx{}
+ ctx.Init(&req, &net.TCPAddr{IP: net.IPv4(127, 0, 0, 1), Port: 12345}, nil)
+ return ctx
+}
+
+func TestUpdatePricingOverride_ReplacesFullBody(t *testing.T) {
+ SetLogger(&mockLogger{})
+ store := setupPricingOverrideHandlerStore(t)
+ handler := &GovernanceHandler{
+ configStore: store,
+ governanceManager: pricingOverrideTestGovernanceManager{},
+ }
+
+ now := time.Now().UTC()
+ override := configstoreTables.TablePricingOverride{
+ ID: "override-1",
+ Name: "Original",
+ ScopeKind: string(modelcatalog.ScopeKindGlobal),
+ MatchType: string(modelcatalog.MatchTypeExact),
+ Pattern: "gpt-4.1",
+ CreatedAt: now,
+ UpdatedAt: now,
+ PricingPatchJSON: `{"input_cost_per_token":1,"output_cost_per_token":2}`,
+ RequestTypes: []schemas.RequestType{schemas.ChatCompletionRequest},
+ }
+ require.NoError(t, store.CreatePricingOverride(context.Background(), &override))
+
+ // Patch replaces in full: send only input_cost_per_token.
+ // output_cost_per_token must be absent from the stored patch afterwards,
+ // confirming full-replace (not merge) semantics.
+ body := `{
+ "name":"Updated",
+ "scope_kind":"global",
+ "match_type":"exact",
+ "pattern":"gpt-4.1",
+ "request_types":["chat_completion"],
+ "patch":{"input_cost_per_token":1.5}
+ }`
+ ctx := newTestRequestCtx(body)
+ ctx.SetUserValue("id", override.ID)
+
+ handler.updatePricingOverride(ctx)
+
+ require.Equal(t, fasthttp.StatusOK, ctx.Response.StatusCode(), string(ctx.Response.Body()))
+
+ stored, err := store.GetPricingOverrideByID(context.Background(), override.ID)
+ require.NoError(t, err)
+ assert.Equal(t, "Updated", stored.Name)
+
+ var patch modelcatalog.PricingOptions
+ require.NoError(t, json.Unmarshal([]byte(stored.PricingPatchJSON), &patch))
+ // Sent field must reflect the new value.
+ require.NotNil(t, patch.InputCostPerToken)
+ assert.Equal(t, 1.5, *patch.InputCostPerToken)
+ // Omitted field must be cleared — patch is always fully replaced, not merged.
+ assert.Nil(t, patch.OutputCostPerToken)
+ assert.Empty(t, stored.ConfigHash)
+}
diff --git a/transports/bifrost-http/handlers/providers.go b/transports/bifrost-http/handlers/providers.go
index a00ee3789a..eb88d04908 100644
--- a/transports/bifrost-http/handlers/providers.go
+++ b/transports/bifrost-http/handlers/providers.go
@@ -8,7 +8,6 @@ import (
"errors"
"fmt"
"net/url"
- "regexp"
"slices"
"sort"
"strings"
@@ -60,19 +59,18 @@ const (
// ProviderResponse represents the response for provider operations
type ProviderResponse struct {
- Name schemas.ModelProvider `json:"name"`
- Keys []schemas.Key `json:"keys"` // API keys for the provider
- NetworkConfig schemas.NetworkConfig `json:"network_config"` // Network-related settings
- ConcurrencyAndBufferSize schemas.ConcurrencyAndBufferSize `json:"concurrency_and_buffer_size"` // Concurrency settings
- ProxyConfig *schemas.ProxyConfig `json:"proxy_config"` // Proxy configuration
- SendBackRawRequest bool `json:"send_back_raw_request"` // Include raw request in BifrostResponse
- SendBackRawResponse bool `json:"send_back_raw_response"` // Include raw response in BifrostResponse
- CustomProviderConfig *schemas.CustomProviderConfig `json:"custom_provider_config,omitempty"` // Custom provider configuration
- PricingOverrides []schemas.ProviderPricingOverride `json:"pricing_overrides,omitempty"` // Provider-level pricing overrides
- ProviderStatus ProviderStatus `json:"provider_status"` // Health/initialization status of the provider
- Status string `json:"status,omitempty"` // Operational status (e.g., list_models_failed)
- Description string `json:"description,omitempty"` // Error/status description
- ConfigHash string `json:"config_hash,omitempty"` // Hash of config.json version, used for change detection
+ Name schemas.ModelProvider `json:"name"`
+ Keys []schemas.Key `json:"keys"` // API keys for the provider
+ NetworkConfig schemas.NetworkConfig `json:"network_config"` // Network-related settings
+ ConcurrencyAndBufferSize schemas.ConcurrencyAndBufferSize `json:"concurrency_and_buffer_size"` // Concurrency settings
+ ProxyConfig *schemas.ProxyConfig `json:"proxy_config"` // Proxy configuration
+ SendBackRawRequest bool `json:"send_back_raw_request"` // Include raw request in BifrostResponse
+ SendBackRawResponse bool `json:"send_back_raw_response"` // Include raw response in BifrostResponse
+ CustomProviderConfig *schemas.CustomProviderConfig `json:"custom_provider_config,omitempty"` // Custom provider configuration
+ ProviderStatus ProviderStatus `json:"provider_status"` // Health/initialization status of the provider
+ Status string `json:"status,omitempty"` // Operational status (e.g., list_models_failed)
+ Description string `json:"description,omitempty"` // Error/status description
+ ConfigHash string `json:"config_hash,omitempty"` // Hash of config.json version, used for change detection
}
// ListProvidersResponse represents the response for listing all providers
@@ -185,7 +183,6 @@ func (h *ProviderHandler) addProvider(ctx *fasthttp.RequestCtx) {
SendBackRawRequest *bool `json:"send_back_raw_request,omitempty"` // Include raw request in BifrostResponse
SendBackRawResponse *bool `json:"send_back_raw_response,omitempty"` // Include raw response in BifrostResponse
CustomProviderConfig *schemas.CustomProviderConfig `json:"custom_provider_config,omitempty"` // Custom provider configuration
- PricingOverrides []schemas.ProviderPricingOverride `json:"pricing_overrides,omitempty"` // Provider-level pricing overrides
}{}
if err := json.Unmarshal(ctx.PostBody(), &payload); err != nil {
SendError(ctx, fasthttp.StatusBadRequest, fmt.Sprintf("Invalid JSON: %v", err))
@@ -226,10 +223,6 @@ func (h *ProviderHandler) addProvider(ctx *fasthttp.RequestCtx) {
return
}
}
- if err := validatePricingOverrides(payload.PricingOverrides); err != nil {
- SendError(ctx, fasthttp.StatusBadRequest, fmt.Sprintf("invalid pricing overrides: %v", err))
- return
- }
// Validate retry backoff values if NetworkConfig is provided
if payload.NetworkConfig != nil {
if err := validateRetryBackoff(payload.NetworkConfig); err != nil {
@@ -257,7 +250,6 @@ func (h *ProviderHandler) addProvider(ctx *fasthttp.RequestCtx) {
SendBackRawRequest: payload.SendBackRawRequest != nil && *payload.SendBackRawRequest,
SendBackRawResponse: payload.SendBackRawResponse != nil && *payload.SendBackRawResponse,
CustomProviderConfig: payload.CustomProviderConfig,
- PricingOverrides: payload.PricingOverrides,
}
// Validate custom provider configuration before persisting
if err := lib.ValidateCustomProvider(config, payload.Provider); err != nil {
@@ -274,11 +266,6 @@ func (h *ProviderHandler) addProvider(ctx *fasthttp.RequestCtx) {
SendError(ctx, fasthttp.StatusInternalServerError, fmt.Sprintf("Failed to add provider: %v", err))
return
}
- if h.inMemoryStore.ModelCatalog != nil {
- if err := h.inMemoryStore.ModelCatalog.SetProviderPricingOverrides(payload.Provider, config.PricingOverrides); err != nil {
- logger.Warn("Failed to set pricing overrides for provider %s: %v", payload.Provider, err)
- }
- }
logger.Info("Provider %s added successfully", payload.Provider)
// Attempt model discovery
@@ -300,7 +287,6 @@ func (h *ProviderHandler) addProvider(ctx *fasthttp.RequestCtx) {
SendBackRawRequest: config.SendBackRawRequest,
SendBackRawResponse: config.SendBackRawResponse,
CustomProviderConfig: config.CustomProviderConfig,
- PricingOverrides: config.PricingOverrides,
Status: config.Status,
Description: config.Description,
}, ProviderStatusActive)
@@ -327,24 +313,19 @@ func (h *ProviderHandler) updateProvider(ctx *fasthttp.RequestCtx) {
}
var payload = struct {
- Keys []schemas.Key `json:"keys"` // API keys for the provider
- NetworkConfig schemas.NetworkConfig `json:"network_config"` // Network-related settings
- ConcurrencyAndBufferSize schemas.ConcurrencyAndBufferSize `json:"concurrency_and_buffer_size"` // Concurrency settings
- ProxyConfig *schemas.ProxyConfig `json:"proxy_config,omitempty"` // Proxy configuration
- SendBackRawRequest *bool `json:"send_back_raw_request,omitempty"` // Include raw request in BifrostResponse
- SendBackRawResponse *bool `json:"send_back_raw_response,omitempty"` // Include raw response in BifrostResponse
- CustomProviderConfig *schemas.CustomProviderConfig `json:"custom_provider_config,omitempty"` // Custom provider configuration
- PricingOverrides []schemas.ProviderPricingOverride `json:"pricing_overrides,omitempty"` // Provider-level pricing overrides
+ Keys []schemas.Key `json:"keys"` // API keys for the provider
+ NetworkConfig schemas.NetworkConfig `json:"network_config"` // Network-related settings
+ ConcurrencyAndBufferSize schemas.ConcurrencyAndBufferSize `json:"concurrency_and_buffer_size"` // Concurrency settings
+ ProxyConfig *schemas.ProxyConfig `json:"proxy_config,omitempty"` // Proxy configuration
+ SendBackRawRequest *bool `json:"send_back_raw_request,omitempty"` // Include raw request in BifrostResponse
+ SendBackRawResponse *bool `json:"send_back_raw_response,omitempty"` // Include raw response in BifrostResponse
+ CustomProviderConfig *schemas.CustomProviderConfig `json:"custom_provider_config,omitempty"` // Custom provider configuration
}{}
if err := sonic.Unmarshal(ctx.PostBody(), &payload); err != nil {
SendError(ctx, fasthttp.StatusBadRequest, fmt.Sprintf("Invalid JSON: %v", err))
return
}
- if err := validatePricingOverrides(payload.PricingOverrides); err != nil {
- SendError(ctx, fasthttp.StatusBadRequest, fmt.Sprintf("invalid pricing overrides: %v", err))
- return
- }
// Get the raw config to access actual values for merging with redacted request values
oldConfigRaw, err := h.inMemoryStore.GetProviderConfigRaw(provider)
@@ -380,7 +361,6 @@ func (h *ProviderHandler) updateProvider(ctx *fasthttp.RequestCtx) {
ConcurrencyAndBufferSize: oldConfigRaw.ConcurrencyAndBufferSize,
ProxyConfig: oldConfigRaw.ProxyConfig,
CustomProviderConfig: oldConfigRaw.CustomProviderConfig,
- PricingOverrides: oldConfigRaw.PricingOverrides,
Status: oldConfigRaw.Status,
Description: oldConfigRaw.Description,
}
@@ -466,7 +446,6 @@ func (h *ProviderHandler) updateProvider(ctx *fasthttp.RequestCtx) {
config.ProxyConfig = payload.ProxyConfig
config.CustomProviderConfig = payload.CustomProviderConfig
- config.PricingOverrides = payload.PricingOverrides
if payload.SendBackRawRequest != nil {
config.SendBackRawRequest = *payload.SendBackRawRequest
}
@@ -500,12 +479,6 @@ func (h *ProviderHandler) updateProvider(ctx *fasthttp.RequestCtx) {
SendError(ctx, fasthttp.StatusInternalServerError, fmt.Sprintf("Failed to update provider: %v", err))
return
}
- if h.inMemoryStore.ModelCatalog != nil {
- if err := h.inMemoryStore.ModelCatalog.SetProviderPricingOverrides(provider, config.PricingOverrides); err != nil {
- logger.Warn("Failed to set pricing overrides for provider %s: %v", provider, err)
- }
- }
-
// Attempt model discovery
err = h.attemptModelDiscovery(ctx, provider, payload.CustomProviderConfig)
@@ -525,7 +498,6 @@ func (h *ProviderHandler) updateProvider(ctx *fasthttp.RequestCtx) {
SendBackRawRequest: config.SendBackRawRequest,
SendBackRawResponse: config.SendBackRawResponse,
CustomProviderConfig: config.CustomProviderConfig,
- PricingOverrides: config.PricingOverrides,
Status: config.Status,
Description: config.Description,
}, ProviderStatusActive)
@@ -1083,7 +1055,6 @@ func (h *ProviderHandler) getProviderResponseFromConfig(provider schemas.ModelPr
SendBackRawRequest: config.SendBackRawRequest,
SendBackRawResponse: config.SendBackRawResponse,
CustomProviderConfig: config.CustomProviderConfig,
- PricingOverrides: config.PricingOverrides,
ProviderStatus: status,
Status: config.Status,
Description: config.Description,
@@ -1091,101 +1062,6 @@ func (h *ProviderHandler) getProviderResponseFromConfig(provider schemas.ModelPr
}
}
-func validatePricingOverrides(overrides []schemas.ProviderPricingOverride) error {
- for i, override := range overrides {
- if strings.TrimSpace(override.ModelPattern) == "" {
- return fmt.Errorf("override[%d]: model_pattern is required", i)
- }
-
- switch override.MatchType {
- case schemas.PricingOverrideMatchExact:
- if strings.Contains(override.ModelPattern, "*") {
- return fmt.Errorf("override[%d]: exact match_type cannot include '*'", i)
- }
- case schemas.PricingOverrideMatchWildcard:
- if !strings.Contains(override.ModelPattern, "*") {
- return fmt.Errorf("override[%d]: wildcard match_type requires '*' in model_pattern", i)
- }
- case schemas.PricingOverrideMatchRegex:
- if _, err := regexp.Compile(override.ModelPattern); err != nil {
- return fmt.Errorf("override[%d]: invalid regex pattern: %w", i, err)
- }
- default:
- return fmt.Errorf("override[%d]: unsupported match_type %q", i, override.MatchType)
- }
-
- for _, requestType := range override.RequestTypes {
- if !isSupportedOverrideRequestType(requestType) {
- return fmt.Errorf("override[%d]: unsupported request_type %q", i, requestType)
- }
- }
-
- if err := validatePricingOverrideNonNegativeFields(i, override); err != nil {
- return err
- }
- }
-
- return nil
-}
-
-func isSupportedOverrideRequestType(requestType schemas.RequestType) bool {
- switch requestType {
- case schemas.TextCompletionRequest,
- schemas.TextCompletionStreamRequest,
- schemas.ChatCompletionRequest,
- schemas.ChatCompletionStreamRequest,
- schemas.ResponsesRequest,
- schemas.ResponsesStreamRequest,
- schemas.EmbeddingRequest,
- schemas.RerankRequest,
- schemas.SpeechRequest,
- schemas.SpeechStreamRequest,
- schemas.TranscriptionRequest,
- schemas.TranscriptionStreamRequest,
- schemas.ImageGenerationRequest,
- schemas.ImageGenerationStreamRequest:
- return true
- default:
- return false
- }
-}
-
-func validatePricingOverrideNonNegativeFields(index int, override schemas.ProviderPricingOverride) error {
- optionalValues := map[string]*float64{
- "input_cost_per_token": override.InputCostPerToken,
- "output_cost_per_token": override.OutputCostPerToken,
- "input_cost_per_video_per_second": override.InputCostPerVideoPerSecond,
- "input_cost_per_audio_per_second": override.InputCostPerAudioPerSecond,
- "input_cost_per_character": override.InputCostPerCharacter,
- "input_cost_per_token_above_128k_tokens": override.InputCostPerTokenAbove128kTokens,
- "input_cost_per_image_above_128k_tokens": override.InputCostPerImageAbove128kTokens,
- "input_cost_per_video_per_second_above_128k_tokens": override.InputCostPerVideoPerSecondAbove128kTokens,
- "input_cost_per_audio_per_second_above_128k_tokens": override.InputCostPerAudioPerSecondAbove128kTokens,
- "output_cost_per_token_above_128k_tokens": override.OutputCostPerTokenAbove128kTokens,
- "input_cost_per_token_above_200k_tokens": override.InputCostPerTokenAbove200kTokens,
- "output_cost_per_token_above_200k_tokens": override.OutputCostPerTokenAbove200kTokens,
- "cache_creation_input_token_cost_above_200k_tokens": override.CacheCreationInputTokenCostAbove200kTokens,
- "cache_read_input_token_cost_above_200k_tokens": override.CacheReadInputTokenCostAbove200kTokens,
- "cache_read_input_token_cost": override.CacheReadInputTokenCost,
- "cache_creation_input_token_cost": override.CacheCreationInputTokenCost,
- "input_cost_per_token_batches": override.InputCostPerTokenBatches,
- "output_cost_per_token_batches": override.OutputCostPerTokenBatches,
- "input_cost_per_image_token": override.InputCostPerImageToken,
- "output_cost_per_image_token": override.OutputCostPerImageToken,
- "input_cost_per_image": override.InputCostPerImage,
- "output_cost_per_image": override.OutputCostPerImage,
- "cache_read_input_image_token_cost": override.CacheReadInputImageTokenCost,
- }
-
- for fieldName, value := range optionalValues {
- if value != nil && *value < 0 {
- return fmt.Errorf("override[%d]: %s must be non-negative", index, fieldName)
- }
- }
-
- return nil
-}
-
func getProviderFromCtx(ctx *fasthttp.RequestCtx) (schemas.ModelProvider, error) {
providerValue := ctx.UserValue("provider")
if providerValue == nil {
diff --git a/transports/bifrost-http/lib/config.go b/transports/bifrost-http/lib/config.go
index de53b1e8b0..9c53c17a67 100644
--- a/transports/bifrost-http/lib/config.go
+++ b/transports/bifrost-http/lib/config.go
@@ -440,13 +440,23 @@ func loadConfigFromFile(ctx context.Context, config *Config, data []byte) (*Conf
// Load MCP config
loadMCPConfigFromFile(ctx, config, &configData)
// Load governance config
- loadGovernanceConfigFromFile(ctx, config, &configData)
+ if err = loadGovernanceConfigFromFile(ctx, config, &configData); err != nil {
+ return nil, err
+ }
// Load auth config
loadAuthConfigFromFile(ctx, config, &configData)
// Load plugins
loadPluginsFromFile(ctx, config, &configData)
// Initialize framework config and pricing manager
initFrameworkConfigFromFile(ctx, config, &configData)
+ // ModelCatalog is now initialized; replay pricing overrides for the no-store path.
+ // loadGovernanceConfigFromFile ran before ModelCatalog existed, so the in-memory
+ // load was skipped. Do it here now that ModelCatalog is available.
+ if config.ConfigStore == nil && config.ModelCatalog != nil && config.GovernanceConfig != nil && len(config.GovernanceConfig.PricingOverrides) > 0 {
+ if err := config.ModelCatalog.SetPricingOverrides(config.GovernanceConfig.PricingOverrides); err != nil {
+ return nil, fmt.Errorf("failed to set pricing overrides from config file: %w", err)
+ }
+ }
// Sync encryption: encrypt any plaintext rows written during config loading
syncEncryption(ctx, config)
// Load WebSocket config (always enabled, apply defaults for any missing values)
@@ -912,7 +922,7 @@ func mergeMCPConfig(ctx context.Context, config *Config, configData *ConfigData,
}
// loadGovernanceConfigFromFile loads and merges governance config from file
-func loadGovernanceConfigFromFile(ctx context.Context, config *Config, configData *ConfigData) {
+func loadGovernanceConfigFromFile(ctx context.Context, config *Config, configData *ConfigData) error {
var governanceConfig *configstore.GovernanceConfig
var err error
// Checking from the store
@@ -936,10 +946,15 @@ func loadGovernanceConfigFromFile(ctx context.Context, config *Config, configDat
// No governance config in store, use config file
logger.Debug("no governance config found in store, processing from config file")
config.GovernanceConfig = configData.Governance
- createGovernanceConfigInStore(ctx, config)
+ if err := createGovernanceConfigInStore(ctx, config); err != nil {
+ return err
+ }
+ // Pricing overrides are loaded into ModelCatalog after initFrameworkConfigFromFile,
+ // once ModelCatalog is initialized.
} else {
logger.Debug("no governance config in store or config file")
}
+ return nil
}
// mergeGovernanceConfig merges governance config from file with store
@@ -1175,6 +1190,45 @@ func mergeGovernanceConfig(ctx context.Context, config *Config, configData *Conf
routingRulesToAdd = append(routingRulesToAdd, configData.Governance.RoutingRules[i])
}
}
+ // Merge PricingOverrides by ID with hash comparison
+ pricingOverridesToAdd := make([]configstoreTables.TablePricingOverride, 0)
+ pricingOverridesToUpdate := make([]configstoreTables.TablePricingOverride, 0)
+ for i, newOverride := range configData.Governance.PricingOverrides {
+ if len(newOverride.RequestTypes) > 0 {
+ b, err := json.Marshal(newOverride.RequestTypes)
+ if err != nil {
+ logger.Warn("failed to serialize request_types for pricing override %s: %v", newOverride.ID, err)
+ continue
+ }
+ configData.Governance.PricingOverrides[i].RequestTypesJSON = string(b)
+ } else {
+ configData.Governance.PricingOverrides[i].RequestTypesJSON = "[]"
+ }
+ fileHash, err := configstore.GeneratePricingOverrideHash(configData.Governance.PricingOverrides[i])
+ if err != nil {
+ logger.Warn("failed to generate pricing override hash for %s: %v", newOverride.ID, err)
+ continue
+ }
+ configData.Governance.PricingOverrides[i].ConfigHash = fileHash
+
+ found := false
+ for j, existing := range governanceConfig.PricingOverrides {
+ if existing.ID == newOverride.ID {
+ found = true
+ if existing.ConfigHash != fileHash {
+ logger.Debug("config hash mismatch for pricing override %s, syncing from config file", newOverride.ID)
+ pricingOverridesToUpdate = append(pricingOverridesToUpdate, configData.Governance.PricingOverrides[i])
+ governanceConfig.PricingOverrides[j] = configData.Governance.PricingOverrides[i]
+ } else {
+ logger.Debug("config hash matches for pricing override %s, keeping DB config", newOverride.ID)
+ }
+ break
+ }
+ }
+ if !found {
+ pricingOverridesToAdd = append(pricingOverridesToAdd, configData.Governance.PricingOverrides[i])
+ }
+ }
// Add merged items to config
config.GovernanceConfig.Budgets = append(governanceConfig.Budgets, budgetsToAdd...)
config.GovernanceConfig.RateLimits = append(governanceConfig.RateLimits, rateLimitsToAdd...)
@@ -1182,13 +1236,15 @@ func mergeGovernanceConfig(ctx context.Context, config *Config, configData *Conf
config.GovernanceConfig.Teams = append(governanceConfig.Teams, teamsToAdd...)
config.GovernanceConfig.VirtualKeys = append(governanceConfig.VirtualKeys, virtualKeysToAdd...)
config.GovernanceConfig.RoutingRules = append(governanceConfig.RoutingRules, routingRulesToAdd...)
+ config.GovernanceConfig.PricingOverrides = append(governanceConfig.PricingOverrides, pricingOverridesToAdd...)
// Update store with merged config items
hasChanges := len(budgetsToAdd) > 0 || len(budgetsToUpdate) > 0 ||
len(rateLimitsToAdd) > 0 || len(rateLimitsToUpdate) > 0 ||
len(customersToAdd) > 0 || len(customersToUpdate) > 0 ||
len(teamsToAdd) > 0 || len(teamsToUpdate) > 0 ||
len(virtualKeysToAdd) > 0 || len(virtualKeysToUpdate) > 0 ||
- len(routingRulesToAdd) > 0 || len(routingRulesToUpdate) > 0
+ len(routingRulesToAdd) > 0 || len(routingRulesToUpdate) > 0 ||
+ len(pricingOverridesToAdd) > 0 || len(pricingOverridesToUpdate) > 0
if config.ConfigStore != nil && hasChanges {
err := updateGovernanceConfigInStore(ctx, config,
budgetsToAdd, budgetsToUpdate,
@@ -1196,11 +1252,28 @@ func mergeGovernanceConfig(ctx context.Context, config *Config, configData *Conf
customersToAdd, customersToUpdate,
teamsToAdd, teamsToUpdate,
virtualKeysToAdd, virtualKeysToUpdate,
- routingRulesToAdd, routingRulesToUpdate)
+ routingRulesToAdd, routingRulesToUpdate,
+ pricingOverridesToAdd, pricingOverridesToUpdate)
if err != nil {
logger.Fatal("failed to sync governance config: %v", err)
}
}
+ // Sync pricing overrides into the model catalog in one batch to avoid
+ // rebuilding the lookup map on every iteration.
+ if config.ModelCatalog != nil {
+ rows := make([]*configstoreTables.TablePricingOverride, 0, len(pricingOverridesToAdd)+len(pricingOverridesToUpdate))
+ for i := range pricingOverridesToAdd {
+ rows = append(rows, &pricingOverridesToAdd[i])
+ }
+ for i := range pricingOverridesToUpdate {
+ rows = append(rows, &pricingOverridesToUpdate[i])
+ }
+ if len(rows) > 0 {
+ if err := config.ModelCatalog.UpsertPricingOverrides(rows...); err != nil {
+ logger.Error("failed to upsert pricing overrides into model catalog: %v", err)
+ }
+ }
+ }
}
// updateGovernanceConfigInStore updates governance config items in the store
@@ -1219,6 +1292,8 @@ func updateGovernanceConfigInStore(
virtualKeysToUpdate []configstoreTables.TableVirtualKey,
routingRulesToAdd []configstoreTables.TableRoutingRule,
routingRulesToUpdate []configstoreTables.TableRoutingRule,
+ pricingOverridesToAdd []configstoreTables.TablePricingOverride,
+ pricingOverridesToUpdate []configstoreTables.TablePricingOverride,
) error {
logger.Debug("updating governance config in store with merged items")
return config.ConfigStore.ExecuteTransaction(ctx, func(tx *gorm.DB) error {
@@ -1330,15 +1405,29 @@ func updateGovernanceConfigInStore(
}
}
+ // Create pricing overrides (new from config.json)
+ for _, override := range pricingOverridesToAdd {
+ if err := config.ConfigStore.CreatePricingOverride(ctx, &override, tx); err != nil {
+ return fmt.Errorf("failed to create pricing override %s: %w", override.ID, err)
+ }
+ }
+
+ // Update pricing overrides (config.json changed)
+ for _, override := range pricingOverridesToUpdate {
+ if err := config.ConfigStore.UpdatePricingOverride(ctx, &override, tx); err != nil {
+ return fmt.Errorf("failed to update pricing override %s: %w", override.ID, err)
+ }
+ }
+
return nil
})
}
// createGovernanceConfigInStore creates governance config in store from config file
-func createGovernanceConfigInStore(ctx context.Context, config *Config) {
+func createGovernanceConfigInStore(ctx context.Context, config *Config) error {
if config.ConfigStore == nil {
logger.Debug("createGovernanceConfigInStore: ConfigStore is nil, skipping")
- return
+ return nil
}
logger.Debug("createGovernanceConfigInStore: creating %d budgets, %d rate_limits, %d virtual_keys, %d routing_rules",
len(config.GovernanceConfig.Budgets),
@@ -1454,10 +1543,34 @@ func createGovernanceConfigInStore(ctx context.Context, config *Config) {
virtualKey.MCPConfigs = mcpConfigs
}
+ // Create pricing overrides after virtual keys so that scoped overrides referencing
+ // a virtual key ID are inserted after the VK row exists.
+ for i := range config.GovernanceConfig.PricingOverrides {
+ override := &config.GovernanceConfig.PricingOverrides[i]
+ if len(override.RequestTypes) > 0 {
+ b, err := json.Marshal(override.RequestTypes)
+ if err != nil {
+ return fmt.Errorf("failed to serialize request_types for pricing override %s: %w", override.ID, err)
+ }
+ override.RequestTypesJSON = string(b)
+ } else {
+ override.RequestTypesJSON = "[]"
+ }
+ overrideHash, err := configstore.GeneratePricingOverrideHash(*override)
+ if err != nil {
+ return fmt.Errorf("failed to generate pricing override hash for %s: %w", override.ID, err)
+ }
+ override.ConfigHash = overrideHash
+ if err := config.ConfigStore.CreatePricingOverride(ctx, override, tx); err != nil {
+ return fmt.Errorf("failed to create pricing override %s: %w", override.ID, err)
+ }
+ }
+
return nil
}); err != nil {
- logger.Warn("failed to update governance config: %v", err)
+ return fmt.Errorf("failed to create governance config in store: %w", err)
}
+ return nil
}
// isBcryptHash checks if a string looks like a bcrypt hash
@@ -1898,7 +2011,6 @@ func initFrameworkConfigFromFile(ctx context.Context, config *Config, configData
logger.Error("failed to initialize pricing manager: %v", err)
} else {
config.ModelCatalog = pricingManager
- applyProviderPricingOverrides(config.ModelCatalog, config.Providers)
}
// Initialize MCP catalog
@@ -2127,7 +2239,6 @@ func loadDefaultProviders(ctx context.Context, config *Config) error {
SendBackRawRequest: dbProvider.SendBackRawRequest,
SendBackRawResponse: dbProvider.SendBackRawResponse,
CustomProviderConfig: dbProvider.CustomProviderConfig,
- PricingOverrides: dbProvider.PricingOverrides,
ConfigHash: dbProvider.ConfigHash,
}
if err := ValidateCustomProvider(providerConfig, provider); err != nil {
@@ -2276,7 +2387,6 @@ func initDefaultFrameworkConfig(ctx context.Context, config *Config) error {
logger.Error("failed to initialize model catalog: %v", err)
} else {
config.ModelCatalog = modelCatalog
- applyProviderPricingOverrides(config.ModelCatalog, config.Providers)
}
// Initialize MCP catalog
@@ -3741,14 +3851,3 @@ func DeepCopy[T any](in T) (T, error) {
err = sonic.Unmarshal(b, &out)
return out, err
}
-
-func applyProviderPricingOverrides(catalog *modelcatalog.ModelCatalog, providers map[schemas.ModelProvider]configstore.ProviderConfig) {
- if catalog == nil {
- return
- }
- for provider, providerConfig := range providers {
- if err := catalog.SetProviderPricingOverrides(provider, providerConfig.PricingOverrides); err != nil {
- logger.Warn("failed to load pricing overrides for provider %s: %v", provider, err)
- }
- }
-}
diff --git a/transports/bifrost-http/lib/config_test.go b/transports/bifrost-http/lib/config_test.go
index 74cbd3d2e7..58cef42646 100644
--- a/transports/bifrost-http/lib/config_test.go
+++ b/transports/bifrost-http/lib/config_test.go
@@ -855,6 +855,30 @@ func (m *MockConfigStore) DeleteModelPrices(ctx context.Context, tx ...*gorm.DB)
return nil
}
+func (m *MockConfigStore) GetPricingOverrides(ctx context.Context, filter configstore.PricingOverrideFilters) ([]tables.TablePricingOverride, error) {
+ return []tables.TablePricingOverride{}, nil
+}
+
+func (m *MockConfigStore) GetPricingOverridesPaginated(ctx context.Context, params configstore.PricingOverridesQueryParams) ([]tables.TablePricingOverride, int64, error) {
+ return []tables.TablePricingOverride{}, 0, nil
+}
+
+func (m *MockConfigStore) GetPricingOverrideByID(ctx context.Context, id string) (*tables.TablePricingOverride, error) {
+ return nil, configstore.ErrNotFound
+}
+
+func (m *MockConfigStore) CreatePricingOverride(ctx context.Context, override *tables.TablePricingOverride, tx ...*gorm.DB) error {
+ return nil
+}
+
+func (m *MockConfigStore) UpdatePricingOverride(ctx context.Context, override *tables.TablePricingOverride, tx ...*gorm.DB) error {
+ return nil
+}
+
+func (m *MockConfigStore) DeletePricingOverride(ctx context.Context, id string, tx ...*gorm.DB) error {
+ return nil
+}
+
// Model parameters
func (m *MockConfigStore) GetModelParameters(ctx context.Context, model string) (*tables.TableModelParameters, error) {
return nil, nil
@@ -12249,13 +12273,13 @@ func TestMergePluginsFromFile_NoChangeSkipsMerge(t *testing.T) {
mock := &MockConfigStore{
plugins: []*tables.TablePlugin{
{
- Name: "plugin-a",
- Enabled: true,
- Placement: &postBuiltin,
- Order: &order0,
- Version: 1,
+ Name: "plugin-a",
+ Enabled: true,
+ Placement: &postBuiltin,
+ Order: &order0,
+ Version: 1,
ConfigJSON: `{"setting":"db-value"}`,
- Config: map[string]any{"setting": "db-value"},
+ Config: map[string]any{"setting": "db-value"},
},
},
}
diff --git a/transports/bifrost-http/server/server.go b/transports/bifrost-http/server/server.go
index a84b966ef1..c0e973d6a3 100644
--- a/transports/bifrost-http/server/server.go
+++ b/transports/bifrost-http/server/server.go
@@ -62,6 +62,8 @@ type ServerCallbacks interface {
// Pricing related callbacks
ReloadPricingManager(ctx context.Context) error
ForceReloadPricing(ctx context.Context) error
+ UpsertPricingOverride(ctx context.Context, override *tables.TablePricingOverride) error
+ DeletePricingOverride(ctx context.Context, id string) error
// Proxy related callbacks
ReloadProxyConfig(ctx context.Context, config *tables.GlobalProxyConfig) error
// Client config related callbacks
@@ -499,11 +501,6 @@ func (s *BifrostHTTPServer) ReloadProvider(ctx context.Context, provider schemas
}
}
- // Syncing models (this part always runs regardless of governance)
- if err := s.Config.ModelCatalog.SetProviderPricingOverrides(provider, providerInfo.PricingOverrides); err != nil {
- logger.Warn("failed to refresh pricing overrides for provider %s: %v", provider, err)
- }
-
bfCtx := schemas.NewBifrostContext(ctx, time.Now().Add(15*time.Second))
bfCtx.SetValue(schemas.BifrostContextKeySkipPluginPipeline, true)
bfCtx.SetValue(schemas.BifrostContextKeyValidateKeys, true) // Validate keys during provider add/update
@@ -595,7 +592,6 @@ func (s *BifrostHTTPServer) RemoveProvider(ctx context.Context, provider schemas
return fmt.Errorf("pricing manager not found")
}
s.Config.ModelCatalog.DeleteModelDataForProvider(provider)
- s.Config.ModelCatalog.DeleteProviderPricingOverrides(provider)
return nil
}
@@ -764,11 +760,6 @@ func (s *BifrostHTTPServer) ForceReloadPricing(ctx context.Context) error {
return fmt.Errorf("failed to initialize new model catalog: %w", err)
}
s.Config.ModelCatalog = modelCatalog
- for provider, providerConfig := range s.Config.Providers {
- if err := s.Config.ModelCatalog.SetProviderPricingOverrides(provider, providerConfig.PricingOverrides); err != nil {
- logger.Warn("failed to seed pricing overrides for provider %s: %v", provider, err)
- }
- }
} else {
if err := s.Config.ModelCatalog.ForceReloadPricing(ctx); err != nil {
return fmt.Errorf("failed to force reload pricing: %w", err)
@@ -817,6 +808,23 @@ func (s *BifrostHTTPServer) ForceReloadPricing(ctx context.Context) error {
return nil
}
+// UpsertPricingOverride inserts or updates a pricing override in the in-memory model catalog.
+func (s *BifrostHTTPServer) UpsertPricingOverride(ctx context.Context, override *tables.TablePricingOverride) error {
+ if s.Config == nil || s.Config.ModelCatalog == nil {
+ return fmt.Errorf("pricing manager not found")
+ }
+ return s.Config.ModelCatalog.UpsertPricingOverrides(override)
+}
+
+// DeletePricingOverride removes a pricing override from the in-memory model catalog.
+func (s *BifrostHTTPServer) DeletePricingOverride(ctx context.Context, id string) error {
+ if s.Config == nil || s.Config.ModelCatalog == nil {
+ return fmt.Errorf("pricing manager not found")
+ }
+ s.Config.ModelCatalog.DeletePricingOverride(id)
+ return nil
+}
+
// ReloadProxyConfig reloads the proxy configuration
func (s *BifrostHTTPServer) ReloadProxyConfig(ctx context.Context, config *tables.GlobalProxyConfig) error {
if s.Config == nil {
diff --git a/transports/config.schema.json b/transports/config.schema.json
index cfb89ada7b..0b841b48e2 100644
--- a/transports/config.schema.json
+++ b/transports/config.schema.json
@@ -497,6 +497,13 @@
"$ref": "#/$defs/routing_rule"
}
},
+ "pricing_overrides": {
+ "type": "array",
+ "description": "Scoped pricing overrides applied at runtime by the model catalog",
+ "items": {
+ "$ref": "#/$defs/pricing_override"
+ }
+ },
"auth_config": {
"$ref": "#/$defs/auth_config"
},
@@ -1613,159 +1620,6 @@
},
"additionalProperties": false
},
- "pricing_override_match_type": {
- "type": "string",
- "enum": [
- "exact",
- "wildcard",
- "regex"
- ]
- },
- "pricing_override_request_type": {
- "type": "string",
- "enum": [
- "text_completion",
- "text_completion_stream",
- "chat_completion",
- "chat_completion_stream",
- "responses",
- "responses_stream",
- "embedding",
- "rerank",
- "speech",
- "speech_stream",
- "transcription",
- "transcription_stream",
- "image_generation",
- "image_generation_stream"
- ]
- },
- "provider_pricing_override": {
- "type": "object",
- "properties": {
- "model_pattern": {
- "type": "string",
- "minLength": 1
- },
- "match_type": {
- "$ref": "#/$defs/pricing_override_match_type"
- },
- "request_types": {
- "type": "array",
- "items": {
- "$ref": "#/$defs/pricing_override_request_type"
- }
- },
- "input_cost_per_token": { "type": "number", "minimum": 0 },
- "output_cost_per_token": { "type": "number", "minimum": 0 },
- "input_cost_per_video_per_second": { "type": "number", "minimum": 0 },
- "input_cost_per_audio_per_second": { "type": "number", "minimum": 0 },
- "input_cost_per_character": { "type": "number", "minimum": 0 },
- "output_cost_per_character": { "type": "number", "minimum": 0 },
- "input_cost_per_token_above_128k_tokens": { "type": "number", "minimum": 0 },
- "input_cost_per_character_above_128k_tokens": { "type": "number", "minimum": 0 },
- "input_cost_per_image_above_128k_tokens": { "type": "number", "minimum": 0 },
- "input_cost_per_video_per_second_above_128k_tokens": { "type": "number", "minimum": 0 },
- "input_cost_per_audio_per_second_above_128k_tokens": { "type": "number", "minimum": 0 },
- "output_cost_per_token_above_128k_tokens": { "type": "number", "minimum": 0 },
- "output_cost_per_character_above_128k_tokens": { "type": "number", "minimum": 0 },
- "input_cost_per_token_above_200k_tokens": { "type": "number", "minimum": 0 },
- "output_cost_per_token_above_200k_tokens": { "type": "number", "minimum": 0 },
- "cache_creation_input_token_cost_above_200k_tokens": { "type": "number", "minimum": 0 },
- "cache_read_input_token_cost_above_200k_tokens": { "type": "number", "minimum": 0 },
- "cache_read_input_token_cost": { "type": "number", "minimum": 0 },
- "cache_creation_input_token_cost": { "type": "number", "minimum": 0 },
- "input_cost_per_token_batches": { "type": "number", "minimum": 0 },
- "output_cost_per_token_batches": { "type": "number", "minimum": 0 },
- "input_cost_per_image_token": { "type": "number", "minimum": 0 },
- "output_cost_per_image_token": { "type": "number", "minimum": 0 },
- "input_cost_per_image": { "type": "number", "minimum": 0 },
- "output_cost_per_image": { "type": "number", "minimum": 0 },
- "cache_read_input_image_token_cost": { "type": "number", "minimum": 0 }
- },
- "required": [
- "model_pattern",
- "match_type"
- ],
- "additionalProperties": false
- },
- "custom_provider_config": {
- "type": "object",
- "description": "Custom provider configuration for extending or customizing provider behavior",
- "properties": {
- "is_key_less": {
- "type": "boolean",
- "description": "Whether the custom provider requires a key"
- },
- "base_provider_type": {
- "type": "string",
- "description": "Base provider type to extend"
- },
- "allowed_requests": {
- "type": "object",
- "description": "Allowed request types for the custom provider",
- "properties": {
- "list_models": { "type": "boolean" },
- "text_completion": { "type": "boolean" },
- "text_completion_stream": { "type": "boolean" },
- "chat_completion": { "type": "boolean" },
- "chat_completion_stream": { "type": "boolean" },
- "responses": { "type": "boolean" },
- "responses_stream": { "type": "boolean" },
- "count_tokens": { "type": "boolean" },
- "embedding": { "type": "boolean" },
- "rerank": { "type": "boolean" },
- "speech": { "type": "boolean" },
- "speech_stream": { "type": "boolean" },
- "transcription": { "type": "boolean" },
- "transcription_stream": { "type": "boolean" },
- "image_generation": { "type": "boolean" },
- "image_generation_stream": { "type": "boolean" },
- "image_edit": { "type": "boolean" },
- "image_edit_stream": { "type": "boolean" },
- "image_variation": { "type": "boolean" },
- "video_generation": { "type": "boolean" },
- "video_retrieve": { "type": "boolean" },
- "video_download": { "type": "boolean" },
- "video_delete": { "type": "boolean" },
- "video_list": { "type": "boolean" },
- "video_remix": { "type": "boolean" },
- "batch_create": { "type": "boolean" },
- "batch_list": { "type": "boolean" },
- "batch_retrieve": { "type": "boolean" },
- "batch_cancel": { "type": "boolean" },
- "batch_delete": { "type": "boolean" },
- "batch_results": { "type": "boolean" },
- "file_upload": { "type": "boolean" },
- "file_list": { "type": "boolean" },
- "file_retrieve": { "type": "boolean" },
- "file_delete": { "type": "boolean" },
- "file_content": { "type": "boolean" },
- "container_create": { "type": "boolean" },
- "container_list": { "type": "boolean" },
- "container_retrieve": { "type": "boolean" },
- "container_delete": { "type": "boolean" },
- "container_file_create": { "type": "boolean" },
- "container_file_list": { "type": "boolean" },
- "container_file_retrieve": { "type": "boolean" },
- "container_file_content": { "type": "boolean" },
- "container_file_delete": { "type": "boolean" },
- "passthrough": { "type": "boolean" },
- "passthrough_stream": { "type": "boolean" }
- },
- "additionalProperties": false
- },
- "request_path_overrides": {
- "type": "object",
- "description": "Mapping of request type to custom path overriding the default provider path",
- "additionalProperties": {
- "type": "string"
- }
- }
- },
- "required": ["base_provider_type"],
- "additionalProperties": false
- },
"network_config": {
"type": "object",
"properties": {
@@ -2074,13 +1928,6 @@
},
"custom_provider_config": {
"$ref": "#/$defs/custom_provider_config"
- },
- "pricing_overrides": {
- "type": "array",
- "items": {
- "$ref": "#/$defs/provider_pricing_override"
- },
- "description": "Provider-level pricing overrides matched by model pattern"
}
},
"required": [
@@ -2122,13 +1969,6 @@
},
"custom_provider_config": {
"$ref": "#/$defs/custom_provider_config"
- },
- "pricing_overrides": {
- "type": "array",
- "items": {
- "$ref": "#/$defs/provider_pricing_override"
- },
- "description": "Provider-level pricing overrides matched by model pattern"
}
},
"required": [
@@ -2170,13 +2010,6 @@
},
"custom_provider_config": {
"$ref": "#/$defs/custom_provider_config"
- },
- "pricing_overrides": {
- "type": "array",
- "items": {
- "$ref": "#/$defs/provider_pricing_override"
- },
- "description": "Provider-level pricing overrides matched by model pattern"
}
},
"required": [
@@ -2218,13 +2051,6 @@
},
"custom_provider_config": {
"$ref": "#/$defs/custom_provider_config"
- },
- "pricing_overrides": {
- "type": "array",
- "items": {
- "$ref": "#/$defs/provider_pricing_override"
- },
- "description": "Provider-level pricing overrides matched by model pattern"
}
},
"required": [
@@ -2266,13 +2092,6 @@
},
"custom_provider_config": {
"$ref": "#/$defs/custom_provider_config"
- },
- "pricing_overrides": {
- "type": "array",
- "items": {
- "$ref": "#/$defs/provider_pricing_override"
- },
- "description": "Provider-level pricing overrides matched by model pattern"
}
},
"required": [
@@ -3209,6 +3028,155 @@
}
},
"additionalProperties": false
+ },
+ "pricing_override": {
+ "type": "object",
+ "description": "Scoped pricing override applied at runtime by the model catalog",
+ "properties": {
+ "id": {
+ "type": "string",
+ "description": "Unique pricing override ID"
+ },
+ "name": {
+ "type": "string",
+ "description": "Human-readable name for this override"
+ },
+ "scope_kind": {
+ "type": "string",
+ "description": "Scope level for this override",
+ "enum": ["global", "provider", "provider_key", "virtual_key", "virtual_key_provider", "virtual_key_provider_key"]
+ },
+ "virtual_key_id": {
+ "type": "string",
+ "description": "Virtual key ID (required for virtual_key* scopes)"
+ },
+ "provider_id": {
+ "type": "string",
+ "description": "Provider ID (required for provider* scopes)"
+ },
+ "provider_key_id": {
+ "type": "string",
+ "description": "Provider key ID (required for provider_key and virtual_key_provider_key scopes)"
+ },
+ "match_type": {
+ "type": "string",
+ "description": "How the pattern is matched against model names",
+ "enum": ["exact", "wildcard"]
+ },
+ "pattern": {
+ "type": "string",
+ "description": "Model name pattern to match (exact name or wildcard prefix ending with *)"
+ },
+ "request_types": {
+ "type": "array",
+ "description": "Request types this override applies to. At least one value is required.",
+ "minItems": 1,
+ "items": {
+ "type": "string"
+ }
+ },
+ "pricing_patch": {
+ "type": "string",
+ "description": "JSON-encoded pricing fields to override (e.g. '{\"input_cost_per_token\":0.000001}')"
+ },
+ "config_hash": {
+ "type": "string",
+ "description": "Internal hash for change detection (auto-managed)"
+ }
+ },
+ "required": ["id", "name", "scope_kind", "match_type", "pattern", "request_types"],
+ "additionalProperties": false
+ },
+ "pricing_override_match_type": {
+ "type": "string",
+ "enum": ["exact", "wildcard"]
+ },
+ "pricing_override_request_type": {
+ "type": "string",
+ "enum": [
+ "chat_completion", "text_completion", "responses",
+ "embedding", "rerank",
+ "speech", "transcription",
+ "image_generation", "image_variation", "image_edit",
+ "video_generation", "video_remix"
+ ]
+ },
+ "custom_provider_config": {
+ "type": "object",
+ "description": "Custom provider configuration for extending or customizing provider behavior",
+ "properties": {
+ "is_key_less": {
+ "type": "boolean",
+ "description": "Whether the custom provider requires a key"
+ },
+ "base_provider_type": {
+ "type": "string",
+ "description": "Base provider type to extend"
+ },
+ "allowed_requests": {
+ "type": "object",
+ "description": "Allowed request types for the custom provider",
+ "properties": {
+ "list_models": { "type": "boolean" },
+ "text_completion": { "type": "boolean" },
+ "text_completion_stream": { "type": "boolean" },
+ "chat_completion": { "type": "boolean" },
+ "chat_completion_stream": { "type": "boolean" },
+ "responses": { "type": "boolean" },
+ "responses_stream": { "type": "boolean" },
+ "count_tokens": { "type": "boolean" },
+ "embedding": { "type": "boolean" },
+ "rerank": { "type": "boolean" },
+ "speech": { "type": "boolean" },
+ "speech_stream": { "type": "boolean" },
+ "transcription": { "type": "boolean" },
+ "transcription_stream": { "type": "boolean" },
+ "image_generation": { "type": "boolean" },
+ "image_generation_stream": { "type": "boolean" },
+ "image_edit": { "type": "boolean" },
+ "image_edit_stream": { "type": "boolean" },
+ "image_variation": { "type": "boolean" },
+ "video_generation": { "type": "boolean" },
+ "video_retrieve": { "type": "boolean" },
+ "video_download": { "type": "boolean" },
+ "video_delete": { "type": "boolean" },
+ "video_list": { "type": "boolean" },
+ "video_remix": { "type": "boolean" },
+ "batch_create": { "type": "boolean" },
+ "batch_list": { "type": "boolean" },
+ "batch_retrieve": { "type": "boolean" },
+ "batch_cancel": { "type": "boolean" },
+ "batch_delete": { "type": "boolean" },
+ "batch_results": { "type": "boolean" },
+ "file_upload": { "type": "boolean" },
+ "file_list": { "type": "boolean" },
+ "file_retrieve": { "type": "boolean" },
+ "file_delete": { "type": "boolean" },
+ "file_content": { "type": "boolean" },
+ "container_create": { "type": "boolean" },
+ "container_list": { "type": "boolean" },
+ "container_retrieve": { "type": "boolean" },
+ "container_delete": { "type": "boolean" },
+ "container_file_create": { "type": "boolean" },
+ "container_file_list": { "type": "boolean" },
+ "container_file_retrieve": { "type": "boolean" },
+ "container_file_content": { "type": "boolean" },
+ "container_file_delete": { "type": "boolean" },
+ "passthrough": { "type": "boolean" },
+ "passthrough_stream": { "type": "boolean" },
+ "websocket_responses": { "type": "boolean" },
+ "realtime": { "type": "boolean" }
+ },
+ "additionalProperties": false
+ },
+ "request_path_overrides": {
+ "type": "object",
+ "description": "Mapping of request type to custom path overriding the default provider path",
+ "additionalProperties": { "type": "string" }
+ }
+ },
+ "required": ["base_provider_type"],
+ "additionalProperties": false
}
}
}
diff --git a/ui/app/workspace/custom-pricing/overrides/page.tsx b/ui/app/workspace/custom-pricing/overrides/page.tsx
new file mode 100644
index 0000000000..69de04cfb7
--- /dev/null
+++ b/ui/app/workspace/custom-pricing/overrides/page.tsx
@@ -0,0 +1,11 @@
+"use client";
+
+import ScopedPricingOverridesView from "@/app/workspace/custom-pricing/overrides/scopedPricingOverridesView";
+
+export default function ScopedPricingOverridesPage() {
+ return (
+
+
+
+ );
+}
diff --git a/ui/app/workspace/custom-pricing/overrides/pricingFieldSelector.tsx b/ui/app/workspace/custom-pricing/overrides/pricingFieldSelector.tsx
new file mode 100644
index 0000000000..8552e08565
--- /dev/null
+++ b/ui/app/workspace/custom-pricing/overrides/pricingFieldSelector.tsx
@@ -0,0 +1,234 @@
+"use client";
+
+import { Badge } from "@/components/ui/badge";
+import { Input } from "@/components/ui/input";
+import { cn } from "@/lib/utils";
+import { ChevronDown, Plus, X } from "lucide-react";
+import { useEffect, useMemo, useState } from "react";
+import type { FieldErrors, PricingFieldKey } from "./pricingOverrideSheet";
+import { PRICING_FIELDS } from "./pricingOverrideSheet";
+
+type GroupKey = "chat" | "embedding" | "rerank" | "audio" | "image" | "video";
+
+const PRICING_GROUPS: { key: GroupKey; label: string }[] = [
+ { key: "chat", label: "Chat / Text / Responses" },
+ { key: "embedding", label: "Embedding" },
+ { key: "rerank", label: "Rerank" },
+ { key: "audio", label: "Audio" },
+ { key: "image", label: "Image" },
+ { key: "video", label: "Video" },
+];
+
+const REQUEST_TYPE_TO_CATEGORY: Record = {
+ chat_completion: "chat",
+ text_completion: "chat",
+ responses: "chat",
+ embedding: "embedding",
+ rerank: "rerank",
+ speech: "audio",
+ transcription: "audio",
+ image_generation: "image",
+ image_variation: "image",
+ image_edit: "image",
+ video_generation: "video",
+ video_remix: "video",
+};
+
+interface PricingFieldSelectorProps {
+ values: Partial>;
+ errors: FieldErrors;
+ selectedRequestTypes?: string[];
+ onChange: (key: PricingFieldKey, value: string) => void;
+ onFieldInteraction?: () => void;
+}
+
+export function PricingFieldSelector({ values, errors, selectedRequestTypes, onChange, onFieldInteraction }: PricingFieldSelectorProps) {
+ const [search, setSearch] = useState("");
+ const [openGroups, setOpenGroups] = useState>(new Set(["chat"]));
+
+ const [activeFields, setActiveFields] = useState>(
+ () => new Set(PRICING_FIELDS.filter((f) => values[f.key] != null && values[f.key]!.trim() !== "").map((f) => f.key)),
+ );
+
+ // Sync active fields to exactly the set of keys that have non-empty values.
+ // This handles both loading new overrides (adds keys) and clearing the patch (removes stale keys).
+ useEffect(() => {
+ setActiveFields(new Set(PRICING_FIELDS.filter((f) => values[f.key] != null && values[f.key]!.trim() !== "").map((f) => f.key)));
+ }, [values]);
+
+ // Derive active categories from selected request types
+ const activeCategories = useMemo | null>(() => {
+ if (!selectedRequestTypes || selectedRequestTypes.length === 0) return null;
+ const cats = new Set();
+ for (const rt of selectedRequestTypes) {
+ const cat = REQUEST_TYPE_TO_CATEGORY[rt];
+ if (cat) cats.add(cat);
+ }
+ return cats.size > 0 ? cats : null;
+ }, [selectedRequestTypes]);
+
+ const trimmedSearch = search.trim().toLowerCase();
+ const isSearching = trimmedSearch.length > 0;
+
+ const filteredFields = useMemo(() => {
+ if (!isSearching) return null;
+ return PRICING_FIELDS.filter((f) => f.label.toLowerCase().includes(trimmedSearch) || f.key.toLowerCase().includes(trimmedSearch));
+ }, [isSearching, trimmedSearch]);
+
+ // Fields visible per group when not searching, respecting activeCategories filter
+ const visibleGroupedFields = useMemo(
+ () =>
+ PRICING_GROUPS.map((group) => {
+ const fields = PRICING_FIELDS.filter((f) => {
+ if (f.group !== group.key) return false;
+ if (activeCategories === null) return true;
+ return (f.requestTypeGroups as readonly string[]).some((rg) => activeCategories.has(rg as GroupKey));
+ });
+ return { ...group, fields };
+ }).filter((g) => g.fields.length > 0),
+ [activeCategories],
+ );
+
+ const toggleGroup = (key: GroupKey) => {
+ setOpenGroups((prev) => {
+ const next = new Set(prev);
+ if (next.has(key)) next.delete(key);
+ else next.add(key);
+ return next;
+ });
+ };
+
+ const activateField = (key: PricingFieldKey) => {
+ setActiveFields((prev) => new Set([...prev, key]));
+ };
+
+ const deactivateField = (key: PricingFieldKey) => {
+ setActiveFields((prev) => {
+ const next = new Set(prev);
+ next.delete(key);
+ return next;
+ });
+ onFieldInteraction?.();
+ onChange(key, "");
+ };
+
+ const handleInputChange = (key: PricingFieldKey, value: string) => {
+ onFieldInteraction?.();
+ onChange(key, value);
+ };
+
+ const renderFieldRow = (field: { key: PricingFieldKey; label: string }) => {
+ const isActive = activeFields.has(field.key);
+ const hasValue = values[field.key]?.trim();
+ const error = errors[field.key];
+
+ if (!isActive) {
+ return (
+
+ );
+ }
+
+ return (
+
+
+ {field.label}
+
+
+
handleInputChange(field.key, e.target.value)}
+ placeholder="0.0"
+ />
+ {error &&
{error}
}
+
+ );
+ };
+
+ return (
+
+
setSearch(e.target.value)}
+ className="h-9"
+ data-testid="pricing-field-search"
+ />
+
+
+ {isSearching ? (
+
+ {filteredFields!.length === 0 ? (
+
No fields match “{search}”
+ ) : (
+ filteredFields!.map((field) => renderFieldRow(field))
+ )}
+
+ ) : (
+
+ {visibleGroupedFields.length === 0 ? (
+
No pricing fields for the selected request types
+ ) : (
+ visibleGroupedFields.map((group) => {
+ const isOpen = openGroups.has(group.key);
+ const valueCount = group.fields.filter((f) => values[f.key]?.trim()).length;
+
+ return (
+
+
+
+ {isOpen && (
+
+ {group.fields.map((field) => renderFieldRow(field))}
+
+ )}
+
+ );
+ })
+ )}
+
+ )}
+
+
+ );
+}
diff --git a/ui/app/workspace/custom-pricing/overrides/pricingOverrideSheet.tsx b/ui/app/workspace/custom-pricing/overrides/pricingOverrideSheet.tsx
new file mode 100644
index 0000000000..e31d3eb1ef
--- /dev/null
+++ b/ui/app/workspace/custom-pricing/overrides/pricingOverrideSheet.tsx
@@ -0,0 +1,884 @@
+"use client";
+
+import { CodeEditor } from "@/components/ui/codeEditor";
+import { Button } from "@/components/ui/button";
+import { Checkbox } from "@/components/ui/checkbox";
+import { Input } from "@/components/ui/input";
+import { Label } from "@/components/ui/label";
+import { Popover, PopoverContent, PopoverTrigger } from "@/components/ui/popover";
+import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from "@/components/ui/select";
+import { DottedSeparator } from "@/components/ui/separator";
+import { Sheet, SheetContent, SheetHeader, SheetTitle } from "@/components/ui/sheet";
+import { PricingFieldSelector } from "./pricingFieldSelector";
+import {
+ getErrorMessage,
+ useCreatePricingOverrideMutation,
+ useGetProvidersQuery,
+ useGetVirtualKeysQuery,
+ useUpdatePricingOverrideMutation,
+} from "@/lib/store";
+import { ProviderIconType, RenderProviderIcon } from "@/lib/constants/icons";
+import { getProviderLabel, RequestTypeLabels } from "@/lib/constants/logs";
+import { ModelProvider, RequestType } from "@/lib/types/config";
+import {
+ CreatePricingOverrideRequest,
+ PricingOverride,
+ PricingOverrideMatchType,
+ PricingOverridePatch,
+ PricingOverrideScopeKind,
+} from "@/lib/types/governance";
+import { cn } from "@/lib/utils";
+import { ChevronDown, Save, X } from "lucide-react";
+import { Dispatch, SetStateAction, useCallback, useEffect, useMemo, useRef, useState } from "react";
+import { toast } from "sonner";
+
+export const REQUEST_TYPE_GROUPS = [
+ {
+ label: "Chat / Text / Responses",
+ types: ["chat_completion", "text_completion", "responses"],
+ },
+ {
+ label: "Embedding",
+ types: ["embedding"],
+ },
+ {
+ label: "Rerank",
+ types: ["rerank"],
+ },
+ {
+ label: "Audio",
+ types: ["speech", "transcription"],
+ },
+ {
+ label: "Image",
+ types: ["image_generation", "image_variation", "image_edit"],
+ },
+ {
+ label: "Video",
+ types: ["video_generation", "video_remix"],
+ },
+] as const;
+
+export const REQUEST_TYPE_OPTIONS = REQUEST_TYPE_GROUPS.flatMap((g) => g.types);
+
+export function getRequestTypeGroup(rt: string): string | undefined {
+ return REQUEST_TYPE_GROUPS.find((g) => (g.types as readonly string[]).includes(rt))?.label;
+}
+
+export const PRICING_FIELDS = [
+ // Chat / Text / Responses fields
+ { key: "input_cost_per_token", label: "Input / token", group: "chat", requestTypeGroups: ["chat", "embedding", "rerank", "audio", "image", "video"] },
+ { key: "output_cost_per_token", label: "Output / token", group: "chat", requestTypeGroups: ["chat", "rerank", "audio", "image", "video"] },
+ { key: "input_cost_per_token_batches", label: "Input / token (batch)", group: "chat", requestTypeGroups: ["chat"] },
+ { key: "output_cost_per_token_batches", label: "Output / token (batch)", group: "chat", requestTypeGroups: ["chat"] },
+ { key: "input_cost_per_token_priority", label: "Input / token (priority)", group: "chat", requestTypeGroups: ["chat"] },
+ { key: "output_cost_per_token_priority", label: "Output / token (priority)", group: "chat", requestTypeGroups: ["chat"] },
+ { key: "input_cost_per_token_above_128k_tokens", label: "Input / token (>128k)", group: "chat", requestTypeGroups: ["chat", "embedding", "rerank"] },
+ { key: "output_cost_per_token_above_128k_tokens", label: "Output / token (>128k)", group: "chat", requestTypeGroups: ["chat", "rerank", "audio"] },
+ { key: "input_cost_per_token_above_200k_tokens", label: "Input / token (>200k)", group: "chat", requestTypeGroups: ["chat", "embedding", "rerank"] },
+ { key: "output_cost_per_token_above_200k_tokens", label: "Output / token (>200k)", group: "chat", requestTypeGroups: ["chat", "rerank", "audio"] },
+ { key: "cache_creation_input_token_cost", label: "Cache creation / token", group: "chat", requestTypeGroups: ["chat"] },
+ { key: "cache_read_input_token_cost", label: "Cache read / token", group: "chat", requestTypeGroups: ["chat"] },
+ { key: "cache_creation_input_token_cost_above_200k_tokens", label: "Cache creation / token (>200k)", group: "chat", requestTypeGroups: ["chat"] },
+ { key: "cache_read_input_token_cost_above_200k_tokens", label: "Cache read / token (>200k)", group: "chat", requestTypeGroups: ["chat"] },
+ { key: "cache_creation_input_token_cost_above_1hr", label: "Cache creation / token (>1hr)", group: "chat", requestTypeGroups: ["chat"] },
+ { key: "cache_creation_input_token_cost_above_1hr_above_200k_tokens", label: "Cache creation / token (>1hr, >200k)", group: "chat", requestTypeGroups: ["chat"] },
+ { key: "cache_read_input_token_cost_priority", label: "Cache read / token (priority)", group: "chat", requestTypeGroups: ["chat"] },
+ { key: "search_context_cost_per_query", label: "Search context / query", group: "chat", requestTypeGroups: ["chat", "rerank"] },
+ { key: "code_interpreter_cost_per_session", label: "Code interpreter / session", group: "chat", requestTypeGroups: ["chat"] },
+ // Audio fields
+ { key: "input_cost_per_character", label: "Input / character", group: "audio", requestTypeGroups: ["audio"] },
+ { key: "input_cost_per_audio_token", label: "Input / audio token", group: "audio", requestTypeGroups: ["audio"] },
+ { key: "input_cost_per_audio_per_second", label: "Input / audio second", group: "audio", requestTypeGroups: ["audio"] },
+ { key: "input_cost_per_audio_per_second_above_128k_tokens", label: "Input / audio second (>128k)", group: "audio", requestTypeGroups: ["audio"] },
+ { key: "input_cost_per_second", label: "Input / second", group: "audio", requestTypeGroups: ["audio", "video"] },
+ { key: "output_cost_per_audio_token", label: "Output / audio token", group: "audio", requestTypeGroups: ["audio"] },
+ { key: "output_cost_per_second", label: "Output / second", group: "audio", requestTypeGroups: ["audio", "video"] },
+ { key: "cache_creation_input_audio_token_cost", label: "Cache creation / audio token", group: "audio", requestTypeGroups: ["audio"] },
+ // Image fields
+ { key: "input_cost_per_image_token", label: "Input / image token", group: "image", requestTypeGroups: ["image"] },
+ { key: "input_cost_per_image", label: "Input / image", group: "image", requestTypeGroups: ["image"] },
+ { key: "input_cost_per_image_above_128k_tokens", label: "Input / image (>128k)", group: "image", requestTypeGroups: ["image"] },
+ { key: "input_cost_per_pixel", label: "Input / pixel", group: "image", requestTypeGroups: ["image"] },
+ { key: "output_cost_per_image_token", label: "Output / image token", group: "image", requestTypeGroups: ["image"] },
+ { key: "output_cost_per_image", label: "Output / image", group: "image", requestTypeGroups: ["image"] },
+ { key: "output_cost_per_pixel", label: "Output / pixel", group: "image", requestTypeGroups: ["image"] },
+ { key: "output_cost_per_image_premium_image", label: "Output / image (premium)", group: "image", requestTypeGroups: ["image"] },
+ { key: "output_cost_per_image_above_512_and_512_pixels", label: "Output / image (>512px)", group: "image", requestTypeGroups: ["image"] },
+ { key: "output_cost_per_image_above_512_and_512_pixels_and_premium_image", label: "Output / image (>512px, premium)", group: "image", requestTypeGroups: ["image"] },
+ { key: "output_cost_per_image_above_1024_and_1024_pixels", label: "Output / image (>1024px)", group: "image", requestTypeGroups: ["image"] },
+ { key: "output_cost_per_image_above_1024_and_1024_pixels_and_premium_image", label: "Output / image (>1024px, premium)", group: "image", requestTypeGroups: ["image"] },
+ { key: "output_cost_per_image_low_quality", label: "Output / image (low quality)", group: "image", requestTypeGroups: ["image"] },
+ { key: "output_cost_per_image_medium_quality", label: "Output / image (medium quality)", group: "image", requestTypeGroups: ["image"] },
+ { key: "output_cost_per_image_high_quality", label: "Output / image (high quality)", group: "image", requestTypeGroups: ["image"] },
+ { key: "output_cost_per_image_auto_quality", label: "Output / image (auto quality)", group: "image", requestTypeGroups: ["image"] },
+ { key: "cache_read_input_image_token_cost", label: "Cache read / image token", group: "image", requestTypeGroups: ["image"] },
+ // Video fields
+ { key: "input_cost_per_video_per_second", label: "Input / video second", group: "video", requestTypeGroups: ["video"] },
+ { key: "input_cost_per_video_per_second_above_128k_tokens", label: "Input / video second (>128k)", group: "video", requestTypeGroups: ["video"] },
+ { key: "output_cost_per_video_per_second", label: "Output / video second", group: "video", requestTypeGroups: ["video"] },
+] as const;
+
+export type PricingFieldKey = (typeof PRICING_FIELDS)[number]["key"];
+export type FieldErrors = Partial>;
+
+type ScopeRoot = "global" | "virtual_key";
+
+export interface FormState {
+ name: string;
+ scopeRoot: ScopeRoot;
+ virtualKeyID: string;
+ providerID: string;
+ providerKeyID: string;
+ matchType: PricingOverrideMatchType;
+ pattern: string;
+ requestTypes: RequestType[];
+ pricingValues: Partial>;
+}
+
+export const defaultFormState: FormState = {
+ name: "",
+ scopeRoot: "global",
+ virtualKeyID: "",
+ providerID: "",
+ providerKeyID: "",
+ matchType: "exact",
+ pattern: "",
+ requestTypes: [],
+ pricingValues: {},
+};
+
+export const fieldLabelByKey = Object.fromEntries(PRICING_FIELDS.map((field) => [field.key, field.label])) as Record<
+ PricingFieldKey,
+ string
+>;
+export const patchKeys = PRICING_FIELDS.map((field) => field.key) as PricingFieldKey[];
+
+export function patternError(matchType: PricingOverrideMatchType, pattern: string): string | undefined {
+ const trimmed = pattern.trim();
+ if (!trimmed) return "Pattern is required";
+ if (matchType === "exact") {
+ if (trimmed.includes("*")) return "Exact pattern cannot contain *";
+ } else if (matchType === "wildcard") {
+ const starCount = (trimmed.match(/\*/g) || []).length;
+ if (starCount === 0) return "Wildcard pattern must end with * (example: gpt-5*)";
+ if (starCount > 1) return "Wildcard pattern can include only one *";
+ if (!trimmed.endsWith("*")) return "Wildcard supports prefix-only trailing *";
+ }
+ return undefined;
+}
+
+export function buildPatchFromForm(form: FormState): { patch: PricingOverridePatch; errors: FieldErrors } {
+ const errors: FieldErrors = {};
+ const patch: PricingOverridePatch = {};
+
+ for (const key of patchKeys) {
+ const raw = form.pricingValues[key];
+ if (raw == null || raw.trim() === "") continue;
+ const parsed = Number(raw);
+ if (!Number.isFinite(parsed)) {
+ errors[key] = "Must be a number";
+ continue;
+ }
+ if (parsed < 0) {
+ errors[key] = "Must be >= 0";
+ continue;
+ }
+ (patch as Record)[key] = parsed;
+ }
+
+ return { patch, errors };
+}
+
+function toFormState(override: PricingOverride): FormState {
+ const values: Partial> = {};
+ let parsedPatch: Record = {};
+ try {
+ if (override.pricing_patch) parsedPatch = JSON.parse(override.pricing_patch);
+ } catch {
+ // malformed patch — leave values empty
+ }
+ for (const key of patchKeys) {
+ const val = parsedPatch[key];
+ if (typeof val === "number") values[key] = String(val);
+ }
+ const scopeKind = resolveScopeKind(override);
+
+ const scopeRoot: ScopeRoot =
+ scopeKind === "virtual_key" || scopeKind === "virtual_key_provider" || scopeKind === "virtual_key_provider_key"
+ ? "virtual_key"
+ : "global";
+
+ return {
+ name: override.name ?? "",
+ scopeRoot,
+ virtualKeyID: override.virtual_key_id ?? "",
+ providerID: override.provider_id ?? "",
+ providerKeyID: override.provider_key_id ?? "",
+ matchType: override.match_type,
+ pattern: override.pattern,
+ requestTypes: override.request_types ?? [],
+ pricingValues: values,
+ };
+}
+
+function resolveScopeKind(override: PricingOverride): PricingOverrideScopeKind {
+ if (
+ override.scope_kind === "global" ||
+ override.scope_kind === "provider" ||
+ override.scope_kind === "provider_key" ||
+ override.scope_kind === "virtual_key" ||
+ override.scope_kind === "virtual_key_provider" ||
+ override.scope_kind === "virtual_key_provider_key"
+ ) {
+ return override.scope_kind;
+ }
+ if (override.virtual_key_id) {
+ if (override.provider_key_id) return "virtual_key_provider_key";
+ if (override.provider_id) return "virtual_key_provider";
+ return "virtual_key";
+ }
+ if (override.provider_key_id) return "provider_key";
+ if (override.provider_id) return "provider";
+ return "global";
+}
+
+function deriveScopeKind(form: FormState): PricingOverrideScopeKind {
+ if (form.scopeRoot === "virtual_key") {
+ if (form.providerKeyID) return "virtual_key_provider_key";
+ if (form.providerID) return "virtual_key_provider";
+ return "virtual_key";
+ }
+ if (form.providerKeyID) return "provider_key";
+ if (form.providerID) return "provider";
+ return "global";
+}
+
+export function patchSummary(override: PricingOverride): string {
+ let parsed: Record = {};
+ try {
+ if (override.pricing_patch) parsed = JSON.parse(override.pricing_patch);
+ } catch {
+ // ignore
+ }
+ const keys = Object.keys(parsed) as PricingFieldKey[];
+ if (keys.length === 0) return "None";
+ const labels = keys.map((key) => fieldLabelByKey[key] || key);
+ if (labels.length <= 2) return labels.join(", ");
+ return `${labels.slice(0, 2).join(", ")} +${labels.length - 2} more`;
+}
+
+export function renderFields(
+ fields: ReadonlyArray<{ key: PricingFieldKey; label: string }>,
+ form: FormState,
+ setForm: Dispatch>,
+ errors: FieldErrors,
+ onFieldChange?: () => void,
+) {
+ return (
+
+ {fields.map((field) => (
+
+
+
{
+ onFieldChange?.();
+ setForm((prev) => ({
+ ...prev,
+ pricingValues: { ...prev.pricingValues, [field.key]: e.target.value },
+ }));
+ }}
+ />
+ {errors[field.key] &&
{errors[field.key]}
}
+
+ ))}
+
+ );
+}
+
+
+interface PricingOverrideDrawerProps {
+ open: boolean;
+ onOpenChange: (open: boolean) => void;
+ editingOverride?: PricingOverride | null;
+ scopeLock?: {
+ scopeKind: PricingOverrideScopeKind;
+ virtualKeyID?: string;
+ providerID?: string;
+ providerKeyID?: string;
+ label?: string;
+ };
+ onSaved?: () => void;
+}
+
+function isCompleteScopeLock(scopeLock?: PricingOverrideDrawerProps["scopeLock"]): boolean {
+ if (!scopeLock) return false;
+ switch (scopeLock.scopeKind) {
+ case "global":
+ return true;
+ case "provider":
+ return Boolean(scopeLock.providerID);
+ case "provider_key":
+ return Boolean(scopeLock.providerKeyID);
+ case "virtual_key":
+ return Boolean(scopeLock.virtualKeyID);
+ case "virtual_key_provider":
+ return Boolean(scopeLock.virtualKeyID && scopeLock.providerID);
+ case "virtual_key_provider_key":
+ return Boolean(scopeLock.virtualKeyID && scopeLock.providerID && scopeLock.providerKeyID);
+ default:
+ return false;
+ }
+}
+
+export default function PricingOverrideSheet({ open, onOpenChange, editingOverride, scopeLock, onSaved }: PricingOverrideDrawerProps) {
+ const { data: providersData, isLoading: isProvidersLoading, error: providersError } = useGetProvidersQuery();
+ const { data: virtualKeysData, isLoading: isVirtualKeysLoading, error: virtualKeysError } = useGetVirtualKeysQuery();
+ const [createOverride, { isLoading: isCreating }] = useCreatePricingOverrideMutation();
+ const [updateOverride, { isLoading: isPatching }] = useUpdatePricingOverrideMutation();
+
+ const [form, setForm] = useState(defaultFormState);
+ const [jsonPatch, setJSONPatch] = useState("");
+ const [jsonError, setJSONError] = useState();
+ const jsonEditingRef = useRef(false);
+ const prevOpenRef = useRef(false);
+ const [requestTypePopoverOpen, setRequestTypePopoverOpen] = useState(false);
+ const shouldLockScope = useMemo(() => !editingOverride && isCompleteScopeLock(scopeLock), [editingOverride, scopeLock]);
+
+ const isSaving = isCreating || isPatching;
+ const providers = useMemo(() => (providersError ? [] : (providersData ?? [])), [providersData, providersError]);
+ const virtualKeys = useMemo(() => (virtualKeysError ? [] : (virtualKeysData?.virtual_keys ?? [])), [virtualKeysData, virtualKeysError]);
+
+ const providerKeyOptions = useMemo(
+ () =>
+ providers.flatMap((provider) =>
+ (provider.keys || []).map((key) => ({
+ id: key.id,
+ providerName: provider.name,
+ label: key.name || key.id,
+ })),
+ ),
+ [providers],
+ );
+ const providerScopedKeyOptions = useMemo(
+ () => providerKeyOptions.filter((key) => key.providerName === form.providerID),
+ [providerKeyOptions, form.providerID],
+ );
+
+ // Hydrate the form only when the sheet transitions from closed → open.
+ // This prevents providerKeyOptions refetches from resetting unsaved edits.
+ useEffect(() => {
+ const wasOpen = prevOpenRef.current;
+ prevOpenRef.current = open;
+ if (!open || wasOpen) return;
+
+ jsonEditingRef.current = false;
+ setJSONError(undefined);
+ if (editingOverride) {
+ const state = toFormState(editingOverride);
+ // For provider_key scopes, provider_id is not stored in the DB (it's implicit from
+ // the key). Derive it from providerKeyOptions so the provider selector renders and
+ // the filtered key list shows the pre-selected key correctly.
+ if (!state.providerID && state.providerKeyID) {
+ const match = providerKeyOptions.find((k) => k.id === state.providerKeyID);
+ if (match) state.providerID = match.providerName;
+ }
+ setForm(state);
+ return;
+ }
+ if (shouldLockScope && scopeLock) {
+ const scopedForm: FormState = {
+ ...defaultFormState,
+ virtualKeyID: scopeLock.virtualKeyID ?? "",
+ providerID: scopeLock.providerID ?? "",
+ providerKeyID: scopeLock.providerKeyID ?? "",
+ scopeRoot:
+ scopeLock.scopeKind === "virtual_key" ||
+ scopeLock.scopeKind === "virtual_key_provider" ||
+ scopeLock.scopeKind === "virtual_key_provider_key"
+ ? "virtual_key"
+ : "global",
+ };
+ setForm(scopedForm);
+ return;
+ }
+ setForm(defaultFormState);
+ }, [open, editingOverride, scopeLock, shouldLockScope, providerKeyOptions]);
+
+ // When providerKeyOptions loads after the sheet is already open in edit mode,
+ // backfill the derived providerID without resetting the rest of the form.
+ useEffect(() => {
+ if (!open || !editingOverride) return;
+ setForm((prev) => {
+ if (prev.providerID || !prev.providerKeyID) return prev;
+ const match = providerKeyOptions.find((k) => k.id === prev.providerKeyID);
+ if (!match) return prev;
+ return { ...prev, providerID: match.providerName };
+ });
+ }, [providerKeyOptions, open, editingOverride]);
+
+ const resolvedScopeKind = useMemo(() => {
+ if (shouldLockScope && scopeLock?.scopeKind) return scopeLock.scopeKind;
+ return deriveScopeKind(form);
+ }, [scopeLock, shouldLockScope, form]);
+
+ const resolvedVirtualKeyID = useMemo(() => {
+ if (shouldLockScope) return scopeLock?.virtualKeyID;
+ return form.scopeRoot === "virtual_key" ? form.virtualKeyID || undefined : undefined;
+ }, [scopeLock, shouldLockScope, form.scopeRoot, form.virtualKeyID]);
+
+ const resolvedProviderID = useMemo(() => {
+ if (shouldLockScope) return scopeLock?.providerID;
+ return form.providerID || undefined;
+ }, [scopeLock, shouldLockScope, form.providerID]);
+
+ const resolvedProviderKeyID = useMemo(() => {
+ if (shouldLockScope) return scopeLock?.providerKeyID;
+ return form.providerKeyID || undefined;
+ }, [scopeLock, shouldLockScope, form.providerKeyID]);
+
+ const pricingFieldErrors = useMemo(() => {
+ const errors: FieldErrors = {};
+ for (const key of patchKeys) {
+ const raw = form.pricingValues[key];
+ if (!raw || raw.trim() === "") continue;
+ const parsed = Number(raw);
+ if (!Number.isFinite(parsed)) errors[key] = "Must be a number";
+ else if (parsed < 0) errors[key] = "Must be >= 0";
+ }
+ return errors;
+ }, [form.pricingValues]);
+
+ useEffect(() => {
+ if (!jsonEditingRef.current) {
+ const { patch } = buildPatchFromForm(form);
+ const json = Object.keys(patch).length > 0 ? JSON.stringify(patch, null, 2) : "";
+ setJSONPatch(json);
+ setJSONError(undefined);
+ }
+ }, [form]);
+
+ const handleJSONChange = useCallback((value: string) => {
+ jsonEditingRef.current = true;
+ setJSONPatch(value);
+ const trimmed = value.trim();
+ if (!trimmed) {
+ setJSONError(undefined);
+ setForm((prev) => ({ ...prev, pricingValues: {} }));
+ return;
+ }
+ try {
+ const parsed = JSON.parse(trimmed);
+ if (parsed == null || typeof parsed !== "object" || Array.isArray(parsed)) {
+ setJSONError("Patch must be a JSON object");
+ return;
+ }
+ const pricingValues: Partial> = {};
+ for (const [key, val] of Object.entries(parsed)) {
+ if (!patchKeys.includes(key as PricingFieldKey)) {
+ setJSONError(`Unknown field: ${key}`);
+ return;
+ }
+ if (typeof val !== "number" || Number.isNaN(val) || val < 0) {
+ setJSONError(`${key} must be a non-negative number`);
+ return;
+ }
+ pricingValues[key as PricingFieldKey] = String(val);
+ }
+ setJSONError(undefined);
+ setForm((prev) => ({ ...prev, pricingValues }));
+ } catch {
+ setJSONError("Invalid JSON");
+ }
+ }, []);
+
+ const handleFieldChange = useCallback(() => {
+ jsonEditingRef.current = false;
+ }, []);
+
+
+ const handleCloseDrawer = () => {
+ onOpenChange(false);
+ setRequestTypePopoverOpen(false);
+ };
+
+ const toggleRequestType = (requestType: string) => {
+ setForm((prev) => ({
+ ...prev,
+ requestTypes: prev.requestTypes.includes(requestType)
+ ? prev.requestTypes.filter((item) => item !== requestType)
+ : [...prev.requestTypes, requestType],
+ }));
+ };
+
+ const handleSave = async () => {
+ if (!form.name.trim()) {
+ toast.error("Name is required");
+ return;
+ }
+
+ if (
+ (resolvedScopeKind === "virtual_key" ||
+ resolvedScopeKind === "virtual_key_provider" ||
+ resolvedScopeKind === "virtual_key_provider_key") &&
+ !resolvedVirtualKeyID
+ ) {
+ toast.error("Virtual key is required");
+ return;
+ }
+ if ((resolvedScopeKind === "provider" || resolvedScopeKind === "virtual_key_provider") && !resolvedProviderID) {
+ toast.error("Provider is required");
+ return;
+ }
+ if (resolvedScopeKind === "provider_key" && !resolvedProviderKeyID) {
+ toast.error("Provider key is required");
+ return;
+ }
+ if (resolvedScopeKind === "virtual_key_provider_key" && (!resolvedProviderID || !resolvedProviderKeyID)) {
+ toast.error("Provider and provider key are required");
+ return;
+ }
+
+ const pError = patternError(form.matchType, form.pattern);
+ if (pError) {
+ toast.error(pError);
+ return;
+ }
+
+ if (form.requestTypes.length === 0) {
+ toast.error("At least one request type must be selected");
+ return;
+ }
+
+ if (jsonError) {
+ toast.error("Fix the JSON error before saving");
+ return;
+ }
+
+ const { patch, errors: pricingErrors } = buildPatchFromForm(form);
+ const firstPricingError = Object.values(pricingErrors)[0];
+ if (firstPricingError) {
+ toast.error(firstPricingError);
+ return;
+ }
+ if (Object.keys(patch).length === 0) {
+ toast.error("At least one pricing field must be overridden");
+ return;
+ }
+
+ let scopedVirtualKeyID: string | undefined;
+ let scopedProviderID: string | undefined;
+ let scopedProviderKeyID: string | undefined;
+
+ switch (resolvedScopeKind) {
+ case "global":
+ break;
+ case "provider":
+ scopedProviderID = resolvedProviderID;
+ break;
+ case "provider_key":
+ scopedProviderKeyID = resolvedProviderKeyID;
+ break;
+ case "virtual_key":
+ scopedVirtualKeyID = resolvedVirtualKeyID;
+ break;
+ case "virtual_key_provider":
+ scopedVirtualKeyID = resolvedVirtualKeyID;
+ scopedProviderID = resolvedProviderID;
+ break;
+ case "virtual_key_provider_key":
+ scopedVirtualKeyID = resolvedVirtualKeyID;
+ scopedProviderID = resolvedProviderID;
+ scopedProviderKeyID = resolvedProviderKeyID;
+ break;
+ }
+
+ const requestPayload: CreatePricingOverrideRequest = {
+ name: form.name.trim(),
+ scope_kind: resolvedScopeKind,
+ virtual_key_id: scopedVirtualKeyID,
+ provider_id: scopedProviderID,
+ provider_key_id: scopedProviderKeyID,
+ match_type: form.matchType,
+ pattern: form.pattern.trim(),
+ request_types: form.requestTypes.length > 0 ? form.requestTypes : [],
+ patch,
+ };
+
+ try {
+ if (editingOverride) {
+ await updateOverride({ id: editingOverride.id, data: requestPayload }).unwrap();
+ toast.success("Pricing override updated");
+ } else {
+ await createOverride(requestPayload).unwrap();
+ toast.success("Pricing override created");
+ }
+ handleCloseDrawer();
+ onSaved?.();
+ } catch (error) {
+ toast.error("Failed to save pricing override", { description: getErrorMessage(error) });
+ }
+ };
+
+
+ return (
+ (o ? onOpenChange(true) : handleCloseDrawer())}>
+
+
+ {editingOverride ? "Edit Pricing Override" : "Create Pricing Override"}
+
+
+
+
+
+
+ setForm((prev) => ({ ...prev, name: e.target.value }))} />
+
+
+ {shouldLockScope && scopeLock ? (
+
+
+
+
+ ) : (
+ <>
+
+
+
+
+
+ {form.scopeRoot === "virtual_key" && (
+
+
+
+ {virtualKeysError &&
Failed to load virtual keys: {getErrorMessage(virtualKeysError)}
}
+
+ )}
+
+
+
+
+
+ {providersError &&
Failed to load providers: {getErrorMessage(providersError)}
}
+
+
+ {form.providerID ? (
+
+
+
+
+ ) : (
+
+ )}
+
+
+ >
+ )}
+
+
+
+
+
+
+
+
+
+
+ setForm((prev) => ({ ...prev, pattern: e.target.value }))}
+ placeholder={form.matchType === "exact" ? "e.g., gpt-4o" : "e.g., gpt-4*"}
+ />
+
+
+
+
+
+
+
+
+
+
+ e.stopPropagation()}>
+ e.stopPropagation()}>
+ {REQUEST_TYPE_GROUPS.map((group) => (
+
+
{group.label}
+ {group.types.map((requestType) => {
+ const checked = form.requestTypes.includes(requestType);
+ return (
+
+ );
+ })}
+
+ ))}
+
+
+
+
+
+
+
+
+
+
+
{
+ handleFieldChange();
+ setForm((prev) => ({ ...prev, pricingValues: { ...prev.pricingValues, [key]: value } }));
+ }}
+ onFieldInteraction={handleFieldChange}
+ />
+
+
+
+
+
+
+
+ {jsonError &&
{jsonError}
}
+
+
+
+
+
+
+
+
+
+ );
+}
diff --git a/ui/app/workspace/custom-pricing/overrides/pricingOverridesEmptyState.tsx b/ui/app/workspace/custom-pricing/overrides/pricingOverridesEmptyState.tsx
new file mode 100644
index 0000000000..52c6dae93b
--- /dev/null
+++ b/ui/app/workspace/custom-pricing/overrides/pricingOverridesEmptyState.tsx
@@ -0,0 +1,45 @@
+"use client";
+
+import { Button } from "@/components/ui/button";
+import { ArrowUpRight, SlidersHorizontal } from "lucide-react";
+
+const PRICING_OVERRIDES_DOCS_URL = "https://docs.getbifrost.ai/features/governance/custom-pricing";
+
+interface PricingOverridesEmptyStateProps {
+ onCreateClick: () => void;
+}
+
+export function PricingOverridesEmptyState({ onCreateClick }: PricingOverridesEmptyStateProps) {
+ return (
+
+
+
+
+
+
Pricing overrides customize cost tracking per scope
+
+ Define custom per-token prices for specific providers, keys, or virtual keys to accurately reflect your negotiated rates.
+
+
+
+
+
+
+
+ );
+}
diff --git a/ui/app/workspace/custom-pricing/overrides/scopedPricingOverridesView.tsx b/ui/app/workspace/custom-pricing/overrides/scopedPricingOverridesView.tsx
new file mode 100644
index 0000000000..5432ee5abd
--- /dev/null
+++ b/ui/app/workspace/custom-pricing/overrides/scopedPricingOverridesView.tsx
@@ -0,0 +1,387 @@
+"use client";
+
+import {
+ AlertDialog,
+ AlertDialogAction,
+ AlertDialogCancel,
+ AlertDialogContent,
+ AlertDialogDescription,
+ AlertDialogFooter,
+ AlertDialogHeader,
+ AlertDialogTitle,
+} from "@/components/ui/alertDialog";
+import { Badge } from "@/components/ui/badge";
+import { Button } from "@/components/ui/button";
+import { Table, TableBody, TableCell, TableHead, TableHeader, TableRow } from "@/components/ui/table";
+import {
+ getErrorMessage,
+ useDeletePricingOverrideMutation,
+ useGetPricingOverridesQuery,
+ useGetProvidersQuery,
+ useGetVirtualKeysQuery,
+} from "@/lib/store";
+import { ProviderIconType, RenderProviderIcon } from "@/lib/constants/icons";
+import { getProviderLabel } from "@/lib/constants/logs";
+import { PricingOverride, PricingOverrideScopeKind } from "@/lib/types/governance";
+import { useDebouncedValue } from "@/hooks/useDebounce";
+import { Input } from "@/components/ui/input";
+import { ChevronLeft, ChevronRight, Edit, Search, Trash2 } from "lucide-react";
+import { useSearchParams } from "next/navigation";
+import { useEffect, useMemo, useState } from "react";
+import { toast } from "sonner";
+import PricingOverrideSheet from "./pricingOverrideSheet";
+import { PricingOverridesEmptyState } from "./pricingOverridesEmptyState";
+
+type ScopeFilter = "all" | PricingOverrideScopeKind;
+
+function parseScopeKind(value: string | null): ScopeFilter {
+ if (
+ value === "global" ||
+ value === "provider" ||
+ value === "provider_key" ||
+ value === "virtual_key" ||
+ value === "virtual_key_provider" ||
+ value === "virtual_key_provider_key"
+ ) {
+ return value;
+ }
+ return "all";
+}
+
+// Returns the top-level scope label: "Global" or the virtual key name.
+function scopeLabel(override: PricingOverride, virtualKeyMap: Map): string {
+ const scopeKind = resolveScopeKind(override);
+ if (override.virtual_key_id && scopeKind.startsWith("virtual_key")) {
+ return "Virtual Key";
+ }
+ return "Global";
+}
+
+// Returns the key label for the override, or "-" when no specific key is scoped.
+function keyLabel(override: PricingOverride, keyLabelMap: Map): string {
+ if (!override.provider_key_id) {
+ if (!override.provider_id) return "-";
+ return "All Keys"
+ };
+ return keyLabelMap.get(override.provider_key_id) || override.provider_key_id;
+}
+
+// Returns the provider label for the override, or "-" if not applicable.
+function providerLabel(override: PricingOverride, providerMap: Map, keyProviderMap: Map): string {
+ const scopeKind = resolveScopeKind(override);
+ switch (scopeKind) {
+ case "provider":
+ case "virtual_key_provider":
+ return providerMap.get(override.provider_id || "") || override.provider_id || "-";
+ case "provider_key":
+ case "virtual_key_provider_key": {
+ const keyID = override.provider_key_id || "";
+ return providerMap.get(keyProviderMap.get(keyID) || "") || keyProviderMap.get(keyID) || "-";
+ }
+ default:
+ return "-";
+ }
+}
+
+function resolveScopeKind(override: PricingOverride): PricingOverrideScopeKind {
+ if (
+ override.scope_kind === "global" ||
+ override.scope_kind === "provider" ||
+ override.scope_kind === "provider_key" ||
+ override.scope_kind === "virtual_key" ||
+ override.scope_kind === "virtual_key_provider" ||
+ override.scope_kind === "virtual_key_provider_key"
+ ) {
+ return override.scope_kind;
+ }
+ if (override.virtual_key_id) {
+ if (override.provider_key_id) return "virtual_key_provider_key";
+ if (override.provider_id) return "virtual_key_provider";
+ return "virtual_key";
+ }
+ if (override.provider_key_id) return "provider_key";
+ if (override.provider_id) return "provider";
+ return "global";
+}
+
+const PAGE_SIZE = 25;
+
+export default function ScopedPricingOverridesView() {
+ const searchParams = useSearchParams();
+
+ const [scopeKind, setScopeKind] = useState(() => parseScopeKind(searchParams.get("scope_kind")));
+ const [virtualKeyID, setVirtualKeyID] = useState(() => (searchParams.get("virtual_key_id") || "").trim());
+ const [providerID, setProviderID] = useState(() => (searchParams.get("provider_id") || "").trim());
+ const [providerKeyID, setProviderKeyID] = useState(() => (searchParams.get("provider_key_id") || "").trim());
+
+ const [search, setSearch] = useState("");
+ const [offset, setOffset] = useState(0);
+ const debouncedSearch = useDebouncedValue(search, 300);
+
+ useEffect(() => {
+ setScopeKind(parseScopeKind(searchParams.get("scope_kind")));
+ setVirtualKeyID((searchParams.get("virtual_key_id") || "").trim());
+ setProviderID((searchParams.get("provider_id") || "").trim());
+ setProviderKeyID((searchParams.get("provider_key_id") || "").trim());
+ }, [searchParams]);
+
+ // Reset to first page when filters or search change
+ useEffect(() => {
+ setOffset(0);
+ }, [scopeKind, virtualKeyID, providerID, providerKeyID, debouncedSearch]);
+
+ const queryArgs = useMemo(() => ({
+ scopeKind: scopeKind === "all" ? undefined : scopeKind,
+ virtualKeyID: virtualKeyID || undefined,
+ providerID: providerID || undefined,
+ providerKeyID: providerKeyID || undefined,
+ limit: PAGE_SIZE,
+ offset,
+ search: debouncedSearch || undefined,
+ }), [scopeKind, virtualKeyID, providerID, providerKeyID, offset, debouncedSearch]);
+
+ const { data, isLoading, error } = useGetPricingOverridesQuery(queryArgs);
+
+ // Snap offset back when total shrinks past current page
+ const totalCount = data?.total_count ?? 0;
+ useEffect(() => {
+ if (!data || offset < totalCount) return;
+ setOffset(totalCount === 0 ? 0 : Math.floor((totalCount - 1) / PAGE_SIZE) * PAGE_SIZE);
+ }, [totalCount, offset]);
+ const { data: providersData } = useGetProvidersQuery();
+ const { data: virtualKeysData } = useGetVirtualKeysQuery();
+ const [deleteOverride, { isLoading: isDeleting }] = useDeletePricingOverrideMutation();
+
+ useEffect(() => {
+ if (error) {
+ toast.error("Failed to load pricing overrides", { description: getErrorMessage(error) });
+ }
+ }, [error]);
+
+ const [isDrawerOpen, setIsDrawerOpen] = useState(false);
+ const [editingOverride, setEditingOverride] = useState(null);
+ const [deleteTarget, setDeleteTarget] = useState(null);
+
+ const rows = data?.pricing_overrides ?? [];
+ const providers = useMemo(() => providersData ?? [], [providersData]);
+ const virtualKeys = useMemo(() => virtualKeysData?.virtual_keys ?? [], [virtualKeysData]);
+
+ const providerMap = useMemo(() => new Map(providers.map((provider) => [provider.name, provider.name])), [providers]);
+ const providerKeyOptions = useMemo(
+ () =>
+ providers.flatMap((provider) =>
+ (provider.keys || []).map((key) => ({
+ id: key.id,
+ label: key.name || key.id,
+ providerName: provider.name,
+ })),
+ ),
+ [providers],
+ );
+ const providerKeyProviderMap = useMemo(
+ () => new Map(providerKeyOptions.map((key) => [key.id, key.providerName])),
+ [providerKeyOptions],
+ );
+ const providerKeyLabelMap = useMemo(
+ () => new Map(providerKeyOptions.map((key) => [key.id, key.label])),
+ [providerKeyOptions],
+ );
+ const virtualKeyMap = useMemo(() => new Map(virtualKeys.map((vk) => [vk.id, vk.name])), [virtualKeys]);
+
+ const createScopeLock = useMemo(() => {
+ if (scopeKind === "all") return undefined;
+ return {
+ scopeKind,
+ virtualKeyID: virtualKeyID || undefined,
+ providerID: providerID || undefined,
+ providerKeyID: providerKeyID || undefined,
+ label: `${scopeKind}${virtualKeyID || providerID || providerKeyID ? " (filtered)" : ""}`,
+ };
+ }, [scopeKind, virtualKeyID, providerID, providerKeyID]);
+
+ const openCreateDrawer = () => {
+ setEditingOverride(null);
+ setIsDrawerOpen(true);
+ };
+
+ const openEditDrawer = (override: PricingOverride) => {
+ setEditingOverride(override);
+ setIsDrawerOpen(true);
+ };
+
+ const handleDeleteConfirm = async () => {
+ if (!deleteTarget) return;
+ try {
+ await deleteOverride(deleteTarget.id).unwrap();
+ toast.success("Pricing override deleted");
+ setDeleteTarget(null);
+ } catch (deleteError) {
+ toast.error("Failed to delete pricing override", { description: getErrorMessage(deleteError) });
+ }
+ };
+
+ const hasActiveFilters = debouncedSearch || scopeKind !== "all" || virtualKeyID || providerID || providerKeyID;
+
+ if (!isLoading && !error && totalCount === 0 && !hasActiveFilters) {
+ return (
+ <>
+
+
+ >
+ );
+ }
+
+ return (
+
+
+
+
Pricing Overrides
+
Set custom rates for any model across global or virtual key scopes, optionally narrowed to a specific provider or key
+
+
+
+
+ {/* Search */}
+
+
+ setSearch(e.target.value)}
+ className="pl-9"
+ data-testid="pricing-overrides-search-input"
+ />
+
+
+
+ {isLoading ? (
+
Loading overrides...
+ ) : error ? (
+
Failed to load pricing overrides. Please try refreshing the page.
+ ) : (
+
+
+
+ Name
+ Scope
+ Provider
+ Key
+ Model
+ Actions
+
+
+
+ {rows.length === 0 ? (
+
+
+ No matching pricing overrides found.
+
+
+ ) : rows.map((row) => (
+
+ {row.name || "-"}
+
+ {scopeLabel(row, virtualKeyMap)}
+
+
+ {(() => {
+ const name = providerLabel(row, providerMap, providerKeyProviderMap);
+ if (name === "-") return -;
+ return (
+
+
+ {getProviderLabel(name)}
+
+ );
+ })()}
+
+ {keyLabel(row, providerKeyLabelMap)}
+ {row.pattern}
+ e.stopPropagation()}>
+
+
+
+
+
+
+ ))}
+
+
+ )}
+
+
+ {/* Pagination */}
+ {totalCount > 0 && (
+
+
+ Showing {offset + 1}-{Math.min(offset + PAGE_SIZE, totalCount)} of {totalCount}
+
+
+
+
+
+
+ )}
+
+
+
+
(!open ? setDeleteTarget(null) : undefined)}>
+
+
+ Delete Pricing Override
+
+ Are you sure you want to delete "{deleteTarget?.name}"? This action cannot be undone.
+
+
+
+ Cancel
+ {
+ e.preventDefault();
+ void handleDeleteConfirm();
+ }}
+ disabled={isDeleting}
+ className="bg-destructive hover:bg-destructive/90"
+ >
+ {isDeleting ? "Deleting..." : "Delete"}
+
+
+
+
+
+ );
+}
diff --git a/ui/app/workspace/providers/fragments/index.ts b/ui/app/workspace/providers/fragments/index.ts
index 5d295be2a1..8a9fc1c308 100644
--- a/ui/app/workspace/providers/fragments/index.ts
+++ b/ui/app/workspace/providers/fragments/index.ts
@@ -6,5 +6,4 @@ export { GovernanceFormFragment } from "./governanceFormFragment";
export { NetworkFormFragment } from "./networkFormFragment";
export { PerformanceFormFragment } from "./performanceFormFragment";
export { PerformanceFormFragment as PerformanceTab } from "./performanceFormFragment";
-export { PricingOverridesFormFragment } from "./pricingOverridesFormFragment";
export { ProxyFormFragment } from "./proxyFormFragment";
diff --git a/ui/app/workspace/providers/fragments/pricingOverridesFormFragment.tsx b/ui/app/workspace/providers/fragments/pricingOverridesFormFragment.tsx
deleted file mode 100644
index 3d94b14b15..0000000000
--- a/ui/app/workspace/providers/fragments/pricingOverridesFormFragment.tsx
+++ /dev/null
@@ -1,136 +0,0 @@
-"use client";
-
-import { Button } from "@/components/ui/button";
-import { Textarea } from "@/components/ui/textarea";
-import { getErrorMessage, setProviderFormDirtyState, useAppDispatch } from "@/lib/store";
-import { useUpdateProviderMutation } from "@/lib/store/apis/providersApi";
-import { ModelProvider } from "@/lib/types/config";
-import { providerPricingOverrideSchema } from "@/lib/types/schemas";
-import { RbacOperation, RbacResource, useRbac } from "@enterprise/lib";
-import { useEffect, useMemo, useState } from "react";
-import { toast } from "sonner";
-import { z } from "zod";
-
-interface PricingOverridesFormFragmentProps {
- provider: ModelProvider;
-}
-
-const pricingOverridesArraySchema = z.array(providerPricingOverrideSchema);
-
-const toPrettyJSON = (value: unknown) => JSON.stringify(value, null, 2);
-
-export function PricingOverridesFormFragment({ provider }: PricingOverridesFormFragmentProps) {
- const dispatch = useAppDispatch();
- const hasUpdateProviderAccess = useRbac(RbacResource.ModelProvider, RbacOperation.Update);
- const [updateProvider, { isLoading: isUpdatingProvider }] = useUpdateProviderMutation();
- const initialValue = useMemo(() => toPrettyJSON(provider.pricing_overrides ?? []), [provider.pricing_overrides]);
- const [overridesJSON, setOverridesJSON] = useState(initialValue);
- const [validationError, setValidationError] = useState("");
- const [hasUserEdits, setHasUserEdits] = useState(false);
- const isDirty = hasUserEdits && overridesJSON !== initialValue;
-
- useEffect(() => {
- if (isDirty) {
- return;
- }
- setOverridesJSON(initialValue);
- setValidationError("");
- }, [initialValue, isDirty, provider.name]);
-
- useEffect(() => {
- dispatch(setProviderFormDirtyState(isDirty));
- }, [dispatch, isDirty]);
-
- const onReset = () => {
- setOverridesJSON(initialValue);
- setValidationError("");
- setHasUserEdits(false);
- };
-
- const onSave = async () => {
- let parsed: unknown;
- try {
- parsed = JSON.parse(overridesJSON);
- } catch {
- setValidationError("Invalid JSON format.");
- return;
- }
-
- const validated = pricingOverridesArraySchema.safeParse(parsed);
- if (!validated.success) {
- setValidationError(validated.error.issues[0]?.message || "Invalid pricing overrides configuration.");
- return;
- }
-
- setValidationError("");
-
- try {
- await updateProvider({
- ...provider,
- }).unwrap();
- toast.success("Pricing overrides updated successfully");
- setOverridesJSON(toPrettyJSON(validated.data));
- setHasUserEdits(false);
- } catch (err) {
- toast.error("Failed to update pricing overrides", {
- description: getErrorMessage(err),
- });
- }
- };
-
- return (
-
-
-
Provider Pricing Overrides
-
- Enter a JSON array of override objects. Match precedence is exact > wildcard > regex. Unspecified fields fall back to
- datasheet pricing.
-
-
-
-
- );
-}
diff --git a/ui/app/workspace/virtual-keys/views/virtualKeysTable.tsx b/ui/app/workspace/virtual-keys/views/virtualKeysTable.tsx
index 3f2537dcd2..b45ca448da 100644
--- a/ui/app/workspace/virtual-keys/views/virtualKeysTable.tsx
+++ b/ui/app/workspace/virtual-keys/views/virtualKeysTable.tsx
@@ -1,4 +1,4 @@
-"use client"
+"use client";
import {
AlertDialog,
@@ -11,22 +11,22 @@ import {
AlertDialogTitle,
AlertDialogTrigger,
} from "@/components/ui/alertDialog";
-import { Badge } from "@/components/ui/badge"
-import { Button } from "@/components/ui/button"
+import { Badge } from "@/components/ui/badge";
+import { Button } from "@/components/ui/button";
import { Input } from "@/components/ui/input"
import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from "@/components/ui/select"
-import { Table, TableBody, TableCell, TableHead, TableHeader, TableRow } from "@/components/ui/table"
-import { getErrorMessage, useDeleteVirtualKeyMutation } from "@/lib/store"
-import { Customer, Team, VirtualKey } from "@/lib/types/governance"
-import { cn } from "@/lib/utils"
-import { formatCurrency } from "@/lib/utils/governance"
-import { RbacOperation, RbacResource, useRbac } from "@enterprise/lib"
-import { ChevronLeft, ChevronRight, Copy, Edit, Eye, EyeOff, Plus, Search, Trash2 } from "lucide-react"
-import { useMemo, useState } from "react"
-import { toast } from "sonner"
-import VirtualKeyDetailSheet from "./virtualKeyDetailsSheet"
-import { VirtualKeysEmptyState } from "./virtualKeysEmptyState"
-import VirtualKeySheet from "./virtualKeySheet"
+import { Table, TableBody, TableCell, TableHead, TableHeader, TableRow } from "@/components/ui/table";
+import { getErrorMessage, useDeleteVirtualKeyMutation } from "@/lib/store";
+import { Customer, Team, VirtualKey } from "@/lib/types/governance";
+import { cn } from "@/lib/utils";
+import { formatCurrency } from "@/lib/utils/governance";
+import { RbacOperation, RbacResource, useRbac } from "@enterprise/lib";
+import { ChevronLeft, ChevronRight, Copy, Edit, Eye, EyeOff, Plus, Search, Trash2 } from "lucide-react";
+import { useMemo, useState } from "react";
+import { toast } from "sonner";
+import VirtualKeyDetailSheet from "./virtualKeyDetailsSheet";
+import { VirtualKeysEmptyState } from "./virtualKeysEmptyState";
+import VirtualKeySheet from "./virtualKeySheet";
interface VirtualKeysTableProps {
virtualKeys: VirtualKey[];
@@ -61,27 +61,27 @@ export default function VirtualKeysTable({
limit,
onOffsetChange,
}: VirtualKeysTableProps) {
- const [showVirtualKeySheet, setShowVirtualKeySheet] = useState(false)
- const [editingVirtualKeyId, setEditingVirtualKeyId] = useState(null)
- const [revealedKeys, setRevealedKeys] = useState>(new Set())
- const [selectedVirtualKeyId, setSelectedVirtualKeyId] = useState(null)
- const [showDetailSheet, setShowDetailSheet] = useState(false)
+ const [showVirtualKeySheet, setShowVirtualKeySheet] = useState(false);
+ const [editingVirtualKeyId, setEditingVirtualKeyId] = useState(null);
+ const [revealedKeys, setRevealedKeys] = useState>(new Set());
+ const [selectedVirtualKeyId, setSelectedVirtualKeyId] = useState(null);
+ const [showDetailSheet, setShowDetailSheet] = useState(false);
- // Derive objects from props so they stay in sync with RTK cache updates
- const editingVirtualKey = useMemo(
- () => (editingVirtualKeyId ? virtualKeys.find((vk) => vk.id === editingVirtualKeyId) ?? null : null),
- [editingVirtualKeyId, virtualKeys],
- )
- const selectedVirtualKey = useMemo(
- () => (selectedVirtualKeyId ? virtualKeys.find((vk) => vk.id === selectedVirtualKeyId) ?? null : null),
- [selectedVirtualKeyId, virtualKeys],
- )
+ // Derive objects from props so they stay in sync with RTK cache updates
+ const editingVirtualKey = useMemo(
+ () => (editingVirtualKeyId ? (virtualKeys.find((vk) => vk.id === editingVirtualKeyId) ?? null) : null),
+ [editingVirtualKeyId, virtualKeys],
+ );
+ const selectedVirtualKey = useMemo(
+ () => (selectedVirtualKeyId ? (virtualKeys.find((vk) => vk.id === selectedVirtualKeyId) ?? null) : null),
+ [selectedVirtualKeyId, virtualKeys],
+ );
- const hasCreateAccess = useRbac(RbacResource.VirtualKeys, RbacOperation.Create)
- const hasUpdateAccess = useRbac(RbacResource.VirtualKeys, RbacOperation.Update)
- const hasDeleteAccess = useRbac(RbacResource.VirtualKeys, RbacOperation.Delete)
+ const hasCreateAccess = useRbac(RbacResource.VirtualKeys, RbacOperation.Create);
+ const hasUpdateAccess = useRbac(RbacResource.VirtualKeys, RbacOperation.Update);
+ const hasDeleteAccess = useRbac(RbacResource.VirtualKeys, RbacOperation.Delete);
- const [deleteVirtualKey, { isLoading: isDeleting }] = useDeleteVirtualKeyMutation()
+ const [deleteVirtualKey, { isLoading: isDeleting }] = useDeleteVirtualKeyMutation();
const handleDelete = async (vkId: string) => {
try {
diff --git a/ui/components/sidebar.tsx b/ui/components/sidebar.tsx
index 5d67dfe1c3..e7dab835e2 100644
--- a/ui/components/sidebar.tsx
+++ b/ui/components/sidebar.tsx
@@ -32,6 +32,7 @@ import {
ShieldCheck,
ShieldUser,
Shuffle,
+ SlidersHorizontal,
SquareTerminal,
Telescope,
ToolCase,
@@ -193,10 +194,14 @@ const SidebarItemView = ({
highlightedUrl?: string;
}) => {
const hasSubItems = "subItems" in item && item.subItems && item.subItems.length > 0;
+ const isRouteMatch = (url: string) => {
+ if (url === "/workspace/custom-pricing") return pathname === url;
+ return pathname.startsWith(url);
+ };
const isAnySubItemActive =
hasSubItems &&
item.subItems?.some((subItem) => {
- return pathname.startsWith(subItem.url);
+ return isRouteMatch(subItem.url);
});
const handleClick = (e: React.MouseEvent) => {
@@ -245,12 +250,12 @@ const SidebarItemView = ({
data-testid={`nav-button-${item.title.toLowerCase().replace(/\s+/g, "-")}`}
data-nav-url={!hasSubItems ? item.url : undefined}
className={`relative h-7.5 cursor-pointer rounded-sm border px-3 transition-all duration-200 ${isHighlighted
- ? "bg-sidebar-accent text-accent-foreground border-primary/20"
- : isActive || isAnySubItemActive
- ? "bg-sidebar-accent text-primary border-primary/20"
- : item.hasAccess
- ? "hover:bg-sidebar-accent hover:text-accent-foreground border-transparent text-slate-500 dark:text-zinc-400"
- : "hover:bg-destructive/5 hover:text-muted-foreground text-muted-foreground cursor-not-allowed border-transparent"
+ ? "bg-sidebar-accent text-accent-foreground border-primary/20"
+ : isActive || isAnySubItemActive
+ ? "bg-sidebar-accent text-primary border-primary/20"
+ : item.hasAccess
+ ? "hover:bg-sidebar-accent hover:text-accent-foreground border-transparent text-slate-500 dark:text-zinc-400"
+ : "hover:bg-destructive/5 hover:text-muted-foreground text-muted-foreground cursor-not-allowed border-transparent"
} `}
onClick={hasSubItems ? handleClick : item.hasAccess ? (e) => handleNavigation(item.url, e) : undefined}
>
@@ -283,7 +288,7 @@ const SidebarItemView = ({
{item.subItems?.map((subItem: SidebarItem) => {
// For query param based subitems, check if tab matches
- const isSubItemActive = subItem.queryParam ? pathname === subItem.url : pathname.startsWith(subItem.url);
+ const isSubItemActive = subItem.queryParam ? pathname === subItem.url : isRouteMatch(subItem.url);
const isSubItemHighlighted = highlightedUrl === subItem.url;
const SubItemIcon = subItem.icon;
return (
@@ -292,12 +297,12 @@ const SidebarItemView = ({
data-testid={`nav-submenu-toggle-${subItem.title.toLowerCase().replace(/\s+/g, "-")}`}
data-nav-url={subItem.url}
className={`h-7 cursor-pointer rounded-sm px-2 transition-all duration-200 ${isSubItemHighlighted
- ? "bg-sidebar-accent text-accent-foreground"
- : isSubItemActive
- ? "bg-sidebar-accent text-primary font-medium"
- : subItem.hasAccess === false
- ? "hover:bg-destructive/5 hover:text-muted-foreground text-muted-foreground cursor-not-allowed border-transparent"
- : "hover:bg-sidebar-accent hover:text-accent-foreground text-slate-500 dark:text-zinc-400"
+ ? "bg-sidebar-accent text-accent-foreground"
+ : isSubItemActive
+ ? "bg-sidebar-accent text-primary font-medium"
+ : subItem.hasAccess === false
+ ? "hover:bg-destructive/5 hover:text-muted-foreground text-muted-foreground cursor-not-allowed border-transparent"
+ : "hover:bg-sidebar-accent hover:text-accent-foreground text-slate-500 dark:text-zinc-400"
}`}
onClick={(e) => (subItem.hasAccess === false ? undefined : handleSubItemClick(subItem, e))}
>
@@ -480,6 +485,13 @@ export default function AppSidebar() {
description: "Pricing configuration",
hasAccess: hasSettingsAccess,
},
+ {
+ title: "Pricing Overrides",
+ url: "/workspace/custom-pricing/overrides",
+ icon: SlidersHorizontal,
+ description: "Scoped pricing overrides",
+ hasAccess: hasSettingsAccess,
+ },
],
},
{
@@ -621,8 +633,8 @@ export default function AppSidebar() {
description: "Manage adaptive load balancer",
hasAccess: isAdaptiveRoutingAllowed,
},
- ...(isDbConnected
- ? [
+ ...(isDbConnected
+ ? [
{
title: "Prompt Repository",
url: "/workspace/prompt-repo",
@@ -648,7 +660,7 @@ export default function AppSidebar() {
],
},
]
- : []),
+ : []),
{
title: "Evals",
url: "https://www.getmaxim.ai",
@@ -789,8 +801,12 @@ export default function AppSidebar() {
// Auto-expand items when their subitems are active
useEffect(() => {
const newExpandedItems = new Set();
+ const isRouteMatch = (url: string) => {
+ if (url === "/workspace/custom-pricing") return pathname === url;
+ return pathname.startsWith(url);
+ };
items.forEach((item) => {
- if (item.subItems?.some((subItem) => pathname.startsWith(subItem.url))) {
+ if (item.subItems?.some((subItem) => isRouteMatch(subItem.url))) {
newExpandedItems.add(item.title);
}
});
@@ -917,6 +933,8 @@ export default function AppSidebar() {
const isActiveRoute = (url: string) => {
if (url === "/" && pathname === "/") return true;
+ // Avoid double-highlighting with "/workspace/custom-pricing/overrides"
+ if (url === "/workspace/custom-pricing") return pathname === url;
if (url !== "/" && pathname.startsWith(url)) {
if (url === "/workspace/config" && configExceptions.some((e) => pathname.startsWith(e))) {
return false;
diff --git a/ui/lib/store/apis/baseApi.ts b/ui/lib/store/apis/baseApi.ts
index 1ee8330d0e..a7d199d5a0 100644
--- a/ui/lib/store/apis/baseApi.ts
+++ b/ui/lib/store/apis/baseApi.ts
@@ -152,6 +152,7 @@ export const baseApi = createApi({
"APIKeys",
"OAuth2Config",
"RoutingRules",
+ "PricingOverrides",
"MCPToolGroups",
"AuditLogs",
"UserGovernance",
diff --git a/ui/lib/store/apis/governanceApi.ts b/ui/lib/store/apis/governanceApi.ts
index d6c836f713..fb6ae9084a 100644
--- a/ui/lib/store/apis/governanceApi.ts
+++ b/ui/lib/store/apis/governanceApi.ts
@@ -2,6 +2,8 @@ import {
Budget,
CreateCustomerRequest,
CreateModelConfigRequest,
+ CreatePricingOverrideRequest,
+ UpdatePricingOverrideRequest,
CreateTeamRequest,
CreateVirtualKeyRequest,
Customer,
@@ -11,6 +13,7 @@ import {
GetCustomersResponse,
GetModelConfigsParams,
GetModelConfigsResponse,
+ GetPricingOverridesResponse,
GetProviderGovernanceResponse,
GetRateLimitsResponse,
GetTeamsParams,
@@ -21,6 +24,7 @@ import {
HealthCheckResponse,
ModelConfig,
ProviderGovernance,
+ PricingOverride,
RateLimit,
ResetUsageRequest,
Team,
@@ -35,6 +39,8 @@ import {
} from "@/lib/types/governance";
import { baseApi } from "./baseApi";
+type PricingOverrideQueryArgs = { scopeKind?: string; virtualKeyID?: string; providerID?: string; providerKeyID?: string; limit?: number; offset?: number; search?: string };
+
export const governanceApi = baseApi.injectEndpoints({
endpoints: (builder) => ({
// Virtual Keys
@@ -562,6 +568,136 @@ export const governanceApi = baseApi.injectEndpoints({
},
}),
+ getPricingOverrides: builder.query({
+ query: (params) => ({
+ url: "/governance/pricing-overrides",
+ params: {
+ scope_kind: params?.scopeKind,
+ virtual_key_id: params?.virtualKeyID,
+ provider_id: params?.providerID,
+ provider_key_id: params?.providerKeyID,
+ ...(params?.limit !== undefined && { limit: params.limit }),
+ ...(params?.offset !== undefined && { offset: params.offset }),
+ ...(params?.search && { search: params.search }),
+ },
+ }),
+ providesTags: ["PricingOverrides"],
+ }),
+
+ createPricingOverride: builder.mutation<{ message: string; pricing_override: PricingOverride }, CreatePricingOverrideRequest>({
+ query: (data) => ({
+ url: "/governance/pricing-overrides",
+ method: "POST",
+ body: data,
+ }),
+ async onQueryStarted(_arg, { dispatch, getState, queryFulfilled }) {
+ try {
+ const { data } = await queryFulfilled;
+ const created = data.pricing_override;
+ const queries = (getState() as any).api.queries;
+ for (const entry of Object.values(queries) as any[]) {
+ if (entry?.endpointName !== "getPricingOverrides" || entry?.status !== "fulfilled") continue;
+ const args: PricingOverrideQueryArgs = entry.originalArgs ?? {};
+ const matchesQuery =
+ (!args.scopeKind || args.scopeKind === created.scope_kind) &&
+ (!args.virtualKeyID || args.virtualKeyID === created.virtual_key_id) &&
+ (!args.providerID || args.providerID === created.provider_id) &&
+ (!args.providerKeyID || args.providerKeyID === created.provider_key_id) &&
+ (!args.search || created.name?.toLowerCase().includes(args.search.toLowerCase()));
+ if (!matchesQuery) continue;
+ dispatch(
+ governanceApi.util.updateQueryData("getPricingOverrides", entry.originalArgs, (draft) => {
+ if (!draft.pricing_overrides) draft.pricing_overrides = [];
+ if (!args.offset || args.offset === 0) {
+ draft.pricing_overrides.unshift(created);
+ draft.count = (draft.count || 0) + 1;
+ draft.total_count = (draft.total_count || 0) + 1;
+ } else {
+ draft.total_count = (draft.total_count || 0) + 1;
+ }
+ }),
+ );
+ }
+ } catch {
+ // Mutation failed
+ }
+ },
+ }),
+
+ updatePricingOverride: builder.mutation<
+ { message: string; pricing_override: PricingOverride },
+ { id: string; data: UpdatePricingOverrideRequest }
+ >({
+ query: ({ id, data }) => ({
+ url: `/governance/pricing-overrides/${id}`,
+ method: "PUT",
+ body: data,
+ }),
+ async onQueryStarted({ id }, { dispatch, getState, queryFulfilled }) {
+ try {
+ const { data } = await queryFulfilled;
+ const updated = data.pricing_override;
+ const queries = (getState() as any).api.queries;
+ for (const entry of Object.values(queries) as any[]) {
+ if (entry?.endpointName !== "getPricingOverrides" || entry?.status !== "fulfilled") continue;
+ const args: PricingOverrideQueryArgs = entry.originalArgs ?? {};
+ const matchesQuery =
+ (!args.scopeKind || args.scopeKind === updated.scope_kind) &&
+ (!args.virtualKeyID || args.virtualKeyID === updated.virtual_key_id) &&
+ (!args.providerID || args.providerID === updated.provider_id) &&
+ (!args.providerKeyID || args.providerKeyID === updated.provider_key_id);
+ dispatch(
+ governanceApi.util.updateQueryData("getPricingOverrides", entry.originalArgs, (draft) => {
+ if (!draft.pricing_overrides) return;
+ const index = draft.pricing_overrides.findIndex((o) => o.id === id);
+ if (index === -1) return;
+ if (matchesQuery) {
+ draft.pricing_overrides[index] = updated;
+ } else {
+ // Override no longer belongs in this filtered list
+ draft.pricing_overrides.splice(index, 1);
+ draft.count = Math.max(0, (draft.count || 0) - 1);
+ draft.total_count = Math.max(0, (draft.total_count || 0) - 1);
+ }
+ }),
+ );
+ }
+ } catch {
+ // Mutation failed
+ }
+ },
+ }),
+
+ deletePricingOverride: builder.mutation<{ message: string }, string>({
+ query: (id) => ({
+ url: `/governance/pricing-overrides/${id}`,
+ method: "DELETE",
+ }),
+ async onQueryStarted(id, { dispatch, getState, queryFulfilled }) {
+ try {
+ await queryFulfilled;
+ const queries = (getState() as any).api.queries;
+ for (const entry of Object.values(queries) as any[]) {
+ if (entry?.endpointName !== "getPricingOverrides" || entry?.status !== "fulfilled") continue;
+ dispatch(
+ governanceApi.util.updateQueryData("getPricingOverrides", entry.originalArgs, (draft) => {
+ if (!draft.pricing_overrides) return;
+ const before = draft.pricing_overrides.length;
+ draft.pricing_overrides = draft.pricing_overrides.filter((o) => o.id !== id);
+ const removed = before - draft.pricing_overrides.length;
+ if (removed > 0) {
+ draft.count = Math.max(0, (draft.count || 0) - removed);
+ draft.total_count = Math.max(0, (draft.total_count || 0) - removed);
+ }
+ }),
+ );
+ }
+ } catch {
+ // Mutation failed
+ }
+ },
+ }),
+
// Provider Governance
getProviderGovernance: builder.query({
query: (params) => ({
@@ -679,6 +815,10 @@ export const {
useCreateModelConfigMutation,
useUpdateModelConfigMutation,
useDeleteModelConfigMutation,
+ useGetPricingOverridesQuery,
+ useCreatePricingOverrideMutation,
+ useUpdatePricingOverrideMutation,
+ useDeletePricingOverrideMutation,
// Provider Governance
useGetProviderGovernanceQuery,
diff --git a/ui/lib/types/config.ts b/ui/lib/types/config.ts
index 6fd46509f9..4cf23661b5 100644
--- a/ui/lib/types/config.ts
+++ b/ui/lib/types/config.ts
@@ -266,40 +266,6 @@ export interface CustomProviderConfig {
request_path_overrides?: Record;
}
-export type PricingOverrideMatchType = "exact" | "wildcard" | "regex";
-
-export interface ProviderPricingOverride {
- model_pattern: string;
- match_type: PricingOverrideMatchType;
- request_types?: RequestType[];
- input_cost_per_token?: number;
- output_cost_per_token?: number;
- input_cost_per_video_per_second?: number;
- input_cost_per_audio_per_second?: number;
- input_cost_per_character?: number;
- output_cost_per_character?: number;
- input_cost_per_token_above_128k_tokens?: number;
- input_cost_per_character_above_128k_tokens?: number;
- input_cost_per_image_above_128k_tokens?: number;
- input_cost_per_video_per_second_above_128k_tokens?: number;
- input_cost_per_audio_per_second_above_128k_tokens?: number;
- output_cost_per_token_above_128k_tokens?: number;
- output_cost_per_character_above_128k_tokens?: number;
- input_cost_per_token_above_200k_tokens?: number;
- output_cost_per_token_above_200k_tokens?: number;
- cache_creation_input_token_cost_above_200k_tokens?: number;
- cache_read_input_token_cost_above_200k_tokens?: number;
- cache_read_input_token_cost?: number;
- cache_creation_input_token_cost?: number;
- input_cost_per_token_batches?: number;
- output_cost_per_token_batches?: number;
- input_cost_per_image_token?: number;
- output_cost_per_image_token?: number;
- input_cost_per_image?: number;
- output_cost_per_image?: number;
- cache_read_input_image_token_cost?: number;
-}
-
// ProviderConfig matching Go's lib.ProviderConfig
export interface ModelProviderConfig {
keys: ModelProviderKey[];
@@ -310,7 +276,6 @@ export interface ModelProviderConfig {
send_back_raw_response?: boolean;
store_raw_request_response?: boolean;
custom_provider_config?: CustomProviderConfig;
- pricing_overrides?: ProviderPricingOverride[];
status?: "unknown" | "success" | "list_models_failed";
description?: string;
}
@@ -339,7 +304,6 @@ export interface AddProviderRequest {
send_back_raw_response?: boolean;
store_raw_request_response?: boolean;
custom_provider_config?: CustomProviderConfig;
- pricing_overrides?: ProviderPricingOverride[];
}
// UpdateProviderRequest matching Go's UpdateProviderRequest
@@ -352,7 +316,6 @@ export interface UpdateProviderRequest {
send_back_raw_response?: boolean;
store_raw_request_response?: boolean;
custom_provider_config?: CustomProviderConfig;
- pricing_overrides?: ProviderPricingOverride[];
}
// BifrostErrorResponse matching Go's schemas.BifrostError
diff --git a/ui/lib/types/governance.ts b/ui/lib/types/governance.ts
index 4ec77a057b..6b0f34eeb6 100644
--- a/ui/lib/types/governance.ts
+++ b/ui/lib/types/governance.ts
@@ -1,6 +1,6 @@
// Governance types that match the Go backend structures
-import { ModelProviderName } from "./config";
+import { ModelProviderName, RequestType } from "./config";
export interface Budget {
id: string;
@@ -362,6 +362,120 @@ export interface GetModelConfigsResponse {
offset: number;
}
+export type PricingOverrideScopeKind =
+ | "global"
+ | "provider"
+ | "provider_key"
+ | "virtual_key"
+ | "virtual_key_provider"
+ | "virtual_key_provider_key";
+export type PricingOverrideMatchType = "exact" | "wildcard";
+
+export interface PricingOverridePatch {
+ // Token
+ input_cost_per_token?: number;
+ output_cost_per_token?: number;
+ input_cost_per_token_batches?: number;
+ output_cost_per_token_batches?: number;
+ input_cost_per_token_priority?: number;
+ output_cost_per_token_priority?: number;
+ input_cost_per_character?: number;
+ // 128k tier
+ input_cost_per_token_above_128k_tokens?: number;
+ output_cost_per_token_above_128k_tokens?: number;
+ input_cost_per_image_above_128k_tokens?: number;
+ input_cost_per_video_per_second_above_128k_tokens?: number;
+ input_cost_per_audio_per_second_above_128k_tokens?: number;
+ // 200k tier
+ input_cost_per_token_above_200k_tokens?: number;
+ output_cost_per_token_above_200k_tokens?: number;
+ // Cache
+ cache_creation_input_token_cost?: number;
+ cache_read_input_token_cost?: number;
+ cache_creation_input_token_cost_above_200k_tokens?: number;
+ cache_read_input_token_cost_above_200k_tokens?: number;
+ cache_creation_input_token_cost_above_1hr?: number;
+ cache_creation_input_token_cost_above_1hr_above_200k_tokens?: number;
+ cache_creation_input_audio_token_cost?: number;
+ cache_read_input_token_cost_priority?: number;
+ cache_read_input_image_token_cost?: number;
+ // Image
+ input_cost_per_image_token?: number;
+ output_cost_per_image_token?: number;
+ input_cost_per_image?: number;
+ input_cost_per_pixel?: number;
+ output_cost_per_image?: number;
+ output_cost_per_pixel?: number;
+ output_cost_per_image_premium_image?: number;
+ output_cost_per_image_above_512_and_512_pixels?: number;
+ output_cost_per_image_above_512_and_512_pixels_and_premium_image?: number;
+ output_cost_per_image_above_1024_and_1024_pixels?: number;
+ output_cost_per_image_above_1024_and_1024_pixels_and_premium_image?: number;
+ output_cost_per_image_low_quality?: number;
+ output_cost_per_image_medium_quality?: number;
+ output_cost_per_image_high_quality?: number;
+ output_cost_per_image_auto_quality?: number;
+ // Audio/Video
+ input_cost_per_audio_token?: number;
+ input_cost_per_audio_per_second?: number;
+ input_cost_per_second?: number;
+ input_cost_per_video_per_second?: number;
+ output_cost_per_audio_token?: number;
+ output_cost_per_video_per_second?: number;
+ output_cost_per_second?: number;
+ // Other
+ search_context_cost_per_query?: number;
+ code_interpreter_cost_per_session?: number;
+}
+
+export interface PricingOverride {
+ id: string;
+ name: string;
+ scope_kind: PricingOverrideScopeKind;
+ virtual_key_id?: string;
+ provider_id?: string;
+ provider_key_id?: string;
+ match_type: PricingOverrideMatchType;
+ pattern: string;
+ request_types?: string[];
+ pricing_patch: string;
+ config_hash?: string;
+ created_at: string;
+ updated_at: string;
+}
+
+export interface CreatePricingOverrideRequest {
+ name: string;
+ scope_kind: PricingOverrideScopeKind;
+ virtual_key_id?: string;
+ provider_id?: string;
+ provider_key_id?: string;
+ match_type: PricingOverrideMatchType;
+ pattern: string;
+ request_types: RequestType[];
+ patch?: PricingOverridePatch;
+}
+
+export interface UpdatePricingOverrideRequest {
+ name?: string;
+ scope_kind?: PricingOverrideScopeKind;
+ virtual_key_id?: string;
+ provider_id?: string;
+ provider_key_id?: string;
+ match_type?: PricingOverrideMatchType;
+ pattern?: string;
+ request_types?: string[];
+ patch?: PricingOverridePatch;
+}
+
+export interface GetPricingOverridesResponse {
+ pricing_overrides: PricingOverride[];
+ count: number;
+ total_count: number;
+ limit: number;
+ offset: number;
+}
+
// Provider governance - for extending provider with budget/rate limit
export interface ProviderGovernance {
provider: string;
diff --git a/ui/lib/types/schemas.ts b/ui/lib/types/schemas.ts
index fb7dfc178c..ad3f39ba69 100644
--- a/ui/lib/types/schemas.ts
+++ b/ui/lib/types/schemas.ts
@@ -439,85 +439,6 @@ export const formCustomProviderConfigSchema = z
},
);
-export const providerPricingOverrideMatchTypeSchema = z.enum(["exact", "wildcard", "regex"]);
-
-export const providerPricingOverrideRequestTypeSchema = z.enum([
- "text_completion",
- "text_completion_stream",
- "chat_completion",
- "chat_completion_stream",
- "responses",
- "responses_stream",
- "embedding",
- "rerank",
- "speech",
- "speech_stream",
- "transcription",
- "transcription_stream",
- "image_generation",
- "image_generation_stream",
-]);
-
-export const providerPricingOverrideSchema = z
- .object({
- model_pattern: z.string().min(1, "Model pattern is required"),
- match_type: providerPricingOverrideMatchTypeSchema,
- request_types: z.array(providerPricingOverrideRequestTypeSchema).optional(),
- input_cost_per_token: z.number().min(0).optional(),
- output_cost_per_token: z.number().min(0).optional(),
- input_cost_per_video_per_second: z.number().min(0).optional(),
- input_cost_per_audio_per_second: z.number().min(0).optional(),
- input_cost_per_character: z.number().min(0).optional(),
- output_cost_per_character: z.number().min(0).optional(),
- input_cost_per_token_above_128k_tokens: z.number().min(0).optional(),
- input_cost_per_character_above_128k_tokens: z.number().min(0).optional(),
- input_cost_per_image_above_128k_tokens: z.number().min(0).optional(),
- input_cost_per_video_per_second_above_128k_tokens: z.number().min(0).optional(),
- input_cost_per_audio_per_second_above_128k_tokens: z.number().min(0).optional(),
- output_cost_per_token_above_128k_tokens: z.number().min(0).optional(),
- output_cost_per_character_above_128k_tokens: z.number().min(0).optional(),
- input_cost_per_token_above_200k_tokens: z.number().min(0).optional(),
- output_cost_per_token_above_200k_tokens: z.number().min(0).optional(),
- cache_creation_input_token_cost_above_200k_tokens: z.number().min(0).optional(),
- cache_read_input_token_cost_above_200k_tokens: z.number().min(0).optional(),
- cache_read_input_token_cost: z.number().min(0).optional(),
- cache_creation_input_token_cost: z.number().min(0).optional(),
- input_cost_per_token_batches: z.number().min(0).optional(),
- output_cost_per_token_batches: z.number().min(0).optional(),
- input_cost_per_image_token: z.number().min(0).optional(),
- output_cost_per_image_token: z.number().min(0).optional(),
- input_cost_per_image: z.number().min(0).optional(),
- output_cost_per_image: z.number().min(0).optional(),
- cache_read_input_image_token_cost: z.number().min(0).optional(),
- })
- .superRefine((data, ctx) => {
- if (data.match_type === "exact" && data.model_pattern.includes("*")) {
- ctx.addIssue({
- code: "custom",
- path: ["model_pattern"],
- message: "Exact match patterns cannot include '*'",
- });
- }
- if (data.match_type === "wildcard" && !data.model_pattern.includes("*")) {
- ctx.addIssue({
- code: "custom",
- path: ["model_pattern"],
- message: "Wildcard patterns must include '*'",
- });
- }
- if (data.match_type === "regex") {
- try {
- new RegExp(data.model_pattern);
- } catch {
- ctx.addIssue({
- code: "custom",
- path: ["model_pattern"],
- message: "Invalid regex pattern",
- });
- }
- }
- });
-
// Full model provider config schema
export const modelProviderConfigSchema = z.object({
keys: z.array(modelProviderKeySchema).min(1, "At least one key is required"),
@@ -528,7 +449,6 @@ export const modelProviderConfigSchema = z.object({
send_back_raw_response: z.boolean().optional(),
store_raw_request_response: z.boolean().optional(),
custom_provider_config: customProviderConfigSchema.optional(),
- pricing_overrides: z.array(providerPricingOverrideSchema).optional(),
});
// Model provider schema
@@ -546,7 +466,6 @@ export const formModelProviderConfigSchema = z.object({
send_back_raw_response: z.boolean().optional(),
store_raw_request_response: z.boolean().optional(),
custom_provider_config: formCustomProviderConfigSchema.optional(),
- pricing_overrides: z.array(providerPricingOverrideSchema).optional(),
});
// Flexible model provider schema for form data - allows any string for name
@@ -565,7 +484,6 @@ export const addProviderRequestSchema = z.object({
send_back_raw_response: z.boolean().optional(),
store_raw_request_response: z.boolean().optional(),
custom_provider_config: customProviderConfigSchema.optional(),
- pricing_overrides: z.array(providerPricingOverrideSchema).optional(),
});
// Update provider request schema
@@ -578,7 +496,6 @@ export const updateProviderRequestSchema = z.object({
send_back_raw_response: z.boolean().optional(),
store_raw_request_response: z.boolean().optional(),
custom_provider_config: customProviderConfigSchema.optional(),
- pricing_overrides: z.array(providerPricingOverrideSchema).optional(),
});
// Cache config schema