Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cli/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ require (
github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e // indirect
golang.org/x/arch v0.23.0 // indirect
golang.org/x/exp v0.0.0-20250408133849-7e4ce0ab07d0 // indirect
golang.org/x/exp v0.0.0-20251113190631-e25ba8c21ef6 // indirect
golang.org/x/sys v0.41.0 // indirect
golang.org/x/text v0.33.0 // indirect
)
3 changes: 1 addition & 2 deletions cli/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -89,8 +89,7 @@ github.com/zalando/go-keyring v0.2.6 h1:r7Yc3+H+Ux0+M72zacZoItR3UDxeWfKTcabvkI8u
github.com/zalando/go-keyring v0.2.6/go.mod h1:2TCrxYrbUNYfNS/Kgy/LSrkSQzZ5UPVH85RwfczwvcI=
golang.org/x/arch v0.23.0 h1:lKF64A2jF6Zd8L0knGltUnegD62JMFBiCPBmQpToHhg=
golang.org/x/arch v0.23.0/go.mod h1:dNHoOeKiyja7GTvF9NJS1l3Z2yntpQNzgrjh1cU103A=
golang.org/x/exp v0.0.0-20250408133849-7e4ce0ab07d0 h1:R84qjqJb5nVJMxqWYb3np9L5ZsaDtB+a39EqjV0JSUM=
golang.org/x/exp v0.0.0-20250408133849-7e4ce0ab07d0/go.mod h1:S9Xr4PYopiDyqSyp5NjCrhFrqg6A5zA2E/iPHPhqnS8=
golang.org/x/exp v0.0.0-20251113190631-e25ba8c21ef6 h1:zfMcR1Cs4KNuomFFgGefv5N0czO2XZpUbxGUy8i8ug0=
golang.org/x/sys v0.0.0-20210809222454-d867a43fc93e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.41.0 h1:Ivj+2Cp/ylzLiEU89QhWblYnOE9zerudt9Ftecq2C6k=
Expand Down
2 changes: 1 addition & 1 deletion core/bifrost.go
Original file line number Diff line number Diff line change
Expand Up @@ -4815,7 +4815,7 @@ func executeRequestWithRetries[T any](
} else {
// Populate LLM response attributes for non-streaming responses
if resp, ok := any(result).(*schemas.BifrostResponse); ok {
tracer.PopulateLLMResponseAttributes(handle, resp, bifrostError)
tracer.PopulateLLMResponseAttributes(ctx, handle, resp, bifrostError)
}

// End span with appropriate status
Expand Down
4 changes: 2 additions & 2 deletions core/providers/utils/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -2611,10 +2611,10 @@ func completeDeferredSpan(ctx *schemas.BifrostContext, result *schemas.BifrostRe

if accumulatedResp != nil {
// Use accumulated response for attributes (includes full content, tool calls, etc.)
tracer.PopulateLLMResponseAttributes(handle, accumulatedResp, err)
tracer.PopulateLLMResponseAttributes(ctx, handle, accumulatedResp, err)
} else if result != nil {
// Fall back to final chunk if no accumulated data (shouldn't happen normally)
tracer.PopulateLLMResponseAttributes(handle, result, err)
tracer.PopulateLLMResponseAttributes(ctx, handle, result, err)
}

// Finalize aggregated post-hook spans before ending the LLM span
Expand Down
106 changes: 22 additions & 84 deletions core/schemas/provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,15 @@ import (
)

const (
DefaultMaxRetries = 0
DefaultRetryBackoffInitial = 500 * time.Millisecond
DefaultRetryBackoffMax = 5 * time.Second
DefaultMaxRetries = 0
DefaultRetryBackoffInitial = 500 * time.Millisecond
DefaultRetryBackoffMax = 5 * time.Second
DefaultRequestTimeoutInSeconds = 30
DefaultMaxConnDurationInSeconds = 300 // 5 minutes — forces connection recycling to prevent stale connections from NAT/LB silent drops
DefaultBufferSize = 5000
DefaultConcurrency = 1000
DefaultStreamBufferSize = 256
DefaultStreamIdleTimeoutInSeconds = 60 // Idle timeout per stream chunk — if no data for this many seconds, bifrost closes the connection
DefaultMaxConnDurationInSeconds = 300 // 5 minutes — forces connection recycling to prevent stale connections from NAT/LB silent drops
DefaultBufferSize = 5000
DefaultConcurrency = 1000
DefaultStreamBufferSize = 256
DefaultStreamIdleTimeoutInSeconds = 60 // Idle timeout per stream chunk — if no data for this many seconds, bifrost closes the connection
)

// Pre-defined errors for provider operations
Expand Down Expand Up @@ -49,14 +49,14 @@ const (
// - When marshaling to JSON: a time.Duration is converted to milliseconds
type NetworkConfig struct {
// BaseURL is supported for OpenAI, Anthropic, Cohere, Mistral, and Ollama providers (required for Ollama)
BaseURL string `json:"base_url,omitempty"` // Base URL for the provider (optional)
ExtraHeaders map[string]string `json:"extra_headers,omitempty"` // Additional headers to include in requests (optional)
DefaultRequestTimeoutInSeconds int `json:"default_request_timeout_in_seconds"` // Default timeout for requests
MaxRetries int `json:"max_retries"` // Maximum number of retries
RetryBackoffInitial time.Duration `json:"retry_backoff_initial"` // Initial backoff duration (stored as nanoseconds, JSON as milliseconds)
RetryBackoffMax time.Duration `json:"retry_backoff_max"` // Maximum backoff duration (stored as nanoseconds, JSON as milliseconds)
InsecureSkipVerify bool `json:"insecure_skip_verify,omitempty"` // Disables TLS certificate verification for provider connections
CACertPEM string `json:"ca_cert_pem,omitempty"` // PEM-encoded CA certificate to trust for provider endpoint connections
BaseURL string `json:"base_url,omitempty"` // Base URL for the provider (optional)
ExtraHeaders map[string]string `json:"extra_headers,omitempty"` // Additional headers to include in requests (optional)
DefaultRequestTimeoutInSeconds int `json:"default_request_timeout_in_seconds"` // Default timeout for requests
MaxRetries int `json:"max_retries"` // Maximum number of retries
RetryBackoffInitial time.Duration `json:"retry_backoff_initial"` // Initial backoff duration (stored as nanoseconds, JSON as milliseconds)
RetryBackoffMax time.Duration `json:"retry_backoff_max"` // Maximum backoff duration (stored as nanoseconds, JSON as milliseconds)
InsecureSkipVerify bool `json:"insecure_skip_verify,omitempty"` // Disables TLS certificate verification for provider connections
CACertPEM string `json:"ca_cert_pem,omitempty"` // PEM-encoded CA certificate to trust for provider endpoint connections
StreamIdleTimeoutInSeconds int `json:"stream_idle_timeout_in_seconds,omitempty"` // Idle timeout per stream chunk (0 = use default 60s)
}

Expand Down Expand Up @@ -387,67 +387,6 @@ type CustomProviderConfig struct {
RequestPathOverrides map[RequestType]string `json:"request_path_overrides,omitempty"` // Mapping of request type to its custom path which will override the default path of the provider (not allowed for Bedrock)
}

type PricingOverrideMatchType string

const (
PricingOverrideMatchExact PricingOverrideMatchType = "exact"
PricingOverrideMatchWildcard PricingOverrideMatchType = "wildcard"
PricingOverrideMatchRegex PricingOverrideMatchType = "regex"
)

// ProviderPricingOverride contains a partial pricing patch applied at lookup time.
// Any nil field falls back to the base pricing data.
type ProviderPricingOverride struct {
ModelPattern string `json:"model_pattern"`
MatchType PricingOverrideMatchType `json:"match_type"`
RequestTypes []RequestType `json:"request_types,omitempty"`

// Basic token pricing
InputCostPerToken *float64 `json:"input_cost_per_token,omitempty"`
OutputCostPerToken *float64 `json:"output_cost_per_token,omitempty"`

// Additional pricing for media
InputCostPerVideoPerSecond *float64 `json:"input_cost_per_video_per_second,omitempty"`
InputCostPerAudioPerSecond *float64 `json:"input_cost_per_audio_per_second,omitempty"`

// Character-based pricing
InputCostPerCharacter *float64 `json:"input_cost_per_character,omitempty"`

// Pricing above 128k tokens
InputCostPerTokenAbove128kTokens *float64 `json:"input_cost_per_token_above_128k_tokens,omitempty"`
InputCostPerImageAbove128kTokens *float64 `json:"input_cost_per_image_above_128k_tokens,omitempty"`
InputCostPerVideoPerSecondAbove128kTokens *float64 `json:"input_cost_per_video_per_second_above_128k_tokens,omitempty"`
InputCostPerAudioPerSecondAbove128kTokens *float64 `json:"input_cost_per_audio_per_second_above_128k_tokens,omitempty"`
OutputCostPerTokenAbove128kTokens *float64 `json:"output_cost_per_token_above_128k_tokens,omitempty"`

// Pricing above 200k tokens
InputCostPerTokenAbove200kTokens *float64 `json:"input_cost_per_token_above_200k_tokens,omitempty"`
OutputCostPerTokenAbove200kTokens *float64 `json:"output_cost_per_token_above_200k_tokens,omitempty"`
CacheCreationInputTokenCostAbove200kTokens *float64 `json:"cache_creation_input_token_cost_above_200k_tokens,omitempty"`
CacheReadInputTokenCostAbove200kTokens *float64 `json:"cache_read_input_token_cost_above_200k_tokens,omitempty"`

// Cache and batch pricing
CacheReadInputTokenCost *float64 `json:"cache_read_input_token_cost,omitempty"`
CacheCreationInputTokenCost *float64 `json:"cache_creation_input_token_cost,omitempty"`
InputCostPerTokenBatches *float64 `json:"input_cost_per_token_batches,omitempty"`
OutputCostPerTokenBatches *float64 `json:"output_cost_per_token_batches,omitempty"`

// Image generation pricing
InputCostPerImageToken *float64 `json:"input_cost_per_image_token,omitempty"`
OutputCostPerImageToken *float64 `json:"output_cost_per_image_token,omitempty"`
InputCostPerImage *float64 `json:"input_cost_per_image,omitempty"`
OutputCostPerImage *float64 `json:"output_cost_per_image,omitempty"`
OutputCostPerImageAbove1024x1024Pixels *float64 `json:"output_cost_per_image_above_1024_and_1024_pixels,omitempty"`
OutputCostPerImageAbove1024x1024PixelsPremium *float64 `json:"output_cost_per_image_above_1024_and_1024_pixels_and_premium_image,omitempty"`
OutputCostPerImageAbove2048x2048Pixels *float64 `json:"output_cost_per_image_above_2048_and_2048_pixels,omitempty"`
OutputCostPerImageAbove4096x4096Pixels *float64 `json:"output_cost_per_image_above_4096_and_4096_pixels,omitempty"`
OutputCostPerImageLowQuality *float64 `json:"output_cost_per_image_low_quality,omitempty"`
OutputCostPerImageMediumQuality *float64 `json:"output_cost_per_image_medium_quality,omitempty"`
OutputCostPerImageHighQuality *float64 `json:"output_cost_per_image_high_quality,omitempty"`
OutputCostPerImageAutoQuality *float64 `json:"output_cost_per_image_auto_quality,omitempty"`
CacheReadInputImageTokenCost *float64 `json:"cache_read_input_image_token_cost,omitempty"`
}

// IsOperationAllowed checks if a specific operation is allowed for this custom provider
func (cpc *CustomProviderConfig) IsOperationAllowed(operation RequestType) bool {
if cpc == nil || cpc.AllowedRequests == nil {
Expand All @@ -463,13 +402,12 @@ type ProviderConfig struct {
NetworkConfig NetworkConfig `json:"network_config"` // Network configuration
ConcurrencyAndBufferSize ConcurrencyAndBufferSize `json:"concurrency_and_buffer_size"` // Concurrency settings
// Logger instance, can be provided by the user or bifrost default logger is used if not provided
Logger Logger `json:"-"`
ProxyConfig *ProxyConfig `json:"proxy_config,omitempty"` // Proxy configuration
SendBackRawRequest bool `json:"send_back_raw_request"` // Send raw request back in the bifrost response (default: false)
SendBackRawResponse bool `json:"send_back_raw_response"` // Send raw response back in the bifrost response (default: false)
StoreRawRequestResponse bool `json:"store_raw_request_response"` // Capture raw request/response for internal logging only; strip from API responses returned to clients (default: false)
CustomProviderConfig *CustomProviderConfig `json:"custom_provider_config,omitempty"`
PricingOverrides []ProviderPricingOverride `json:"pricing_overrides,omitempty"`
Logger Logger `json:"-"`
ProxyConfig *ProxyConfig `json:"proxy_config,omitempty"` // Proxy configuration
SendBackRawRequest bool `json:"send_back_raw_request"` // Send raw request back in the bifrost response (default: false)
SendBackRawResponse bool `json:"send_back_raw_response"` // Send raw response back in the bifrost response (default: false)
StoreRawRequestResponse bool `json:"store_raw_request_response"` // Capture raw request/response for internal logging only; strip from API responses returned to clients (default: false)
CustomProviderConfig *CustomProviderConfig `json:"custom_provider_config,omitempty"`
}

func (config *ProviderConfig) CheckAndSetDefaults() {
Expand Down
4 changes: 2 additions & 2 deletions core/schemas/tracer.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ type Tracer interface {

// PopulateLLMResponseAttributes populates all LLM-specific response attributes on the span.
// This includes output messages, tokens, usage stats, and error information if present.
PopulateLLMResponseAttributes(handle SpanHandle, resp *BifrostResponse, err *BifrostError)
PopulateLLMResponseAttributes(ctx *BifrostContext, handle SpanHandle, resp *BifrostResponse, err *BifrostError)
Comment thread
Pratham-Mishra04 marked this conversation as resolved.

// StoreDeferredSpan stores a span handle for later completion (used for streaming requests).
// The span handle is stored keyed by trace ID so it can be retrieved when the stream completes.
Expand Down Expand Up @@ -144,7 +144,7 @@ func (n *NoOpTracer) AddEvent(_ SpanHandle, _ string, _ map[string]any) {}
func (n *NoOpTracer) PopulateLLMRequestAttributes(_ SpanHandle, _ *BifrostRequest) {}

// PopulateLLMResponseAttributes does nothing.
func (n *NoOpTracer) PopulateLLMResponseAttributes(_ SpanHandle, _ *BifrostResponse, _ *BifrostError) {
func (n *NoOpTracer) PopulateLLMResponseAttributes(_ *BifrostContext, _ SpanHandle, _ *BifrostResponse, _ *BifrostError) {
}

// StoreDeferredSpan does nothing.
Expand Down
3 changes: 2 additions & 1 deletion docs/architecture/framework/model-catalog.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,7 @@ Calculate costs from a Bifrost response:
// Calculate cost for a completed request
cost := modelCatalog.CalculateCost(
result, // *schemas.BifrostResponse
nil, // *PricingLookupScopes (nil = no scoped overrides)
)

logger.Info("Request cost: $%.6f", cost)
Expand All @@ -199,7 +200,7 @@ logger.Info("Request cost: $%.6f", cost)

```go
// CalculateCost handles all cost scenarios including cache-aware pricing
cost := modelCatalog.CalculateCost(result) // *schemas.BifrostResponse
cost := modelCatalog.CalculateCost(result, nil) // *schemas.BifrostResponse, *PricingLookupScopes

// Cache hits return 0 for direct hits, embedding cost for semantic matches
// Cache misses return base model cost + embedding generation cost
Expand Down
1 change: 1 addition & 0 deletions docs/docs.json
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,7 @@
"providers/reasoning",
"providers/performance",
"providers/custom-providers",
"providers/custom-pricing",
"providers/request-options"
]
},
Expand Down
Binary file added docs/media/ui-custom-pricing-form.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/media/ui-custom-pricing-table.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Loading