diff --git a/helm-charts/bifrost/Chart.yaml b/helm-charts/bifrost/Chart.yaml index e014608d6d..4082de9f30 100644 --- a/helm-charts/bifrost/Chart.yaml +++ b/helm-charts/bifrost/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: bifrost description: A Helm chart for deploying Bifrost - AI Gateway with unified interface for multiple providers type: application -version: 2.0.18 +version: 2.1.0-prerelease2 appVersion: "1.4.11" keywords: - ai @@ -16,5 +16,4 @@ sources: maintainers: - name: Bifrost Team email: support@getbifrost.ai -icon: https://www.getbifrost.ai/favicon.png - +icon: https://www.getbifrost.ai/favicon.png \ No newline at end of file diff --git a/helm-charts/bifrost/README.md b/helm-charts/bifrost/README.md index 3c8c993ae9..c417f01fd1 100644 --- a/helm-charts/bifrost/README.md +++ b/helm-charts/bifrost/README.md @@ -4,17 +4,26 @@ Official Helm charts for deploying [Bifrost](https://github.com/maximhq/bifrost) - a high-performance AI gateway with unified interface for multiple providers. 
-**Latest Version:** 2.0.18 +**Latest Version:** 2.1.0-prerelease2 ## Changelog -### v2.0.18 +### v2.1.0-prerelease2 (prerelease) -- Fixed MCP client config template to correctly map camelCase keys in Helm values: - - `toolsToExecute` → `tools_to_execute` - - `toolsToAutoExecute` → `tools_to_auto_execute` - - `authType` → `auth_type` - - `oauthConfigId` → `oauth_config_id` +- Synced helm `values.schema.json` with transport `config.schema.json` — fixed virtual key and budget drift: + - Removed `required: [mcp_client_id]` constraint on `virtualKeys[].mcp_configs[]` items — canonical schema accepts either `mcp_client_id` (DB form) or `mcp_client_name` (config-file form, resolved to ID at startup) + - Added `mcp_client_name` as an allowed property on `virtualKeys[].mcp_configs[]` items + - Added `calendar_aligned` (boolean) on `virtualKeys[]` — field now lives on the virtual key, applies uniformly to all budgets under it + - Removed stale `budget_id` from `virtualKeys[]` — `TableVirtualKey` has no `BudgetID`; budgets link via foreign key from the budget table + - Removed stale `calendar_aligned` from `budgets[]` — moved to virtual key level + +### v2.0.17 + +- Added object storage support (S3/GCS) for offloading log payloads from the database +- Added `storage.logsStore.objectStorage` configuration with S3 and GCS backend support +- Added object storage credential injection from Kubernetes secrets (`existingSecret`) +- Added `object_storage` schema to `config.schema.json` under `logs_store` +- Updated deployment and stateful templates with object storage secret env vars ### v2.0.16 @@ -22,8 +31,17 @@ Official Helm charts for deploying [Bifrost](https://github.com/maximhq/bifrost) ### v2.0.15 -- Added `whitelistedRoutes` client config property for routes that bypass auth middleware -- Added `whitelistedRoutes` to Helm schema, values, and template rendering +- Synced helm schema with transport `config.schema.json` — added missing properties: + - 
`client.mcpDisableAutoToolInject` — disable automatic MCP tool injection + - `governance.budgets[].calendar_aligned` — snap budget resets to calendar boundaries + - `governance.pricingOverrides` — scoped pricing overrides for the model catalog + - `mcp.clientConfigs[].allowedExtraHeaders` — header allowlist per MCP client + - `mcp.clientConfigs[].allowOnAllVirtualKeys` — make MCP server accessible to all virtual keys + - `mcp.toolManagerConfig.disableAutoToolInject` — disable auto tool injection at manager level + - `networkConfig.beta_header_overrides` — override Anthropic beta header support per provider + - `websocket` — full WebSocket gateway tuning (connections, pool, transcript buffer) +- Fixed SSE `connectionString` not being rendered in `_helpers.tpl` for MCP clients +- Added template rendering for all new properties in `_helpers.tpl` ### v2.0.14 @@ -451,6 +469,43 @@ autoscaling: targetMemoryUtilizationPercentage: 80 ``` +### Referencing Secrets in MCP Headers + +`bifrost.mcp.clientConfigs[].headers` is a free-form `map` +whose values can contain auth tokens. The chart does not wrap this map with +a bespoke `secretRef` — a per-header dict would explode the values surface. +Instead, use the standard pattern: + +1. Write `env.<VAR_NAME>` (here `env.MY_MCP_AUTH`) as the header value in `values.yaml`: + ```yaml + bifrost: + mcp: + clientConfigs: + - name: "my-mcp" + connectionType: "http" + headers: + Authorization: "env.MY_MCP_AUTH" + ``` +2. Inject the env var into the pod via the chart's top-level `envFrom:` or + `env:` pass-through — e.g., in `values.yaml`: + ```yaml + envFrom: + - secretRef: + name: my-mcp-auth-secret + # OR: + env: + - name: MY_MCP_AUTH + valueFrom: + secretKeyRef: + name: my-mcp-auth-secret + key: authorization + ``` + +For `bifrost.mcp.clientConfigs[].connectionString` itself, prefer the +chart-native `secretRef` (`name` + `connectionStringKey`) instead — the +chart will inject `BIFROST_MCP_<NAME>_CONNECTION_STRING` (where `<NAME>` is the client `name` upper-cased, with each run of characters outside `A-Z0-9` replaced by `_`) and rewrite the +config automatically. 
+ ## Example Configurations The chart includes pre-configured examples in `values-examples/`: @@ -614,7 +669,7 @@ bifrost: config: service_name: "bifrost" collector_url: "http://otel-collector:4317" - trace_type: "otel" + trace_type: "genai_extension" protocol: "grpc" ``` diff --git a/helm-charts/bifrost/templates/_helpers.tpl b/helm-charts/bifrost/templates/_helpers.tpl index ac322985f5..94262bb421 100644 --- a/helm-charts/bifrost/templates/_helpers.tpl +++ b/helm-charts/bifrost/templates/_helpers.tpl @@ -227,8 +227,21 @@ false {{- if .Values.bifrost.client.maxRequestBodySizeMb }} {{- $_ := set $client "max_request_body_size_mb" .Values.bifrost.client.maxRequestBodySizeMb }} {{- end }} -{{- if hasKey .Values.bifrost.client "enableLitellmFallbacks" }} -{{- $_ := set $client "enable_litellm_fallbacks" .Values.bifrost.client.enableLitellmFallbacks }} +{{- if .Values.bifrost.client.compat }} +{{- $compat := dict }} +{{- if hasKey .Values.bifrost.client.compat "convertTextToChat" }} +{{- $_ := set $compat "convert_text_to_chat" .Values.bifrost.client.compat.convertTextToChat }} +{{- end }} +{{- if hasKey .Values.bifrost.client.compat "convertChatToResponses" }} +{{- $_ := set $compat "convert_chat_to_responses" .Values.bifrost.client.compat.convertChatToResponses }} +{{- end }} +{{- if hasKey .Values.bifrost.client.compat "shouldDropParams" }} +{{- $_ := set $compat "should_drop_params" .Values.bifrost.client.compat.shouldDropParams }} +{{- end }} +{{- if hasKey .Values.bifrost.client.compat "shouldConvertParams" }} +{{- $_ := set $compat "should_convert_params" .Values.bifrost.client.compat.shouldConvertParams }} +{{- end }} +{{- $_ := set $client "compat" $compat }} {{- end }} {{- if .Values.bifrost.client.prometheusLabels }} {{- $_ := set $client "prometheus_labels" .Values.bifrost.client.prometheusLabels }} @@ -284,6 +297,12 @@ false {{- if hasKey .Values.bifrost.client "hideDeletedVirtualKeysInFilters" }} {{- $_ := set $client 
"hide_deleted_virtual_keys_in_filters" .Values.bifrost.client.hideDeletedVirtualKeysInFilters }} {{- end }} +{{- if hasKey .Values.bifrost.client "mcpDisableAutoToolInject" }} +{{- $_ := set $client "mcp_disable_auto_tool_inject" .Values.bifrost.client.mcpDisableAutoToolInject }} +{{- end }} +{{- if .Values.bifrost.client.routingChainMaxDepth }} +{{- $_ := set $client "routing_chain_max_depth" .Values.bifrost.client.routingChainMaxDepth }} +{{- end }} {{- $_ := set $config "client" $client }} {{- end }} {{- /* Framework */ -}} @@ -357,6 +376,9 @@ false {{- if .Values.bifrost.governance.providers }} {{- $_ := set $governance "providers" .Values.bifrost.governance.providers }} {{- end }} +{{- if .Values.bifrost.governance.pricingOverrides }} +{{- $_ := set $governance "pricing_overrides" .Values.bifrost.governance.pricingOverrides }} +{{- end }} {{- if .Values.bifrost.governance.authConfig }} {{- $authConfig := dict }} {{- if and .Values.bifrost.governance.authConfig.existingSecret .Values.bifrost.governance.authConfig.usernameKey }} @@ -379,7 +401,7 @@ false {{- $_ := set $governance "auth_config" $authConfig }} {{- end }} {{- end }} -{{- if or $governance.budgets $governance.rate_limits $governance.customers $governance.teams $governance.virtual_keys $governance.routing_rules $governance.model_configs $governance.providers $governance.auth_config }} +{{- if or $governance.budgets $governance.rate_limits $governance.customers $governance.teams $governance.virtual_keys $governance.routing_rules $governance.model_configs $governance.providers $governance.pricing_overrides $governance.auth_config }} {{- $_ := set $config "governance" $governance }} {{- end }} {{- end }} @@ -466,16 +488,17 @@ false {{- end }} {{- $_ := set $config "cluster_config" $cluster }} {{- end }} -{{- /* SAML Config */ -}} -{{- if and .Values.bifrost.saml .Values.bifrost.saml.enabled }} -{{- $saml := dict "enabled" true }} -{{- if .Values.bifrost.saml.provider }} -{{- $_ := set $saml "provider" 
.Values.bifrost.saml.provider }} +{{- /* SCIM Config */ -}} +{{- $scimValues := .Values.bifrost.scim }} +{{- if and $scimValues $scimValues.enabled }} +{{- $scim := dict "enabled" true }} +{{- if $scimValues.provider }} +{{- $_ := set $scim "provider" $scimValues.provider }} {{- end }} -{{- if .Values.bifrost.saml.config }} -{{- $_ := set $saml "config" .Values.bifrost.saml.config }} +{{- if $scimValues.config }} +{{- $_ := set $scim "config" $scimValues.config }} {{- end }} -{{- $_ := set $config "saml_config" $saml }} +{{- $_ := set $config "scim_config" $scim }} {{- end }} {{- /* Load Balancer Config */ -}} {{- if and .Values.bifrost.loadBalancer .Values.bifrost.loadBalancer.enabled }} @@ -552,6 +575,64 @@ false {{- $sqliteLogsStore := dict "enabled" true "type" "sqlite" "config" (dict "path" (printf "%s/logs.db" .Values.bifrost.appDir)) }} {{- $_ := set $config "logs_store" $sqliteLogsStore }} {{- end }} +{{- /* Object Storage for log payloads */ -}} +{{- if and .Values.storage.logsStore.objectStorage .Values.storage.logsStore.objectStorage.enabled }} +{{- $os := .Values.storage.logsStore.objectStorage }} +{{- $osConfig := dict "type" $os.type "bucket" $os.bucket }} +{{- if $os.prefix }} +{{- $_ := set $osConfig "prefix" $os.prefix }} +{{- end }} +{{- if $os.compress }} +{{- $_ := set $osConfig "compress" true }} +{{- end }} +{{- if eq $os.type "s3" }} +{{- if $os.region }} +{{- $_ := set $osConfig "region" $os.region }} +{{- end }} +{{- if $os.endpoint }} +{{- $_ := set $osConfig "endpoint" $os.endpoint }} +{{- end }} +{{- if $os.existingSecret }} +{{- if $os.accessKeyIdKey }} +{{- $_ := set $osConfig "access_key_id" "env.BIFROST_OBJECT_STORAGE_ACCESS_KEY_ID" }} +{{- end }} +{{- if $os.secretAccessKeyKey }} +{{- $_ := set $osConfig "secret_access_key" "env.BIFROST_OBJECT_STORAGE_SECRET_ACCESS_KEY" }} +{{- end }} +{{- if $os.sessionTokenKey }} +{{- $_ := set $osConfig "session_token" "env.BIFROST_OBJECT_STORAGE_SESSION_TOKEN" }} +{{- end }} +{{- $_ := set 
$osConfig "role_arn" "env.BIFROST_OBJECT_STORAGE_ROLE_ARN" }} +{{- else }} +{{- if $os.accessKeyId }} +{{- $_ := set $osConfig "access_key_id" $os.accessKeyId }} +{{- end }} +{{- if $os.secretAccessKey }} +{{- $_ := set $osConfig "secret_access_key" $os.secretAccessKey }} +{{- end }} +{{- if $os.sessionToken }} +{{- $_ := set $osConfig "session_token" $os.sessionToken }} +{{- end }} +{{- if $os.roleArn }} +{{- $_ := set $osConfig "role_arn" $os.roleArn }} +{{- end }} +{{- end }} +{{- if $os.forcePathStyle }} +{{- $_ := set $osConfig "force_path_style" true }} +{{- end }} +{{- end }} +{{- if eq $os.type "gcs" }} +{{- if $os.projectId }} +{{- $_ := set $osConfig "project_id" $os.projectId }} +{{- end }} +{{- if $os.existingSecret }} +{{- $_ := set $osConfig "credentials_json" "env.BIFROST_OBJECT_STORAGE_CREDENTIALS_JSON" }} +{{- else if $os.credentialsJson }} +{{- $_ := set $osConfig "credentials_json" $os.credentialsJson }} +{{- end }} +{{- end }} +{{- $_ := set (index $config "logs_store") "object_storage" $osConfig }} +{{- end }} {{- end }} {{- /* Vector Store */ -}} {{- if and .Values.vectorStore.enabled (ne .Values.vectorStore.type "none") }} @@ -682,6 +763,10 @@ false {{- if and (eq $client.connectionType "websocket") $client.websocketConfig }} {{- $_ := set $cc "connection_string" $client.websocketConfig.url }} {{- end }} +{{- /* Map connectionString for SSE connections */ -}} +{{- if and (eq $client.connectionType "sse") $client.connectionString }} +{{- $_ := set $cc "connection_string" $client.connectionString }} +{{- end }} {{- /* Map stdioConfig -> stdio_config */ -}} {{- if $client.stdioConfig }} {{- $stdio := dict "command" $client.stdioConfig.command }} @@ -697,17 +782,17 @@ false {{- if $client.headers }} {{- $_ := set $cc "headers" $client.headers }} {{- end }} -{{- if $client.toolsToExecute }} -{{- $_ := set $cc "tools_to_execute" $client.toolsToExecute }} +{{- if $client.tools_to_execute }} +{{- $_ := set $cc "tools_to_execute" 
$client.tools_to_execute }} {{- end }} -{{- if $client.toolsToAutoExecute }} -{{- $_ := set $cc "tools_to_auto_execute" $client.toolsToAutoExecute }} +{{- if $client.tools_to_auto_execute }} +{{- $_ := set $cc "tools_to_auto_execute" $client.tools_to_auto_execute }} {{- end }} -{{- if $client.authType }} -{{- $_ := set $cc "auth_type" $client.authType }} +{{- if $client.auth_type }} +{{- $_ := set $cc "auth_type" $client.auth_type }} {{- end }} -{{- if $client.oauthConfigId }} -{{- $_ := set $cc "oauth_config_id" $client.oauthConfigId }} +{{- if $client.oauth_config_id }} +{{- $_ := set $cc "oauth_config_id" $client.oauth_config_id }} {{- end }} {{- if hasKey $client "isPingAvailable" }} {{- $_ := set $cc "is_ping_available" $client.isPingAvailable }} @@ -724,6 +809,17 @@ false {{- if $client.toolPricing }} {{- $_ := set $cc "tool_pricing" $client.toolPricing }} {{- end }} +{{- if $client.allowedExtraHeaders }} +{{- $_ := set $cc "allowed_extra_headers" $client.allowedExtraHeaders }} +{{- end }} +{{- if hasKey $client "allowOnAllVirtualKeys" }} +{{- $_ := set $cc "allow_on_all_virtual_keys" $client.allowOnAllVirtualKeys }} +{{- end }} +{{- /* Override connection_string with env var placeholder when secretRef is set */ -}} +{{- if and $client.secretRef $client.secretRef.name }} +{{- $envName := printf "BIFROST_MCP_%s_CONNECTION_STRING" (regexReplaceAll "[^A-Z0-9]+" (upper $client.name) "_") }} +{{- $_ := set $cc "connection_string" (printf "env.%s" $envName) }} +{{- end }} {{- $clientConfigs = append $clientConfigs $cc }} {{- end }} {{- $mcpConfig := dict "client_configs" $clientConfigs }} @@ -738,6 +834,9 @@ false {{- if .Values.bifrost.mcp.toolManagerConfig.codeModeBindingLevel }} {{- $_ := set $tmConfig "code_mode_binding_level" .Values.bifrost.mcp.toolManagerConfig.codeModeBindingLevel }} {{- end }} +{{- if hasKey .Values.bifrost.mcp.toolManagerConfig "disableAutoToolInject" }} +{{- $_ := set $tmConfig "disable_auto_tool_inject" 
.Values.bifrost.mcp.toolManagerConfig.disableAutoToolInject }} +{{- end }} {{- if $tmConfig }} {{- $_ := set $mcpConfig "tool_manager_config" $tmConfig }} {{- end }} @@ -913,6 +1012,62 @@ false {{- $_ := set $config "audit_logs" $auditLogs }} {{- end }} {{- end }} +{{- /* Large Payload Optimization */ -}} +{{- if .Values.bifrost.largePayloadOptimization }} +{{- $lpo := dict }} +{{- if hasKey .Values.bifrost.largePayloadOptimization "enabled" }} +{{- $_ := set $lpo "enabled" .Values.bifrost.largePayloadOptimization.enabled }} +{{- end }} +{{- if hasKey .Values.bifrost.largePayloadOptimization "requestThresholdBytes" }} +{{- $_ := set $lpo "request_threshold_bytes" .Values.bifrost.largePayloadOptimization.requestThresholdBytes }} +{{- end }} +{{- if hasKey .Values.bifrost.largePayloadOptimization "responseThresholdBytes" }} +{{- $_ := set $lpo "response_threshold_bytes" .Values.bifrost.largePayloadOptimization.responseThresholdBytes }} +{{- end }} +{{- if hasKey .Values.bifrost.largePayloadOptimization "prefetchSizeBytes" }} +{{- $_ := set $lpo "prefetch_size_bytes" .Values.bifrost.largePayloadOptimization.prefetchSizeBytes }} +{{- end }} +{{- if hasKey .Values.bifrost.largePayloadOptimization "maxPayloadBytes" }} +{{- $_ := set $lpo "max_payload_bytes" .Values.bifrost.largePayloadOptimization.maxPayloadBytes }} +{{- end }} +{{- if hasKey .Values.bifrost.largePayloadOptimization "truncatedLogBytes" }} +{{- $_ := set $lpo "truncated_log_bytes" .Values.bifrost.largePayloadOptimization.truncatedLogBytes }} +{{- end }} +{{- if $lpo }} +{{- $_ := set $config "large_payload_optimization" $lpo }} +{{- end }} +{{- end }} +{{- /* WebSocket Config */ -}} +{{- if .Values.bifrost.websocket }} +{{- $ws := dict }} +{{- if .Values.bifrost.websocket.maxConnectionsPerUser }} +{{- $_ := set $ws "max_connections_per_user" .Values.bifrost.websocket.maxConnectionsPerUser }} +{{- end }} +{{- if .Values.bifrost.websocket.transcriptBufferSize }} +{{- $_ := set $ws "transcript_buffer_size" 
.Values.bifrost.websocket.transcriptBufferSize }} +{{- end }} +{{- if .Values.bifrost.websocket.pool }} +{{- $pool := dict }} +{{- if .Values.bifrost.websocket.pool.maxIdlePerKey }} +{{- $_ := set $pool "max_idle_per_key" .Values.bifrost.websocket.pool.maxIdlePerKey }} +{{- end }} +{{- if .Values.bifrost.websocket.pool.maxTotalConnections }} +{{- $_ := set $pool "max_total_connections" .Values.bifrost.websocket.pool.maxTotalConnections }} +{{- end }} +{{- if .Values.bifrost.websocket.pool.idleTimeoutSeconds }} +{{- $_ := set $pool "idle_timeout_seconds" .Values.bifrost.websocket.pool.idleTimeoutSeconds }} +{{- end }} +{{- if .Values.bifrost.websocket.pool.maxConnectionLifetimeSeconds }} +{{- $_ := set $pool "max_connection_lifetime_seconds" .Values.bifrost.websocket.pool.maxConnectionLifetimeSeconds }} +{{- end }} +{{- if $pool }} +{{- $_ := set $ws "pool" $pool }} +{{- end }} +{{- end }} +{{- if $ws }} +{{- $_ := set $config "websocket" $ws }} +{{- end }} +{{- end }} {{- $config | toJson }} {{- end }} @@ -941,7 +1096,7 @@ Call this template at the beginning of deployment/stateful templates {{- fail "ERROR: bifrost.plugins.otel.config.collector_url is required when OTEL plugin is enabled. Provide the URL of your OpenTelemetry collector." }} {{- end }} {{- if not .Values.bifrost.plugins.otel.config.trace_type }} -{{- fail "ERROR: bifrost.plugins.otel.config.trace_type is required when OTEL plugin is enabled. Supported value: otel" }} +{{- fail "ERROR: bifrost.plugins.otel.config.trace_type is required when OTEL plugin is enabled. Supported values: genai_extension, vercel, open_inference" }} {{- end }} {{- if not .Values.bifrost.plugins.otel.config.protocol }} {{- fail "ERROR: bifrost.plugins.otel.config.protocol is required when OTEL plugin is enabled. 
Supported values: http, grpc" }} @@ -955,22 +1110,29 @@ Call this template at the beginning of deployment/stateful templates {{- end }} {{- end }} -{{/* Validate SAML/Okta config when enabled */}} -{{- if and .Values.bifrost.saml .Values.bifrost.saml.enabled }} -{{- if eq .Values.bifrost.saml.provider "okta" }} -{{- if not .Values.bifrost.saml.config.issuerUrl }} -{{- fail "ERROR: bifrost.saml.config.issuerUrl is required when SAML provider is Okta. Example: https://your-domain.okta.com/oauth2/default" }} +{{/* Validate SCIM/SSO config when enabled */}} +{{- $scimValidation := .Values.bifrost.scim }} +{{- if and $scimValidation $scimValidation.enabled }} +{{- if eq $scimValidation.provider "okta" }} +{{- if not $scimValidation.config.issuerUrl }} +{{- fail "ERROR: bifrost.scim.config.issuerUrl is required when SCIM provider is Okta. Example: https://your-domain.okta.com/oauth2/default" }} +{{- end }} +{{- if not $scimValidation.config.clientId }} +{{- fail "ERROR: bifrost.scim.config.clientId is required when SCIM provider is Okta." }} +{{- end }} +{{- if not $scimValidation.config.clientSecret }} +{{- fail "ERROR: bifrost.scim.config.clientSecret is required when SCIM provider is Okta." }} {{- end }} -{{- if not .Values.bifrost.saml.config.clientId }} -{{- fail "ERROR: bifrost.saml.config.clientId is required when SAML provider is Okta." }} +{{- if not $scimValidation.config.apiToken }} +{{- fail "ERROR: bifrost.scim.config.apiToken is required when SCIM provider is Okta." }} {{- end }} {{- end }} -{{- if eq .Values.bifrost.saml.provider "entra" }} -{{- if not .Values.bifrost.saml.config.tenantId }} -{{- fail "ERROR: bifrost.saml.config.tenantId is required when SAML provider is Entra (Azure AD)." }} +{{- if eq $scimValidation.provider "entra" }} +{{- if not $scimValidation.config.tenantId }} +{{- fail "ERROR: bifrost.scim.config.tenantId is required when SCIM provider is Entra (Azure AD)." 
}} {{- end }} -{{- if not .Values.bifrost.saml.config.clientId }} -{{- fail "ERROR: bifrost.saml.config.clientId is required when SAML provider is Entra (Azure AD)." }} +{{- if not $scimValidation.config.clientId }} +{{- fail "ERROR: bifrost.scim.config.clientId is required when SCIM provider is Entra (Azure AD)." }} {{- end }} {{- end }} {{- end }} diff --git a/helm-charts/bifrost/templates/deployment.yaml b/helm-charts/bifrost/templates/deployment.yaml index 9200f78548..74dc54e322 100644 --- a/helm-charts/bifrost/templates/deployment.yaml +++ b/helm-charts/bifrost/templates/deployment.yaml @@ -142,6 +142,42 @@ spec: name: {{ .Values.vectorStore.pinecone.external.existingSecret }} key: {{ .Values.vectorStore.pinecone.external.apiKeyKey | default "api-key" }} {{- end }} + {{- /* Object storage credentials from existing secret */ -}} + {{- if and .Values.storage.logsStore.enabled .Values.storage.logsStore.objectStorage .Values.storage.logsStore.objectStorage.enabled .Values.storage.logsStore.objectStorage.existingSecret }} + {{- if eq .Values.storage.logsStore.objectStorage.type "s3" }} + - name: BIFROST_OBJECT_STORAGE_ACCESS_KEY_ID + valueFrom: + secretKeyRef: + name: {{ .Values.storage.logsStore.objectStorage.existingSecret }} + key: {{ .Values.storage.logsStore.objectStorage.accessKeyIdKey | default "access-key-id" }} + optional: true + - name: BIFROST_OBJECT_STORAGE_SECRET_ACCESS_KEY + valueFrom: + secretKeyRef: + name: {{ .Values.storage.logsStore.objectStorage.existingSecret }} + key: {{ .Values.storage.logsStore.objectStorage.secretAccessKeyKey | default "secret-access-key" }} + optional: true + - name: BIFROST_OBJECT_STORAGE_SESSION_TOKEN + valueFrom: + secretKeyRef: + name: {{ .Values.storage.logsStore.objectStorage.existingSecret }} + key: {{ .Values.storage.logsStore.objectStorage.sessionTokenKey | default "session-token" }} + optional: true + - name: BIFROST_OBJECT_STORAGE_ROLE_ARN + valueFrom: + secretKeyRef: + name: {{ 
.Values.storage.logsStore.objectStorage.existingSecret }} + key: {{ .Values.storage.logsStore.objectStorage.roleArnKey | default "role-arn" }} + optional: true + {{- end }} + {{- if eq .Values.storage.logsStore.objectStorage.type "gcs" }} + - name: BIFROST_OBJECT_STORAGE_CREDENTIALS_JSON + valueFrom: + secretKeyRef: + name: {{ .Values.storage.logsStore.objectStorage.existingSecret }} + key: {{ .Values.storage.logsStore.objectStorage.credentialsJsonKey | default "credentials-json" }} + {{- end }} + {{- end }} {{- /* Maxim API key from existing secret */ -}} {{- if and .Values.bifrost.plugins.maxim.enabled .Values.bifrost.plugins.maxim.secretRef .Values.bifrost.plugins.maxim.secretRef.name }} - name: BIFROST_MAXIM_API_KEY @@ -150,6 +186,18 @@ spec: name: {{ .Values.bifrost.plugins.maxim.secretRef.name }} key: {{ .Values.bifrost.plugins.maxim.secretRef.key | default "api-key" }} {{- end }} + {{- /* MCP client connection strings from existing secrets (one per client with secretRef.name set) */ -}} + {{- if .Values.bifrost.mcp.enabled }} + {{- range $idx, $client := .Values.bifrost.mcp.clientConfigs }} + {{- if and $client.secretRef $client.secretRef.name }} + - name: BIFROST_MCP_{{ regexReplaceAll "[^A-Z0-9]+" (upper $client.name) "_" }}_CONNECTION_STRING + valueFrom: + secretKeyRef: + name: {{ $client.secretRef.name }} + key: {{ $client.secretRef.connectionStringKey | default "connection-string" }} + {{- end }} + {{- end }} + {{- end }} {{- /* Governance auth credentials from existing secret */ -}} {{- if and .Values.bifrost.governance .Values.bifrost.governance.authConfig .Values.bifrost.governance.authConfig.existingSecret }} - name: BIFROST_ADMIN_USERNAME diff --git a/helm-charts/bifrost/templates/stateful.yaml b/helm-charts/bifrost/templates/stateful.yaml index 4652480a6c..2443e9ffb4 100644 --- a/helm-charts/bifrost/templates/stateful.yaml +++ b/helm-charts/bifrost/templates/stateful.yaml @@ -142,6 +142,42 @@ spec: name: {{ 
.Values.vectorStore.pinecone.external.existingSecret }} key: {{ .Values.vectorStore.pinecone.external.apiKeyKey | default "api-key" }} {{- end }} + {{- /* Object storage credentials from existing secret */ -}} + {{- if and .Values.storage.logsStore.enabled .Values.storage.logsStore.objectStorage .Values.storage.logsStore.objectStorage.enabled .Values.storage.logsStore.objectStorage.existingSecret }} + {{- if eq .Values.storage.logsStore.objectStorage.type "s3" }} + - name: BIFROST_OBJECT_STORAGE_ACCESS_KEY_ID + valueFrom: + secretKeyRef: + name: {{ .Values.storage.logsStore.objectStorage.existingSecret }} + key: {{ .Values.storage.logsStore.objectStorage.accessKeyIdKey | default "access-key-id" }} + optional: true + - name: BIFROST_OBJECT_STORAGE_SECRET_ACCESS_KEY + valueFrom: + secretKeyRef: + name: {{ .Values.storage.logsStore.objectStorage.existingSecret }} + key: {{ .Values.storage.logsStore.objectStorage.secretAccessKeyKey | default "secret-access-key" }} + optional: true + - name: BIFROST_OBJECT_STORAGE_SESSION_TOKEN + valueFrom: + secretKeyRef: + name: {{ .Values.storage.logsStore.objectStorage.existingSecret }} + key: {{ .Values.storage.logsStore.objectStorage.sessionTokenKey | default "session-token" }} + optional: true + - name: BIFROST_OBJECT_STORAGE_ROLE_ARN + valueFrom: + secretKeyRef: + name: {{ .Values.storage.logsStore.objectStorage.existingSecret }} + key: {{ .Values.storage.logsStore.objectStorage.roleArnKey | default "role-arn" }} + optional: true + {{- end }} + {{- if eq .Values.storage.logsStore.objectStorage.type "gcs" }} + - name: BIFROST_OBJECT_STORAGE_CREDENTIALS_JSON + valueFrom: + secretKeyRef: + name: {{ .Values.storage.logsStore.objectStorage.existingSecret }} + key: {{ .Values.storage.logsStore.objectStorage.credentialsJsonKey | default "credentials-json" }} + {{- end }} + {{- end }} {{- /* Maxim API key from existing secret */ -}} {{- if and .Values.bifrost.plugins.maxim.enabled .Values.bifrost.plugins.maxim.secretRef 
.Values.bifrost.plugins.maxim.secretRef.name }} - name: BIFROST_MAXIM_API_KEY @@ -150,6 +186,18 @@ spec: name: {{ .Values.bifrost.plugins.maxim.secretRef.name }} key: {{ .Values.bifrost.plugins.maxim.secretRef.key | default "api-key" }} {{- end }} + {{- /* MCP client connection strings from existing secrets (one per client with secretRef.name set) */ -}} + {{- if .Values.bifrost.mcp.enabled }} + {{- range $idx, $client := .Values.bifrost.mcp.clientConfigs }} + {{- if and $client.secretRef $client.secretRef.name }} + - name: BIFROST_MCP_{{ regexReplaceAll "[^A-Z0-9]+" (upper $client.name) "_" }}_CONNECTION_STRING + valueFrom: + secretKeyRef: + name: {{ $client.secretRef.name }} + key: {{ $client.secretRef.connectionStringKey | default "connection-string" }} + {{- end }} + {{- end }} + {{- end }} {{- /* Governance auth credentials from existing secret */ -}} {{- if and .Values.bifrost.governance .Values.bifrost.governance.authConfig .Values.bifrost.governance.authConfig.existingSecret }} - name: BIFROST_ADMIN_USERNAME diff --git a/helm-charts/bifrost/values-examples/providers-and-virtual-keys.yaml b/helm-charts/bifrost/values-examples/providers-and-virtual-keys.yaml index 15fe5d08dd..9f57bdf5ab 100644 --- a/helm-charts/bifrost/values-examples/providers-and-virtual-keys.yaml +++ b/helm-charts/bifrost/values-examples/providers-and-virtual-keys.yaml @@ -74,15 +74,15 @@ bifrost: - name: "openai-primary" value: "sk-dummy-openai-key-1-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" weight: 2 # Gets 50% of traffic (2 out of 4 total weight) - models: + models: ["*"] - name: "openai-secondary" value: "sk-dummy-openai-key-2-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" weight: 1 # Gets 25% of traffic - models: + models: ["*"] - name: "openai-backup" value: "sk-dummy-openai-key-3-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" weight: 1 # Gets 25% of traffic - models: + models: ["*"] # Anthropic - 2 API keys anthropic: @@ -90,11 +90,11 @@ bifrost: - name: "anthropic-primary" value: 
"sk-ant-dummy-key-1-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" weight: 1 - models: + models: ["*"] - name: "anthropic-secondary" value: "sk-ant-dummy-key-2-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" weight: 1 - models: + models: ["*"] # Groq - 2 API keys groq: @@ -102,11 +102,11 @@ bifrost: - name: "groq-primary" value: "gsk_dummy_groq_key_1_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" weight: 1 - models: + models: ["*"] - name: "groq-secondary" value: "gsk_dummy_groq_key_2_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" weight: 1 - models: + models: ["*"] # ========================================================================== # GOVERNANCE CONFIGURATION diff --git a/helm-charts/bifrost/values-examples/secrets-from-k8s.yaml b/helm-charts/bifrost/values-examples/secrets-from-k8s.yaml index 6f5a988039..6111ff3c98 100644 --- a/helm-charts/bifrost/values-examples/secrets-from-k8s.yaml +++ b/helm-charts/bifrost/values-examples/secrets-from-k8s.yaml @@ -76,11 +76,13 @@ bifrost: providers: openai: keys: - - value: "env.OPENAI_API_KEY" + - name: "openai-primary" + value: "env.OPENAI_API_KEY" weight: 1 anthropic: keys: - - value: "env.ANTHROPIC_API_KEY" + - name: "anthropic-primary" + value: "env.ANTHROPIC_API_KEY" weight: 1 # Provider secrets - inject API keys from Kubernetes secrets as env vars diff --git a/helm-charts/bifrost/values.schema.json b/helm-charts/bifrost/values.schema.json index 1c60ca1f00..28074334d0 100644 --- a/helm-charts/bifrost/values.schema.json +++ b/helm-charts/bifrost/values.schema.json @@ -240,6 +240,15 @@ "type": "string", "description": "Encryption key for sensitive data" }, + "encryptionKeySecret": { + "type": "object", + "description": "Reference to an existing Kubernetes secret holding the encryption key. 
Takes precedence over `encryptionKey` when `name` is set.", + "properties": { + "name": { "type": "string" }, + "key": { "type": "string", "default": "encryption-key" } + }, + "additionalProperties": false + }, "authConfig": { "$ref": "#/$defs/authConfig" }, @@ -293,8 +302,15 @@ "type": "integer", "minimum": 1 }, - "enableLitellmFallbacks": { - "type": "boolean" + "compat": { + "type": "object", + "additionalProperties": false, + "properties": { + "convertTextToChat": { "type": "boolean" }, + "convertChatToResponses": { "type": "boolean" }, + "shouldDropParams": { "type": "boolean" }, + "shouldConvertParams": { "type": "boolean" } + } }, "prometheusLabels": { "type": "array", @@ -383,6 +399,16 @@ "hideDeletedVirtualKeysInFilters": { "type": "boolean", "description": "When true, deleted virtual keys are omitted from logs and MCP logs filter data" + }, + "mcpDisableAutoToolInject": { + "type": "boolean", + "description": "When true, MCP tools are not automatically injected into requests. Tools are only included when explicitly specified via request context filters or headers." + }, + "routingChainMaxDepth": { + "type": "integer", + "minimum": 1, + "description": "Maximum depth for routing rule chain evaluation", + "default": 10 } }, "additionalProperties": false @@ -456,6 +482,11 @@ "type": "string", "enum": ["server", "tool"], "description": "How tools are exposed in VFS for code execution" + }, + "disableAutoToolInject": { + "type": "boolean", + "description": "When true, MCP tools are not automatically injected into requests. 
Tools are only included when explicitly specified.", + "default": false } } }, @@ -748,7 +779,9 @@ "trace_type": { "type": "string", "enum": [ - "otel" + "genai_extension", + "vercel", + "open_inference" ], "description": "Type of trace to use for the OTEL collector" }, @@ -876,7 +909,7 @@ }, "placement": { "type": "string", - "enum": ["pre_builtin", "post_builtin"], + "enum": ["pre_builtin", "post_builtin", "builtin"], "default": "post_builtin", "description": "Plugin execution placement relative to built-in plugins" }, @@ -1065,12 +1098,12 @@ "customer_id": { "type": "string" }, - "budget_id": { - "type": "string" - }, "rate_limit_id": { "type": "string" }, + "calendar_aligned": { + "type": "boolean" + }, "provider_configs": { "type": "array", "items": { @@ -1085,16 +1118,16 @@ "mcp_client_id": { "type": "integer" }, + "mcp_client_name": { + "type": "string" + }, "tools_to_execute": { "type": "array", "items": { "type": "string" } } - }, - "required": [ - "mcp_client_id" - ] + } } } }, @@ -1155,12 +1188,6 @@ "required": ["weight"] } }, - "provider": { - "type": "string" - }, - "model": { - "type": "string" - }, "fallbacks": { "type": "array", "items": { @@ -1220,6 +1247,40 @@ }, "required": ["name"] } + }, + "pricingOverrides": { + "type": "array", + "description": "Scoped pricing overrides applied at runtime by the model catalog", + "items": { + "type": "object", + "properties": { + "id": { "type": "string", "description": "Unique pricing override ID" }, + "name": { "type": "string", "description": "Human-readable name for this override" }, + "scope_kind": { + "type": "string", + "enum": ["global", "provider", "provider_key", "virtual_key", "virtual_key_provider", "virtual_key_provider_key"], + "description": "Scope level for this override" + }, + "virtual_key_id": { "type": "string", "description": "Virtual key ID (required for virtual_key* scopes)" }, + "provider_id": { "type": "string", "description": "Provider ID (required for provider* scopes)" }, + 
"provider_key_id": { "type": "string", "description": "Provider key ID (required for provider_key and virtual_key_provider_key scopes)" }, + "match_type": { + "type": "string", + "enum": ["exact", "wildcard"], + "description": "How the pattern is matched against model names" + }, + "pattern": { "type": "string", "description": "Model name pattern to match" }, + "request_types": { + "type": "array", + "minItems": 1, + "items": { "type": "string" }, + "description": "Request types this override applies to" + }, + "pricing_patch": { "type": "string", "description": "JSON-encoded pricing fields to override" }, + "config_hash": { "type": "string", "description": "Internal hash for change detection (auto-managed)" } + }, + "required": ["id", "name", "scope_kind", "match_type", "pattern", "request_types"] + } } }, "additionalProperties": false @@ -1359,7 +1420,7 @@ ] } }, - "saml": { + "scim": { "type": "object", "properties": { "enabled": { @@ -1372,7 +1433,7 @@ "okta", "entra" ], - "description": "SAML provider type (empty when not configured)" + "description": "SCIM/SSO provider type (empty when not configured)" }, "config": { "type": "object" @@ -1409,6 +1470,9 @@ "clientSecret": { "type": "string" }, + "apiToken": { + "type": "string" + }, "audience": { "type": "string" }, @@ -1424,7 +1488,9 @@ }, "required": [ "issuerUrl", - "clientId" + "clientId", + "clientSecret", + "apiToken" ] } } @@ -1606,6 +1672,89 @@ "type": "string" } } + }, + "largePayloadOptimization": { + "type": "object", + "description": "Large payload streaming optimization configuration", + "properties": { + "enabled": { + "type": "boolean", + "default": false + }, + "requestThresholdBytes": { + "type": "integer", + "minimum": 0, + "default": 10485760 + }, + "responseThresholdBytes": { + "type": "integer", + "minimum": 0, + "default": 10485760 + }, + "prefetchSizeBytes": { + "type": "integer", + "minimum": 0, + "default": 65536 + }, + "maxPayloadBytes": { + "type": "integer", + "minimum": 0, + 
"default": 524288000 + }, + "truncatedLogBytes": { + "type": "integer", + "minimum": 0, + "default": 1048576 + } + } + }, + "websocket": { + "type": "object", + "description": "Optional tuning for the WebSocket gateway (Responses API WebSocket Mode, Realtime API)", + "properties": { + "maxConnectionsPerUser": { + "type": "integer", + "minimum": 1, + "description": "Maximum concurrent WebSocket connections per user", + "default": 100 + }, + "transcriptBufferSize": { + "type": "integer", + "minimum": 1, + "description": "Number of transcript entries to buffer for Realtime API mid-session fallback", + "default": 100 + }, + "pool": { + "type": "object", + "description": "Upstream WebSocket connection pool configuration", + "properties": { + "maxIdlePerKey": { + "type": "integer", + "minimum": 1, + "description": "Maximum idle connections per provider/key combination", + "default": 50 + }, + "maxTotalConnections": { + "type": "integer", + "minimum": 1, + "description": "Maximum total idle connections across all providers", + "default": 1000 + }, + "idleTimeoutSeconds": { + "type": "integer", + "minimum": 1, + "description": "Seconds before an idle connection is evicted", + "default": 600 + }, + "maxConnectionLifetimeSeconds": { + "type": "integer", + "minimum": 1, + "description": "Maximum lifetime of a connection in seconds", + "default": 7200 + } + } + } + } } } }, @@ -1689,6 +1838,81 @@ "maxOpenConns": { "type": "integer", "minimum": 2 + }, + "objectStorage": { + "type": "object", + "properties": { + "enabled": { + "type": "boolean" + }, + "type": { + "type": "string", + "enum": ["s3", "gcs"] + }, + "bucket": { + "type": "string" + }, + "prefix": { + "type": "string" + }, + "compress": { + "type": "boolean" + }, + "region": { + "type": "string" + }, + "endpoint": { + "type": "string" + }, + "accessKeyId": { + "type": "string" + }, + "secretAccessKey": { + "type": "string" + }, + "sessionToken": { + "type": "string" + }, + "roleArn": { + "type": "string" + }, + 
"forcePathStyle": { + "type": "boolean" + }, + "projectId": { + "type": "string" + }, + "credentialsJson": { + "type": "string" + }, + "existingSecret": { + "type": "string" + }, + "accessKeyIdKey": { + "type": "string" + }, + "secretAccessKeyKey": { + "type": "string" + }, + "sessionTokenKey": { + "type": "string" + }, + "roleArnKey": { + "type": "string" + }, + "credentialsJsonKey": { + "type": "string" + } + }, + "if": { + "properties": { + "enabled": { "const": true } + }, + "required": ["enabled"] + }, + "then": { + "required": ["type", "bucket"] + } } } } @@ -2581,6 +2805,11 @@ "minimum": 1, "maximum": 10000, "description": "Maximum number of TCP connections per provider host. For HTTP/2 (e.g. Bedrock), each connection supports ~100 concurrent streams. Default: 5000." + }, + "beta_header_overrides": { + "type": "object", + "additionalProperties": { "type": "boolean" }, + "description": "Override default Anthropic beta header support per provider. Keys are header prefixes, values are true (supported) or false (unsupported)." } } }, @@ -2658,6 +2887,15 @@ "type": "string", "description": "HTTP or SSE URL (required for HTTP or SSE connections)" }, + "secretRef": { + "type": "object", + "description": "Reference to an existing Kubernetes secret holding the MCP connection_string. Chart injects BIFROST_MCP__CONNECTION_STRING and rewrites connection_string in config.json.", + "properties": { + "name": { "type": "string" }, + "connectionStringKey": { "type": "string", "default": "connection-string" } + }, + "additionalProperties": false + }, "authType": { "type": "string", "enum": ["none", "headers", "oauth"], @@ -2744,6 +2982,16 @@ "type": "number", "minimum": 0 } + }, + "allowedExtraHeaders": { + "type": "array", + "items": { "type": "string" }, + "description": "Allowlist of request-level headers that callers may forward to this MCP server. Use ['*'] to allow all headers." 
+ }, + "allowOnAllVirtualKeys": { + "type": "boolean", + "description": "When true, this MCP server is accessible to all virtual keys without requiring explicit per-key assignment.", + "default": false } }, "required": [ @@ -2832,7 +3080,7 @@ }, "allowed_models": { "type": "array", - "description": "Allowed models for this provider config (empty means all models allowed)", + "description": "Allowed models for this provider config. Use [\"*\"] to allow all models; empty array denies all (deny-by-default).", "items": { "type": "string" } @@ -2872,7 +3120,7 @@ "items": { "type": "string" }, - "description": "Supported models for this key" + "description": "Models this key can access. Use [\"*\"] to allow all models; empty array denies all (deny-by-default)." }, "weight": { "type": "number", @@ -2899,7 +3147,6 @@ "description": "Azure API version" } }, - "required": ["endpoint"], "additionalProperties": false }, "vertex_key_config": { @@ -2929,7 +3176,6 @@ "description": "Model to deployment mappings" } }, - "required": ["project_id", "region"], "additionalProperties": false }, "bedrock_key_config": { @@ -3009,10 +3255,6 @@ "description": "Exact model name served on this VLLM instance" } }, - "required": [ - "url", - "model_name" - ], "additionalProperties": false } }, @@ -3067,8 +3309,7 @@ ] } } - ], - "required": ["key_id", "name", "value"] + ] } } }, @@ -3078,4 +3319,4 @@ "additionalProperties": false } } -} +} \ No newline at end of file diff --git a/helm-charts/bifrost/values.yaml b/helm-charts/bifrost/values.yaml index fb468794c6..41b9a67da0 100644 --- a/helm-charts/bifrost/values.yaml +++ b/helm-charts/bifrost/values.yaml @@ -56,7 +56,7 @@ podSecurityContext: securityContext: capabilities: drop: - - ALL + - ALL readOnlyRootFilesystem: false runAsNonRoot: true runAsUser: 1000 @@ -163,6 +163,14 @@ bifrost: # Can be set as a secret or environment variable encryptionKey: "" + # Use an existing Kubernetes secret for the encryption key. 
+ # When `name` is set, takes precedence over `encryptionKey`: the chart + # injects BIFROST_ENCRYPTION_KEY into the pod via secretKeyRef and writes + # `encryption_key: "env.BIFROST_ENCRYPTION_KEY"` in the rendered config.json. + encryptionKeySecret: + name: "" + key: "encryption-key" + # Authentication configuration (top-level) # This controls authentication for Bifrost API and dashboard authConfig: @@ -188,7 +196,11 @@ bifrost: enforceGovernanceHeader: false allowDirectKeys: false maxRequestBodySizeMb: 100 - enableLitellmFallbacks: false + compat: + convertTextToChat: false + convertChatToResponses: false + shouldDropParams: false + shouldConvertParams: false prometheusLabels: [] # Header filtering configuration for x-bf-eh-* headers forwarded to LLM providers headerFilterConfig: @@ -204,6 +216,7 @@ bifrost: # mcpToolSyncInterval: 0 # Global tool sync interval in minutes (0 = disabled) # hideDeletedVirtualKeysInFilters: false # Omit deleted virtual keys from logs/MCP filter data # whitelistedRoutes: [] # Routes that bypass auth middleware + # routingChainMaxDepth: 10 # Maximum depth for routing rule chain evaluation # Framework configuration framework: @@ -255,46 +268,46 @@ bifrost: # ca_cert_pem: "" # PEM-encoded CA cert for SSL-intercepting proxies # send_back_raw_response: false # Include raw response in BifrostResponse # store_raw_request_response: false # Capture raw payloads for plugins only; not returned to client - # - # anthropic: - # keys: - # - name: "anthropic-key" - # value: "sk-ant-..." - # weight: 1 - # - # # Azure OpenAI example (requires azure_key_config) - # azure: - # keys: - # - name: "azure-key" - # value: "..." 
- # weight: 1 - # azure_key_config: - # endpoint: "https://your-resource.openai.azure.com" - # api_version: "2024-02-15-preview" - # deployments: - # gpt-4o: "my-gpt4o-deployment" - # - # # Google Vertex AI example (requires vertex_key_config) - # vertex: - # keys: - # - name: "vertex-key" - # value: "" - # weight: 1 - # vertex_key_config: - # project_id: "my-gcp-project" - # region: "us-central1" - # auth_credentials: "env.GOOGLE_CREDENTIALS" - # - # # AWS Bedrock example (requires bedrock_key_config) - # bedrock: - # keys: - # - name: "bedrock-key" - # value: "" - # weight: 1 - # bedrock_key_config: - # region: "us-east-1" - # access_key: "env.AWS_ACCESS_KEY_ID" - # secret_key: "env.AWS_SECRET_ACCESS_KEY" + # + # anthropic: + # keys: + # - name: "anthropic-key" + # value: "sk-ant-..." + # weight: 1 + # + # # Azure OpenAI example (requires azure_key_config) + # azure: + # keys: + # - name: "azure-key" + # value: "..." + # weight: 1 + # azure_key_config: + # endpoint: "https://your-resource.openai.azure.com" + # api_version: "2024-02-15-preview" + # deployments: + # gpt-4o: "my-gpt4o-deployment" + # + # # Google Vertex AI example (requires vertex_key_config) + # vertex: + # keys: + # - name: "vertex-key" + # value: "" + # weight: 1 + # vertex_key_config: + # project_id: "my-gcp-project" + # region: "us-central1" + # auth_credentials: "env.GOOGLE_CREDENTIALS" + # + # # AWS Bedrock example (requires bedrock_key_config) + # bedrock: + # keys: + # - name: "bedrock-key" + # value: "" + # weight: 1 + # bedrock_key_config: + # region: "us-east-1" + # access_key: "env.AWS_ACCESS_KEY_ID" + # secret_key: "env.AWS_SECRET_ACCESS_KEY" # Provider secrets - use existing Kubernetes secrets for provider API keys # These will be injected as environment variables that can be referenced in providers config @@ -318,6 +331,13 @@ bifrost: # command: "/path/to/mcp/server" # args: [] # envs: [] + # # Optional: source connection_string from a Kubernetes secret. 
+ # # When set, chart injects BIFROST_MCP__CONNECTION_STRING + # # into the pod and rewrites connection_string in config.json + # # to `env.BIFROST_MCP__CONNECTION_STRING`. + # secretRef: + # name: "" # k8s secret name + # connectionStringKey: "connection-string" # key within the secret # toolSyncInterval: "10m" # Global tool sync interval (Go duration) # Tool manager configuration toolManagerConfig: @@ -387,17 +407,17 @@ bifrost: config: service_name: "bifrost" collector_url: "" - trace_type: "otel" + trace_type: "genai_extension" protocol: "grpc" # Push-based metrics export via OTLP (recommended for multi-node clusters) metrics_enabled: false - metrics_endpoint: "" # e.g., http://otel-collector:4318/v1/metrics (HTTP) or otel-collector:4317 (gRPC) - metrics_push_interval: 15 # Push interval in seconds (1-300) + metrics_endpoint: "" # e.g., http://otel-collector:4318/v1/metrics (HTTP) or otel-collector:4317 (gRPC) + metrics_push_interval: 15 # Push interval in seconds (1-300) # Custom headers for the collector (supports env.VAR_NAME prefix for env var substitution) headers: {} # TLS configuration - tls_ca_cert: "" # Path to TLS CA certificate file - insecure: false # Skip TLS verification (ignored if tls_ca_cert is set) + tls_ca_cert: "" # Path to TLS CA certificate file + insecure: false # Skip TLS verification (ignored if tls_ca_cert is set) datadog: enabled: false @@ -486,13 +506,15 @@ bifrost: # name: "Route to Azure" # description: "Route GPT requests to Azure" # enabled: true - # cel_expression: "request.model.startsWith('gpt-')" - # provider: "azure" - # model: "" # Empty means use original model + # cel_expression: "model.startsWith('gpt-')" + # targets: + # - provider: "azure" + # model: "" # Empty means use original model + # weight: 1.0 # fallbacks: ["openai"] - # scope: "global" # Options: global, team, customer, virtual_key - # scope_id: "" # Required for non-global scopes - # priority: 0 # Lower = evaluated first + # scope: "global" # Options: global, 
team, customer, virtual_key + # scope_id: "" # Required for non-global scopes + # priority: 0 # Lower = evaluated first authConfig: adminUsername: "" adminPassword: "" @@ -535,8 +557,8 @@ bifrost: # mDNS discovery mdnsService: "" - # SAML/SCIM configuration for enterprise SSO - saml: + # SCIM/SSO configuration for enterprise SSO + scim: enabled: false # Provider: okta, entra provider: "" @@ -545,6 +567,7 @@ bifrost: # issuerUrl: "https://your-domain.okta.com/oauth2/default" # clientId: "" # clientSecret: "" + # apiToken: "" # audience: "" # userIdField: "sub" # teamIdsField: "groups" @@ -590,11 +613,30 @@ bifrost: disabled: false hmacKey: "" + # Large payload optimization - streams large payloads without full materialization + # largePayloadOptimization: + # enabled: false + # requestThresholdBytes: 10485760 # 10MB + # responseThresholdBytes: 10485760 # 10MB + # prefetchSizeBytes: 65536 # 64KB + # maxPayloadBytes: 524288000 # 500MB + # truncatedLogBytes: 1048576 # 1MB + + # WebSocket gateway configuration (Responses API, Realtime API) + # websocket: + # maxConnectionsPerUser: 100 + # transcriptBufferSize: 100 + # pool: + # maxIdlePerKey: 50 + # maxTotalConnections: 1000 + # idleTimeoutSeconds: 600 + # maxConnectionLifetimeSeconds: 7200 + # Storage configuration storage: # Default storage mode: sqlite or postgres # Used as fallback when per-store type is not specified - mode: sqlite # Options: sqlite, postgres + mode: sqlite # Options: sqlite, postgres # Persistent volume for SQLite databases (when using sqlite for any store) persistence: @@ -608,7 +650,7 @@ storage: configStore: enabled: true # Backend type for config store. 
Empty string uses storage.mode as default - type: "" # Options: sqlite, postgres, or "" (uses storage.mode) + type: "" # Options: sqlite, postgres, or "" (uses storage.mode) # PostgreSQL connection pool tuning (only applies when type is postgres) # maxIdleConns: 5 # maxOpenConns: 50 @@ -617,11 +659,34 @@ storage: logsStore: enabled: true # Backend type for logs store. Empty string uses storage.mode as default - type: "" # Options: sqlite, postgres, or "" (uses storage.mode) + type: "" # Options: sqlite, postgres, or "" (uses storage.mode) # PostgreSQL connection pool tuning (only applies when type is postgres) # maxIdleConns: 5 # maxOpenConns: 50 + # Object storage for offloading large log payloads (optional) + # When enabled, request/response payloads are stored in S3/GCS + # while the DB keeps only lightweight index data for fast analytics. + objectStorage: + enabled: false + # type: s3 # Options: s3, gcs + # bucket: "" # Bucket name + # prefix: bifrost # Key prefix for stored objects + # compress: false # Enable gzip compression for stored objects + + # S3 configuration (when type is s3) + # region: us-east-1 + # endpoint: "" # Custom endpoint for MinIO/R2 + # accessKeyId: "" # Leave empty to use default AWS credential chain + # secretAccessKey: "" # (instance role, env vars, shared credentials, etc.) 
+ # sessionToken: "" # AWS STS session token (optional) + # roleArn: "" # AWS IAM role ARN to assume via STS (works with static creds or instance role) + # forcePathStyle: false # Set true for MinIO + + # GCS configuration (when type is gcs) + # projectId: "" + # credentialsJson: "" # Service account JSON, omit for default credentials + # PostgreSQL configuration (when any store uses postgres) postgresql: # Deploy PostgreSQL as part of this chart @@ -672,7 +737,7 @@ postgresql: vectorStore: # Enable vector store for semantic caching enabled: false - type: none # Options: none, weaviate, redis, qdrant + type: none # Options: none, weaviate, redis, qdrant # Weaviate configuration weaviate: @@ -733,10 +798,10 @@ vectorStore: username: "" password: "" database: 0 - useTls: false # Enable TLS for Redis connection - insecureSkipVerify: false # Skip TLS certificate verification - caCertPem: "" # PEM-encoded CA certificate to trust for Redis TLS - clusterMode: false # Use Redis Cluster mode (required for AWS MemoryDB) + useTls: false # Enable TLS for Redis connection + insecureSkipVerify: false # Skip TLS certificate verification + caCertPem: "" # PEM-encoded CA certificate to trust for Redis TLS + clusterMode: false # Use Redis Cluster mode (required for AWS MemoryDB) # Connection pool tuning (optional) # poolSize: 10 # Maximum number of socket connections # maxActiveConns: 0 # Maximum number of active connections @@ -819,7 +884,7 @@ vectorStore: external: enabled: false apiKey: "" - indexHost: "" # Index host URL from Pinecone console (e.g., your-index.svc.environment.pinecone.io) + indexHost: "" # Index host URL from Pinecone console (e.g., your-index.svc.environment.pinecone.io) # Use existing Kubernetes secret for API key (takes precedence over apiKey field) existingSecret: "" apiKeyKey: "api-key" @@ -836,7 +901,6 @@ envFrom: [] # - configMapRef: # name: my-configmap - # Init containers to run before the main application container. 
# Provide a list of init containers using standard Kubernetes container spec. initContainers: [] diff --git a/helm-charts/index.yaml b/helm-charts/index.yaml index 8036ed2ba7..8b2ac96324 100644 --- a/helm-charts/index.yaml +++ b/helm-charts/index.yaml @@ -1,6 +1,27 @@ apiVersion: v1 entries: bifrost: + - apiVersion: v2 + appVersion: 1.4.11 + created: "2026-04-15T12:00:00.000000+00:00" + description: A Helm chart for deploying Bifrost - AI Gateway with unified interface for multiple providers + digest: "" + home: https://www.getmaxim.ai/bifrost + icon: https://www.getmaxim.ai/bifrost/bifrost-logo-only.png + keywords: + - ai + - gateway + - llm + maintainers: + - email: akshay@getmaxim.ai + name: Bifrost Team + name: bifrost + sources: + - https://github.com/maximhq/bifrost + type: application + urls: + - https://maximhq.github.io/bifrost/helm-charts/bifrost-2.1.0-prerelease2.tgz + version: 2.1.0-prerelease2 - apiVersion: v2 appVersion: 1.4.11 created: "2026-04-13T12:00:00.000000+00:00" @@ -20,8 +41,8 @@ entries: - https://github.com/maximhq/bifrost type: application urls: - - https://maximhq.github.io/bifrost/helm-charts/bifrost-2.0.18.tgz - version: 2.0.18 + - https://maximhq.github.io/bifrost/helm-charts/bifrost-2.0.18-rc.1.tgz + version: 2.0.18-rc.1 - apiVersion: v2 appVersion: 1.4.11 created: "2026-04-08T12:00:00.000000+00:00" @@ -628,4 +649,4 @@ entries: urls: - https://maximhq.github.io/bifrost/helm-charts/bifrost-1.3.36.tgz version: 1.3.36 -generated: "2026-04-08T12:00:00.000000+00:00" +generated: "2026-04-13T12:00:00.000000+00:00" diff --git a/transports/config.schema.json b/transports/config.schema.json index 913cd068cd..9653000cdc 100644 --- a/transports/config.schema.json +++ b/transports/config.schema.json @@ -10,6 +10,15 @@ "description": "The schema version. 
This should be set to \"https://www.getbifrost.ai/schema\"", "const": "https://www.getbifrost.ai/schema" }, + "version": { + "type": "integer", + "description": "Controls how empty arrays in allow-list fields (models, allowed_models, key_ids, tools_to_execute) are interpreted. Omit or set to 2 for v1.5.0+ semantics: empty = deny all, [\"*\"] = allow all. Set to 1 to restore v1.4.x semantics: empty = allow all.", + "enum": [ + 1, + 2 + ], + "default": 2 + }, "encryption_key": { "type": "string", "description": "You can set the value as env. to use an environment variable. We also read encryption key from BIFROST_ENCRYPTION_KEY environment variable. Note: once set, the encryption key cannot be changed unless you clean up the database. Accepts any string; a secure 32-byte AES-256 key will be derived using Argon2id KDF. If not provided, data will be saved in plain text. Recommended: use a passphrase of at least 16 bytes for better security" @@ -94,9 +103,29 @@ "minimum": 1, "description": "Maximum request body size in MB" }, - "enable_litellm_fallbacks": { - "type": "boolean", - "description": "Enable litellm-specific fallbacks for text completion for Groq" + "compat": { + "type": "object", + "description": "Compat plugin configuration for request type conversion, parameter dropping, and parameter value conversion", + "properties": { + "convert_text_to_chat": { + "type": "boolean", + "description": "Convert text completion requests to chat for models that only support chat" + }, + "convert_chat_to_responses": { + "type": "boolean", + "description": "Convert chat completion requests to responses for models that only support responses" + }, + "should_drop_params": { + "type": "boolean", + "description": "Drop unsupported parameters based on model catalog allowlist" + }, + "should_convert_params": { + "type": "boolean", + "description": "Converts model parameter values that are not supported by the model.", + "default": false + } + }, + "additionalProperties": false }, 
"header_filter_config": { "type": "object", @@ -172,7 +201,10 @@ }, "mcp_code_mode_binding_level": { "type": "string", - "enum": ["server", "tool"], + "enum": [ + "server", + "tool" + ], "description": "Code mode binding level for MCP tools" }, "mcp_tool_sync_interval": { @@ -180,6 +212,17 @@ "minimum": 0, "description": "Global tool sync interval in minutes (0 = disabled)", "default": 10 + }, + "mcp_disable_auto_tool_inject": { + "type": "boolean", + "description": "When true, MCP tools are not automatically injected into requests. Tools are only included when explicitly specified via request context filters or headers, such as x-bf-mcp-include-tools or x-bf-mcp-include-clients.", + "default": false + }, + "routing_chain_max_depth": { + "type": "integer", + "minimum": 1, + "description": "Maximum depth for routing rule chain evaluation", + "default": 10 } }, "additionalProperties": false @@ -219,7 +262,7 @@ "$ref": "#/$defs/provider" }, "ollama": { - "$ref": "#/$defs/provider" + "$ref": "#/$defs/provider_with_ollama_config" }, "groq": { "$ref": "#/$defs/provider" @@ -231,7 +274,7 @@ "$ref": "#/$defs/provider" }, "sgl": { - "$ref": "#/$defs/provider" + "$ref": "#/$defs/provider_with_sgl_config" }, "parasail": { "$ref": "#/$defs/provider" @@ -240,7 +283,7 @@ "$ref": "#/$defs/provider" }, "replicate": { - "$ref": "#/$defs/provider" + "$ref": "#/$defs/provider_with_replicate_config" }, "elevenlabs": { "$ref": "#/$defs/provider" @@ -256,6 +299,15 @@ }, "fireworks": { "$ref": "#/$defs/provider" + }, + "nebius": { + "$ref": "#/$defs/provider" + }, + "xai": { + "$ref": "#/$defs/provider" + }, + "runway": { + "$ref": "#/$defs/provider" } }, "additionalProperties": true @@ -292,10 +344,13 @@ "format": "date-time", "description": "Last time budget was reset" }, - "calendar_aligned": { - "type": "boolean", - "description": "Snap resets to calendar boundaries (day/week/month/year start)", - "default": false + "virtual_key_id": { + "type": "string", + "description": "ID of the 
virtual key this budget belongs to (mutually exclusive with provider_config_id)" + }, + "provider_config_id": { + "type": "integer", + "description": "ID of the provider config this budget belongs to (mutually exclusive with virtual_key_id)" } }, "required": [ @@ -462,6 +517,11 @@ "description": "Whether the virtual key is active", "default": true }, + "calendar_aligned": { + "type": "boolean", + "description": "Snap all budget resets to calendar boundaries (day, week, month, year)", + "default": false + }, "team_id": { "type": "string", "description": "Associated team ID (mutually exclusive with customer_id)" @@ -470,24 +530,20 @@ "type": "string", "description": "Associated customer ID (mutually exclusive with team_id)" }, - "budget_id": { - "type": "string", - "description": "Associated budget ID" - }, "rate_limit_id": { "type": "string", "description": "Associated rate limit ID" }, "provider_configs": { "type": "array", - "description": "Provider configurations for this virtual key (empty means all providers allowed)", + "description": "Provider configurations for this virtual key (empty means no providers allowed, deny-by-default)", "items": { "$ref": "#/$defs/virtual_key_provider_config" } }, "mcp_configs": { "type": "array", - "description": "MCP configurations for this virtual key", + "description": "MCP configurations for this virtual key (empty array means no MCP tools allowed, deny-by-default)", "items": { "$ref": "#/$defs/virtual_key_mcp_config" } @@ -507,6 +563,13 @@ "$ref": "#/$defs/routing_rule" } }, + "pricing_overrides": { + "type": "array", + "description": "Scoped pricing overrides applied at runtime by the model catalog", + "items": { + "$ref": "#/$defs/provider_pricing_override" + } + }, "auth_config": { "$ref": "#/$defs/auth_config" }, @@ -550,10 +613,18 @@ "items": { "type": "object", "properties": { + "id": { + "type": "string", + "description": "Provider row ID" + }, "name": { "type": "string", "description": "Provider name" }, + 
"description": { + "type": "string", + "description": "Operator-facing provider description" + }, "budget_id": { "type": "string", "description": "Associated budget ID" @@ -573,9 +644,26 @@ "store_raw_request_response": { "type": "boolean", "description": "Capture raw request/response for internal logging only; strip from API responses returned to clients (default: false)" + }, + "network_config": { + "$ref": "#/$defs/network_config" + }, + "proxy_config": { + "$ref": "#/$defs/proxy_config" + }, + "custom_provider_config": { + "$ref": "#/$defs/custom_provider_config" + }, + "concurrency_and_buffer_size": { + "$ref": "#/$defs/concurrency_and_buffer_size" + }, + "openai_config": { + "$ref": "#/$defs/openai_config" } }, - "required": ["name"] + "required": [ + "name" + ] } } }, @@ -878,22 +966,126 @@ } } ] + }, + "object_storage": { + "type": "object", + "description": "Optional object storage for offloading log payloads. When configured, large request/response payloads are stored in S3/GCS while the DB keeps only lightweight index data.", + "properties": { + "type": { + "type": "string", + "enum": [ + "s3", + "gcs" + ], + "description": "Object storage backend type" + }, + "bucket": { + "type": "string", + "minLength": 1, + "description": "Bucket name. Supports env var reference (e.g. env.S3_BUCKET)" + }, + "prefix": { + "type": "string", + "description": "Key prefix for stored objects (default: bifrost)", + "default": "bifrost" + }, + "compress": { + "type": "boolean", + "description": "Enable gzip compression for stored objects. Default: false", + "default": false + } + }, + "required": [ + "type", + "bucket" + ], + "if": { + "properties": { + "type": { + "const": "s3" + } + } + }, + "then": { + "properties": { + "type": true, + "bucket": true, + "prefix": true, + "region": { + "type": "string", + "description": "AWS region. Supports env var reference" + }, + "endpoint": { + "type": "string", + "description": "Custom S3-compatible endpoint for MinIO/R2. 
Supports env var reference" + }, + "access_key_id": { + "type": "string", + "description": "AWS access key ID. Omit to use default credential chain (instance role, env vars, etc.). Supports env var reference" + }, + "secret_access_key": { + "type": "string", + "description": "AWS secret access key. Supports env var reference" + }, + "session_token": { + "type": "string", + "description": "AWS session token for STS temporary credentials. Supports env var reference" + }, + "role_arn": { + "type": "string", + "description": "AWS IAM role ARN for STS AssumeRole. Works with static creds or instance role. Supports env var reference" + }, + "force_path_style": { + "type": "boolean", + "description": "Use path-style URLs for S3 (required for MinIO). Default: false", + "default": false + }, + "compress": true + }, + "dependentRequired": { + "access_key_id": [ + "secret_access_key" + ], + "secret_access_key": [ + "access_key_id" + ], + "session_token": [ + "access_key_id", + "secret_access_key" + ] + }, + "additionalProperties": false + }, + "else": { + "properties": { + "type": true, + "bucket": true, + "prefix": true, + "credentials_json": { + "type": "string", + "description": "GCP service account credentials JSON or file path. Omit to use Application Default Credentials. Supports env var reference" + }, + "credentials": { + "type": "string", + "description": "Deprecated: use credentials_json. Kept for backwards compatibility." + }, + "project_id": { + "type": "string", + "description": "GCP project ID override. Supports env var reference" + }, + "compress": true + }, + "additionalProperties": false + } + }, + "retention_days": { + "type": "integer", + "minimum": 0, + "description": "Days to retain log entries. 0 disables retention-based cleanup." 
} }, "additionalProperties": false }, - "cluster_config": { - "$ref": "#/$defs/cluster_config" - }, - "saml_config": { - "$ref": "#/$defs/saml_config" - }, - "load_balancer_config": { - "$ref": "#/$defs/load_balancer_config" - }, - "guardrails_config": { - "$ref": "#/$defs/guardrails_config" - }, "plugins": { "type": "array", "description": "Plugins configuration", @@ -910,7 +1102,7 @@ }, "name": { "type": "string", - "description": "Name of the plugin (built-in: telemetry, logging, governance, maxim, semantic_cache, otel, or custom plugin name)" + "description": "Name of the plugin (built-in: telemetry, prompts, logging, governance, maxim, semantic_cache, otel, or custom plugin name)" }, "config": { "type": "object", @@ -930,8 +1122,12 @@ }, "placement": { "type": "string", - "enum": ["pre_builtin", "post_builtin"], - "description": "Whether this plugin runs before or after built-in plugins. Default: post_builtin", + "enum": [ + "pre_builtin", + "post_builtin", + "builtin" + ], + "description": "Whether this plugin runs before, after, or as a built-in. 
Default: post_builtin", "optional": true, "default": "post_builtin" }, @@ -1010,10 +1206,15 @@ "description": "Password for basic authentication" } }, - "required": ["username", "password"] + "required": [ + "username", + "password" + ] } }, - "required": ["push_gateway_url"] + "required": [ + "push_gateway_url" + ] } }, "additionalProperties": false @@ -1189,7 +1390,7 @@ "oneOf": [ { "type": "string", - "pattern": "^[0-9]+(ns|us|µs|ms|s|m|h)$" + "pattern": "^[0-9]+(ns|us|\u00b5s|ms|s|m|h)$" }, { "type": "integer", @@ -1319,7 +1520,9 @@ "type": "string", "description": "Type of trace to use for the OTEL collector", "enum": [ - "genai_extension" + "genai_extension", + "vercel", + "open_inference" ] }, "protocol": { @@ -1437,14 +1640,11 @@ "additionalProperties": false } }, - "audit_logs": { - "$ref": "#/$defs/audit_logs_config" - }, - "large_payload_optimization": { - "$ref": "#/$defs/large_payload_optimization" - }, "websocket": { "$ref": "#/$defs/websocket_config" + }, + "scim_config": { + "$ref": "#/$defs/scim_config" } }, "additionalProperties": false, @@ -1475,7 +1675,9 @@ "maximum": 1 } }, - "required": ["weight"], + "required": [ + "weight" + ], "additionalProperties": false }, "routing_rule": { @@ -1503,6 +1705,11 @@ "type": "string", "description": "CEL (Common Expression Language) expression for rule evaluation" }, + "chain_rule": { + "type": "boolean", + "default": false, + "description": "If true, re-evaluates routing chain after this rule matches" + }, "targets": { "type": "array", "minItems": 1, @@ -1522,12 +1729,17 @@ }, "scope": { "type": "string", - "enum": ["global", "team", "customer", "virtual_key"], + "enum": [ + "global", + "team", + "customer", + "virtual_key" + ], "description": "Rule scope level", "default": "global" }, "scope_id": { - "type": ["string", "null"], + "type": "string", "description": "Entity ID for non-global scopes (required for non-global scope)" }, "priority": { @@ -1541,8 +1753,37 @@ "additionalProperties": true } }, - 
"required": ["id", "name", "targets"], - "additionalProperties": false + "required": [ + "id", + "name", + "targets" + ], + "additionalProperties": false, + "if": { + "properties": { + "scope": { + "enum": [ + "team", + "customer", + "virtual_key" + ] + } + }, + "required": [ + "scope" + ] + }, + "then": { + "required": [ + "scope_id" + ], + "properties": { + "scope_id": { + "type": "string", + "minLength": 1 + } + } + } }, "virtual_key_provider_config": { "type": "object", @@ -1561,270 +1802,29 @@ "description": "Provider name" }, "weight": { - "type": "number", - "description": "Weight for load balancing", - "default": 1.0 + "type": [ + "number", + "null" + ], + "description": "Weight for load balancing (null opts out of weighted routing)", + "default": null }, "allowed_models": { "type": "array", - "description": "Allowed models for this provider config (empty means all models allowed)", + "description": "Allowed models for this provider config. Use [\"*\"] to allow all models; empty array denies all (deny-by-default).", "items": { "type": "string" } }, - "budget_id": { - "type": "string", - "description": "Associated budget ID" - }, "rate_limit_id": { "type": "string", "description": "Associated rate limit ID" }, - "keys": { + "key_ids": { "type": "array", - "description": "Provider keys for this config (empty means all keys allowed for this provider)", + "description": "Key identifiers allowed for this provider config. Use [\"*\"] to allow all keys; empty array denies all (deny-by-default). In config.json, values are key names. Via the API, values are key UUIDs.", "items": { - "type": "object", - "properties": { - "id": { - "type": "integer", - "description": "Key database ID (auto-generated)" - }, - "key_id": { - "type": "string", - "description": "Key UUID identifier" - }, - "name": { - "type": "string", - "description": "Key name (must be unique)" - }, - "value": { - "type": "string", - "description": "API key value (can use env. 
prefix)" - }, - "models": { - "type": "array", - "items": { - "type": "string" - }, - "description": "Supported models for this key" - }, - "weight": { - "type": "number", - "minimum": 0, - "default": 1.0, - "description": "Weight for load balancing" - }, - "azure_key_config": { - "type": "object", - "properties": { - "endpoint": { - "type": "string", - "description": "Azure endpoint (can use env. prefix)" - }, - "deployments": { - "type": "object", - "additionalProperties": { - "type": "string" - }, - "description": "Model to deployment mappings" - }, - "api_version": { - "type": "string", - "description": "Azure API version" - } - }, - "required": [ - "endpoint" - ], - "additionalProperties": false - }, - "vertex_key_config": { - "type": "object", - "properties": { - "project_id": { - "type": "string", - "description": "Google Cloud project ID (can use env. prefix)" - }, - "project_number": { - "type": "string", - "description": "Google Cloud project number" - }, - "region": { - "type": "string", - "description": "Google Cloud region" - }, - "auth_credentials": { - "type": "string", - "description": "Authentication credentials (can use env. prefix)" - }, - "deployments": { - "type": "object", - "additionalProperties": { - "type": "string" - }, - "description": "Model to deployment mappings" - } - }, - "required": [ - "project_id", - "region" - ], - "additionalProperties": false - }, - "bedrock_key_config": { - "type": "object", - "properties": { - "access_key": { - "type": "string", - "description": "AWS access key (can use env. prefix)" - }, - "secret_key": { - "type": "string", - "description": "AWS secret key (can use env. prefix)" - }, - "session_token": { - "type": "string", - "description": "AWS session token (can use env. 
prefix)" - }, - "region": { - "type": "string", - "description": "AWS region" - }, - "arn": { - "type": "string", - "description": "AWS ARN" - }, - "role_arn": { - "type": "string", - "description": "AWS IAM role ARN for AssumeRole (can use env. prefix)" - }, - "external_id": { - "type": "string", - "description": "External ID for AssumeRole (can use env. prefix)" - }, - "session_name": { - "type": "string", - "description": "Role session name for AssumeRole (can use env. prefix)" - }, - "deployments": { - "type": "object", - "additionalProperties": { - "type": "string" - }, - "description": "Model to deployment mappings" - }, - "batch_s3_config": { - "type": "object", - "description": "S3 bucket configuration for Bedrock batch operations", - "properties": { - "buckets": { - "type": "array", - "description": "List of S3 bucket configurations", - "items": { - "type": "object", - "properties": { - "bucket_name": { - "type": "string", - "description": "S3 bucket name" - }, - "prefix": { - "type": "string", - "description": "S3 key prefix for batch files" - }, - "is_default": { - "type": "boolean", - "description": "Whether this is the default bucket for batch operations" - } - }, - "required": ["bucket_name"], - "additionalProperties": false - } - } - }, - "additionalProperties": false - } - }, - "additionalProperties": false - }, - "vllm_key_config": { - "type": "object", - "properties": { - "url": { - "type": "string", - "minLength": 1, - "description": "VLLM server base URL (can use env. 
prefix)" - }, - "model_name": { - "type": "string", - "minLength": 1, - "description": "Exact model name served on this VLLM instance" - } - }, - "required": [ - "url", - "model_name" - ], - "additionalProperties": false - } - }, - "oneOf": [ - { - "not": { - "anyOf": [ - { "required": ["azure_key_config"] }, - { "required": ["vertex_key_config"] }, - { "required": ["bedrock_key_config"] }, - { "required": ["vllm_key_config"] } - ] - } - }, - { - "required": ["azure_key_config"], - "not": { - "anyOf": [ - { "required": ["vertex_key_config"] }, - { "required": ["bedrock_key_config"] }, - { "required": ["vllm_key_config"] } - ] - } - }, - { - "required": ["vertex_key_config"], - "not": { - "anyOf": [ - { "required": ["azure_key_config"] }, - { "required": ["bedrock_key_config"] }, - { "required": ["vllm_key_config"] } - ] - } - }, - { - "required": ["bedrock_key_config"], - "not": { - "anyOf": [ - { "required": ["azure_key_config"] }, - { "required": ["vertex_key_config"] }, - { "required": ["vllm_key_config"] } - ] - } - }, - { - "required": ["vllm_key_config"], - "not": { - "anyOf": [ - { "required": ["azure_key_config"] }, - { "required": ["vertex_key_config"] }, - { "required": ["bedrock_key_config"] } - ] - } - } - ], - "required": [ - "key_id", - "name", - "value" - ] + "type": "string" } } }, @@ -1847,19 +1847,20 @@ }, "mcp_client_id": { "type": "integer", - "description": "Associated MCP client ID" + "description": "Associated MCP client ID (database format)" + }, + "mcp_client_name": { + "type": "string", + "description": "MCP client name (config file format \u2014 resolved to mcp_client_id at startup)" }, "tools_to_execute": { "type": "array", - "description": "Tools to execute for this MCP config", + "description": "Include-only list of tools this Virtual Key is permitted to execute from this MCP client. 
['*'] means all tools allowed, [] means no tools allowed (deny-by-default).", "items": { "type": "string" } } }, - "required": [ - "mcp_client_id" - ], "additionalProperties": false }, "auth_config": { @@ -1904,169 +1905,78 @@ }, "additionalProperties": false }, - "pricing_override_match_type": { - "type": "string", - "enum": [ - "exact", - "wildcard", - "regex" - ] - }, - "pricing_override_request_type": { - "type": "string", - "enum": [ - "text_completion", - "text_completion_stream", - "chat_completion", - "chat_completion_stream", - "responses", - "responses_stream", - "embedding", - "rerank", - "ocr", - "speech", - "speech_stream", - "transcription", - "transcription_stream", - "image_generation", - "image_generation_stream" - ] - }, - "provider_pricing_override": { + "network_config": { "type": "object", "properties": { - "model_pattern": { + "base_url": { "type": "string", - "minLength": 1 + "format": "uri", + "description": "Base URL for the provider (optional, required for Ollama)" }, - "match_type": { - "$ref": "#/$defs/pricing_override_match_type" + "extra_headers": { + "type": "object", + "additionalProperties": { + "type": "string" + }, + "description": "Additional headers to send with requests" }, - "request_types": { - "type": "array", - "items": { - "$ref": "#/$defs/pricing_override_request_type" - } + "default_request_timeout_in_seconds": { + "type": "integer", + "minimum": 1, + "description": "Default request timeout in seconds" }, - "input_cost_per_token": { "type": "number", "minimum": 0 }, - "output_cost_per_token": { "type": "number", "minimum": 0 }, - "input_cost_per_video_per_second": { "type": "number", "minimum": 0 }, - "input_cost_per_audio_per_second": { "type": "number", "minimum": 0 }, - "input_cost_per_character": { "type": "number", "minimum": 0 }, - "output_cost_per_character": { "type": "number", "minimum": 0 }, - "input_cost_per_token_above_128k_tokens": { "type": "number", "minimum": 0 }, - 
"input_cost_per_character_above_128k_tokens": { "type": "number", "minimum": 0 }, - "input_cost_per_image_above_128k_tokens": { "type": "number", "minimum": 0 }, - "input_cost_per_video_per_second_above_128k_tokens": { "type": "number", "minimum": 0 }, - "input_cost_per_audio_per_second_above_128k_tokens": { "type": "number", "minimum": 0 }, - "output_cost_per_token_above_128k_tokens": { "type": "number", "minimum": 0 }, - "output_cost_per_character_above_128k_tokens": { "type": "number", "minimum": 0 }, - "input_cost_per_token_above_200k_tokens": { "type": "number", "minimum": 0 }, - "output_cost_per_token_above_200k_tokens": { "type": "number", "minimum": 0 }, - "cache_creation_input_token_cost_above_200k_tokens": { "type": "number", "minimum": 0 }, - "cache_read_input_token_cost_above_200k_tokens": { "type": "number", "minimum": 0 }, - "cache_read_input_token_cost": { "type": "number", "minimum": 0 }, - "cache_creation_input_token_cost": { "type": "number", "minimum": 0 }, - "input_cost_per_token_batches": { "type": "number", "minimum": 0 }, - "output_cost_per_token_batches": { "type": "number", "minimum": 0 }, - "input_cost_per_image_token": { "type": "number", "minimum": 0 }, - "output_cost_per_image_token": { "type": "number", "minimum": 0 }, - "input_cost_per_image": { "type": "number", "minimum": 0 }, - "output_cost_per_image": { "type": "number", "minimum": 0 }, - "cache_read_input_image_token_cost": { "type": "number", "minimum": 0 } - }, - "required": [ - "model_pattern", - "match_type" - ], - "additionalProperties": false - }, - "custom_provider_config": { - "type": "object", - "description": "Custom provider configuration for extending or customizing provider behavior", - "properties": { - "is_key_less": { + "max_retries": { + "type": "integer", + "minimum": 0, + "description": "Maximum number of retries" + }, + "retry_backoff_initial": { + "type": "integer", + "minimum": 0, + "description": "Initial retry backoff in milliseconds" + }, + 
"retry_backoff_max": { + "type": "integer", + "minimum": 0, + "description": "Maximum retry backoff in milliseconds" + }, + "enforce_http2": { "type": "boolean", - "description": "Whether the custom provider requires a key" + "description": "Force HTTP/2 on provider connections (relevant for Bedrock and other net/http-based providers)" }, - "base_provider_type": { + "insecure_skip_verify": { + "type": "boolean", + "description": "Disable TLS certificate verification for provider connections. This bypasses server certificate validation and should be used only as a last resort when a trusted CA chain cannot be configured. Prefer ca_cert_pem for self-signed or private CA deployments." + }, + "ca_cert_pem": { "type": "string", - "description": "Base provider type to extend" + "description": "PEM-encoded CA certificate to trust for provider endpoint connections (e.g. self-signed or internal CA)" }, - "allowed_requests": { - "type": "object", - "description": "Allowed request types for the custom provider", - "properties": { - "list_models": { "type": "boolean" }, - "text_completion": { "type": "boolean" }, - "text_completion_stream": { "type": "boolean" }, - "chat_completion": { "type": "boolean" }, - "chat_completion_stream": { "type": "boolean" }, - "responses": { "type": "boolean" }, - "responses_stream": { "type": "boolean" }, - "count_tokens": { "type": "boolean" }, - "embedding": { "type": "boolean" }, - "rerank": { "type": "boolean" }, - "ocr": { "type": "boolean" }, - "speech": { "type": "boolean" }, - "speech_stream": { "type": "boolean" }, - "transcription": { "type": "boolean" }, - "transcription_stream": { "type": "boolean" }, - "image_generation": { "type": "boolean" }, - "image_generation_stream": { "type": "boolean" }, - "image_edit": { "type": "boolean" }, - "image_edit_stream": { "type": "boolean" }, - "image_variation": { "type": "boolean" }, - "video_generation": { "type": "boolean" }, - "video_retrieve": { "type": "boolean" }, - "video_download": { 
"type": "boolean" }, - "video_delete": { "type": "boolean" }, - "video_list": { "type": "boolean" }, - "video_remix": { "type": "boolean" }, - "batch_create": { "type": "boolean" }, - "batch_list": { "type": "boolean" }, - "batch_retrieve": { "type": "boolean" }, - "batch_cancel": { "type": "boolean" }, - "batch_delete": { "type": "boolean" }, - "batch_results": { "type": "boolean" }, - "file_upload": { "type": "boolean" }, - "file_list": { "type": "boolean" }, - "file_retrieve": { "type": "boolean" }, - "file_delete": { "type": "boolean" }, - "file_content": { "type": "boolean" }, - "container_create": { "type": "boolean" }, - "container_list": { "type": "boolean" }, - "container_retrieve": { "type": "boolean" }, - "container_delete": { "type": "boolean" }, - "container_file_create": { "type": "boolean" }, - "container_file_list": { "type": "boolean" }, - "container_file_retrieve": { "type": "boolean" }, - "container_file_content": { "type": "boolean" }, - "container_file_delete": { "type": "boolean" }, - "passthrough": { "type": "boolean" }, - "passthrough_stream": { "type": "boolean" } - }, - "additionalProperties": false + "stream_idle_timeout_in_seconds": { + "type": "integer", + "minimum": 5, + "maximum": 3600, + "description": "Idle timeout per stream chunk in seconds. If no data is received for this many seconds, the stream is closed. Default: 60." }, - "request_path_overrides": { + "max_conns_per_host": { + "type": "integer", + "minimum": 1, + "maximum": 10000, + "description": "Maximum number of TCP connections per provider host. For HTTP/2 (e.g. Bedrock), each connection supports ~100 concurrent streams. Default: 5000." + }, + "beta_header_overrides": { "type": "object", - "description": "Mapping of request type to custom path overriding the default provider path", "additionalProperties": { - "type": "string" - } + "type": "boolean" + }, + "description": "Override default Anthropic beta header support per provider. Keys are header prefixes (e.g. 
'redact-thinking-'), values are true (supported) or false (unsupported). Headers not listed use the built-in defaults." } }, - "required": ["base_provider_type"], "additionalProperties": false }, - "network_config": { + "network_config_without_base_url": { "type": "object", "properties": { - "base_url": { - "type": "string", - "format": "uri", - "description": "Base URL for the provider (optional, required for Ollama)" - }, "extra_headers": { "type": "object", "additionalProperties": { @@ -2084,16 +1994,20 @@ "minimum": 0, "description": "Maximum number of retries" }, - "retry_backoff_initial_ms": { + "retry_backoff_initial": { "type": "integer", "minimum": 0, "description": "Initial retry backoff in milliseconds" }, - "retry_backoff_max_ms": { + "retry_backoff_max": { "type": "integer", "minimum": 0, "description": "Maximum retry backoff in milliseconds" }, + "enforce_http2": { + "type": "boolean", + "description": "Force HTTP/2 on provider connections (relevant for Bedrock and other net/http-based providers)" + }, "insecure_skip_verify": { "type": "boolean", "description": "Disable TLS certificate verification for provider connections. This bypasses server certificate validation and should be used only as a last resort when a trusted CA chain cannot be configured. Prefer ca_cert_pem for self-signed or private CA deployments." @@ -2124,7 +2038,18 @@ }, "additionalProperties": false }, - "concurrency_config": { + "openai_config": { + "type": "object", + "description": "OpenAI-specific provider settings", + "properties": { + "disable_store": { + "type": "boolean", + "description": "Disable OpenAI Responses API conversation storage." + } + }, + "additionalProperties": false + }, + "concurrency_and_buffer_size": { "type": "object", "properties": { "concurrency": { @@ -2160,8 +2085,7 @@ "items": { "type": "string" }, - "default": [], - "description": "Supported models for this key" + "description": "Models this key can access. 
Use [\"*\"] to allow all models; empty array denies all (deny-by-default)." }, "weight": { "type": "number", @@ -2172,6 +2096,17 @@ "type": "boolean", "description": "Whether this key can be used for batch API operations (default: false)", "default": false + }, + "aliases": { + "type": "object", + "additionalProperties": { + "type": "string", + "minLength": 1 + }, + "propertyNames": { + "minLength": 1 + }, + "description": "Model alias mappings: maps a model name to a provider-specific identifier (deployment name, inference profile ID, fine-tuned model ID, etc.)" } }, "required": [ @@ -2252,7 +2187,9 @@ "description": "Whether this is the default bucket for batch operations" } }, - "required": ["bucket_name"], + "required": [ + "bucket_name" + ], "additionalProperties": false } } @@ -2265,10 +2202,7 @@ ], "additionalProperties": false } - }, - "required": [ - "bedrock_key_config" - ] + } } ] }, @@ -2307,6 +2241,86 @@ } ] }, + "replicate_key": { + "allOf": [ + { + "$ref": "#/$defs/base_key" + }, + { + "type": "object", + "properties": { + "replicate_key_config": { + "type": "object", + "properties": { + "use_deployments_endpoint": { + "type": "boolean", + "description": "Whether to use the deployments endpoint instead of the models endpoint (default: false)" + } + }, + "additionalProperties": false + } + } + } + ] + }, + "ollama_key": { + "allOf": [ + { + "$ref": "#/$defs/base_key" + }, + { + "type": "object", + "properties": { + "ollama_key_config": { + "type": "object", + "properties": { + "url": { + "type": "string", + "minLength": 1, + "description": "Ollama server base URL (can use env. 
prefix)" + } + }, + "required": [ + "url" + ], + "additionalProperties": false + } + }, + "required": [ + "ollama_key_config" + ] + } + ] + }, + "sgl_key": { + "allOf": [ + { + "$ref": "#/$defs/base_key" + }, + { + "type": "object", + "properties": { + "sgl_key_config": { + "type": "object", + "properties": { + "url": { + "type": "string", + "minLength": 1, + "description": "SGLang server base URL (can use env. prefix)" + } + }, + "required": [ + "url" + ], + "additionalProperties": false + } + }, + "required": [ + "sgl_key_config" + ] + } + ] + }, "azure_key": { "allOf": [ { @@ -2322,13 +2336,6 @@ "type": "string", "description": "Azure endpoint (can use env. prefix)" }, - "deployments": { - "type": "object", - "additionalProperties": { - "type": "string" - }, - "description": "Model to deployment mappings" - }, "api_version": { "type": "string", "description": "Azure API version" @@ -2373,13 +2380,6 @@ "auth_credentials": { "type": "string", "description": "Authentication credentials (can use env. 
prefix)" - }, - "deployments": { - "type": "object", - "additionalProperties": { - "type": "string" - }, - "description": "Model to deployment mappings" } }, "required": [ @@ -2393,15 +2393,138 @@ "vertex_key_config" ] } - ] + ] + }, + "provider": { + "type": "object", + "properties": { + "keys": { + "type": "array", + "items": { + "$ref": "#/$defs/base_key" + }, + "minItems": 1, + "description": "API keys for this provider" + }, + "network_config": { + "$ref": "#/$defs/network_config" + }, + "concurrency_and_buffer_size": { + "$ref": "#/$defs/concurrency_and_buffer_size" + }, + "proxy_config": { + "$ref": "#/$defs/proxy_config" + }, + "send_back_raw_request": { + "type": "boolean", + "description": "Include raw request in BifrostResponse (default: false)" + }, + "send_back_raw_response": { + "type": "boolean", + "description": "Include raw response in BifrostResponse (default: false)" + }, + "store_raw_request_response": { + "type": "boolean", + "description": "Capture raw request/response for internal logging only; strip from API responses returned to clients (default: false)" + }, + "custom_provider_config": { + "$ref": "#/$defs/custom_provider_config" + } + }, + "required": [ + "keys" + ], + "additionalProperties": false + }, + "provider_with_bedrock_config": { + "type": "object", + "properties": { + "keys": { + "type": "array", + "items": { + "$ref": "#/$defs/bedrock_key" + }, + "minItems": 1, + "description": "API keys for this provider" + }, + "network_config": { + "$ref": "#/$defs/network_config" + }, + "concurrency_and_buffer_size": { + "$ref": "#/$defs/concurrency_and_buffer_size" + }, + "proxy_config": { + "$ref": "#/$defs/proxy_config" + }, + "send_back_raw_request": { + "type": "boolean", + "description": "Include raw request in BifrostResponse (default: false)" + }, + "send_back_raw_response": { + "type": "boolean", + "description": "Include raw response in BifrostResponse (default: false)" + }, + "store_raw_request_response": { + "type": "boolean", + 
"description": "Capture raw request/response for internal logging only; strip from API responses returned to clients (default: false)" + }, + "custom_provider_config": { + "$ref": "#/$defs/custom_provider_config" + } + }, + "required": [ + "keys" + ], + "additionalProperties": false + }, + "provider_with_vllm_config": { + "type": "object", + "properties": { + "keys": { + "type": "array", + "items": { + "$ref": "#/$defs/vllm_key" + }, + "minItems": 1, + "description": "API keys for this provider" + }, + "network_config": { + "$ref": "#/$defs/network_config_without_base_url" + }, + "concurrency_and_buffer_size": { + "$ref": "#/$defs/concurrency_and_buffer_size" + }, + "proxy_config": { + "$ref": "#/$defs/proxy_config" + }, + "send_back_raw_request": { + "type": "boolean", + "description": "Include raw request in BifrostResponse (default: false)" + }, + "send_back_raw_response": { + "type": "boolean", + "description": "Include raw response in BifrostResponse (default: false)" + }, + "store_raw_request_response": { + "type": "boolean", + "description": "Capture raw request/response for internal logging only; strip from API responses returned to clients (default: false)" + }, + "custom_provider_config": { + "$ref": "#/$defs/custom_provider_config" + } + }, + "required": [ + "keys" + ], + "additionalProperties": false }, - "provider": { + "provider_with_replicate_config": { "type": "object", "properties": { "keys": { "type": "array", "items": { - "$ref": "#/$defs/base_key" + "$ref": "#/$defs/replicate_key" }, "minItems": 1, "description": "API keys for this provider" @@ -2410,7 +2533,7 @@ "$ref": "#/$defs/network_config" }, "concurrency_and_buffer_size": { - "$ref": "#/$defs/concurrency_config" + "$ref": "#/$defs/concurrency_and_buffer_size" }, "proxy_config": { "$ref": "#/$defs/proxy_config" @@ -2429,13 +2552,6 @@ }, "custom_provider_config": { "$ref": "#/$defs/custom_provider_config" - }, - "pricing_overrides": { - "type": "array", - "items": { - "$ref": 
"#/$defs/provider_pricing_override" - }, - "description": "Provider-level pricing overrides matched by model pattern" } }, "required": [ @@ -2443,13 +2559,13 @@ ], "additionalProperties": false }, - "provider_with_bedrock_config": { + "provider_with_azure_config": { "type": "object", "properties": { "keys": { "type": "array", "items": { - "$ref": "#/$defs/bedrock_key" + "$ref": "#/$defs/azure_key" }, "minItems": 1, "description": "API keys for this provider" @@ -2458,7 +2574,7 @@ "$ref": "#/$defs/network_config" }, "concurrency_and_buffer_size": { - "$ref": "#/$defs/concurrency_config" + "$ref": "#/$defs/concurrency_and_buffer_size" }, "proxy_config": { "$ref": "#/$defs/proxy_config" @@ -2477,13 +2593,6 @@ }, "custom_provider_config": { "$ref": "#/$defs/custom_provider_config" - }, - "pricing_overrides": { - "type": "array", - "items": { - "$ref": "#/$defs/provider_pricing_override" - }, - "description": "Provider-level pricing overrides matched by model pattern" } }, "required": [ @@ -2491,13 +2600,13 @@ ], "additionalProperties": false }, - "provider_with_vllm_config": { + "provider_with_vertex_config": { "type": "object", "properties": { "keys": { "type": "array", "items": { - "$ref": "#/$defs/vllm_key" + "$ref": "#/$defs/vertex_key" }, "minItems": 1, "description": "API keys for this provider" @@ -2506,7 +2615,7 @@ "$ref": "#/$defs/network_config" }, "concurrency_and_buffer_size": { - "$ref": "#/$defs/concurrency_config" + "$ref": "#/$defs/concurrency_and_buffer_size" }, "proxy_config": { "$ref": "#/$defs/proxy_config" @@ -2525,13 +2634,6 @@ }, "custom_provider_config": { "$ref": "#/$defs/custom_provider_config" - }, - "pricing_overrides": { - "type": "array", - "items": { - "$ref": "#/$defs/provider_pricing_override" - }, - "description": "Provider-level pricing overrides matched by model pattern" } }, "required": [ @@ -2539,22 +2641,22 @@ ], "additionalProperties": false }, - "provider_with_azure_config": { + "provider_with_ollama_config": { "type": "object", 
"properties": { "keys": { "type": "array", "items": { - "$ref": "#/$defs/azure_key" + "$ref": "#/$defs/ollama_key" }, "minItems": 1, "description": "API keys for this provider" }, "network_config": { - "$ref": "#/$defs/network_config" + "$ref": "#/$defs/network_config_without_base_url" }, "concurrency_and_buffer_size": { - "$ref": "#/$defs/concurrency_config" + "$ref": "#/$defs/concurrency_and_buffer_size" }, "proxy_config": { "$ref": "#/$defs/proxy_config" @@ -2573,13 +2675,6 @@ }, "custom_provider_config": { "$ref": "#/$defs/custom_provider_config" - }, - "pricing_overrides": { - "type": "array", - "items": { - "$ref": "#/$defs/provider_pricing_override" - }, - "description": "Provider-level pricing overrides matched by model pattern" } }, "required": [ @@ -2587,22 +2682,22 @@ ], "additionalProperties": false }, - "provider_with_vertex_config": { + "provider_with_sgl_config": { "type": "object", "properties": { "keys": { "type": "array", "items": { - "$ref": "#/$defs/vertex_key" + "$ref": "#/$defs/sgl_key" }, "minItems": 1, "description": "API keys for this provider" }, "network_config": { - "$ref": "#/$defs/network_config" + "$ref": "#/$defs/network_config_without_base_url" }, "concurrency_and_buffer_size": { - "$ref": "#/$defs/concurrency_config" + "$ref": "#/$defs/concurrency_and_buffer_size" }, "proxy_config": { "$ref": "#/$defs/proxy_config" @@ -2621,13 +2716,6 @@ }, "custom_provider_config": { "$ref": "#/$defs/custom_provider_config" - }, - "pricing_overrides": { - "type": "array", - "items": { - "$ref": "#/$defs/provider_pricing_override" - }, - "description": "Provider-level pricing overrides matched by model pattern" } }, "required": [ @@ -2654,9 +2742,9 @@ "type": "string", "enum": [ "stdio", - "websocket", "http", - "sse" + "sse", + "inprocess" ], "description": "Connection type for MCP client" }, @@ -2666,12 +2754,17 @@ }, "auth_type": { "type": "string", - "enum": ["none", "headers", "oauth"], + "enum": [ + "none", + "headers", + "oauth", + 
"per_user_oauth" + ], "description": "Authentication type for MCP connection" }, "oauth_config_id": { "type": "string", - "description": "OAuth config ID reference (for oauth auth type)" + "description": "OAuth config ID reference (required when auth_type is 'oauth' or 'per_user_oauth')" }, "headers": { "type": "object", @@ -2707,34 +2800,6 @@ ], "additionalProperties": false }, - "websocket_config": { - "type": "object", - "properties": { - "url": { - "type": "string", - "format": "uri", - "description": "WebSocket URL" - } - }, - "required": [ - "url" - ], - "additionalProperties": false - }, - "http_config": { - "type": "object", - "properties": { - "url": { - "type": "string", - "format": "uri", - "description": "HTTP URL" - } - }, - "required": [ - "url" - ], - "additionalProperties": false - }, "tools_to_execute": { "type": "array", "items": { @@ -2753,6 +2818,13 @@ "type": "string", "description": "Per-client override for tool sync interval (Go duration, e.g. '10m', '1h', 0 = use global, negative = disabled)" }, + "allowed_extra_headers": { + "type": "array", + "items": { + "type": "string" + }, + "description": "Allowlist of request-level headers that callers may forward to this MCP server at execution time. Use ['*'] to allow all headers." + }, "is_ping_available": { "type": "boolean", "description": "Whether the MCP server supports ping for health checks (default: true)", @@ -2765,6 +2837,11 @@ "type": "number", "minimum": 0 } + }, + "allow_on_all_virtual_keys": { + "type": "boolean", + "description": "When true, this MCP server is accessible to all virtual keys without requiring explicit per-key assignment. All tools are allowed by default. 
If a virtual key has an explicit MCP config for this server, that config takes precedence and overrides this behaviour.", + "default": false } }, "required": [ @@ -2772,6 +2849,24 @@ "connection_type" ], "additionalProperties": false, + "if": { + "properties": { + "auth_type": { + "enum": [ + "oauth", + "per_user_oauth" + ] + } + }, + "required": [ + "auth_type" + ] + }, + "then": { + "required": [ + "oauth_config_id" + ] + }, "oneOf": [ { "properties": { @@ -2800,8 +2895,16 @@ } }, "anyOf": [ - { "required": ["http_config"] }, - { "required": ["connection_string"] } + { + "required": [ + "http_config" + ] + }, + { + "required": [ + "connection_string" + ] + } ] }, { @@ -2833,8 +2936,16 @@ }, "code_mode_binding_level": { "type": "string", - "enum": ["server", "tool"], + "enum": [ + "server", + "tool" + ], "description": "How tools are exposed in VFS for code execution" + }, + "disable_auto_tool_inject": { + "type": "boolean", + "description": "When true, MCP tools are not automatically injected into requests. 
Tools are only included when explicitly specified via request context filters or headers, such as x-bf-mcp-include-tools or x-bf-mcp-include-clients.", + "default": false } } }, @@ -2873,7 +2984,7 @@ }, "timeout": { "type": "string", - "pattern": "^[0-9]+(ns|us|µs|ms|s|m|h)$", + "pattern": "^[0-9]+(ns|us|\u00b5s|ms|s|m|h)$", "description": "Timeout for Weaviate operations (e.g., '5s')" }, "class_name": { @@ -2952,32 +3063,32 @@ }, "conn_max_lifetime": { "type": "string", - "pattern": "^[0-9]+(ns|us|µs|ms|s|m|h)$", + "pattern": "^[0-9]+(ns|us|\u00b5s|ms|s|m|h)$", "description": "Connection maximum lifetime (e.g., '30m')" }, "conn_max_idle_time": { "type": "string", - "pattern": "^[0-9]+(ns|us|µs|ms|s|m|h)$", + "pattern": "^[0-9]+(ns|us|\u00b5s|ms|s|m|h)$", "description": "Connection maximum idle time (e.g., '5m')" }, "dial_timeout": { "type": "string", - "pattern": "^[0-9]+(ns|us|µs|ms|s|m|h)$", + "pattern": "^[0-9]+(ns|us|\u00b5s|ms|s|m|h)$", "description": "Timeout for socket connection (e.g., '5s')" }, "read_timeout": { "type": "string", - "pattern": "^[0-9]+(ns|us|µs|ms|s|m|h)$", + "pattern": "^[0-9]+(ns|us|\u00b5s|ms|s|m|h)$", "description": "Timeout for socket reads (e.g., '3s')" }, "write_timeout": { "type": "string", - "pattern": "^[0-9]+(ns|us|µs|ms|s|m|h)$", + "pattern": "^[0-9]+(ns|us|\u00b5s|ms|s|m|h)$", "description": "Timeout for socket writes (e.g., '3s')" }, "context_timeout": { "type": "string", - "pattern": "^[0-9]+(ns|us|µs|ms|s|m|h)$", + "pattern": "^[0-9]+(ns|us|\u00b5s|ms|s|m|h)$", "description": "Timeout for Redis operations (e.g., '10s')" } }, @@ -3224,7 +3335,7 @@ ], "additionalProperties": false }, - "saml_config": { + "scim_config": { "type": "object", "description": "SAML/SCIM (System for Cross-domain Identity Management) configuration", "properties": { @@ -3299,7 +3410,11 @@ }, "clientSecret": { "type": "string", - "description": "Okta client secret (optional, required for token revocation)" + "description": "Okta client secret" + }, + 
"apiToken": { + "type": "string", + "description": "Okta API token for Admin API access" }, "audience": { "type": "string", @@ -3323,7 +3438,9 @@ }, "required": [ "issuerUrl", - "clientId" + "clientId", + "clientSecret", + "apiToken" ], "additionalProperties": false }, @@ -3345,7 +3462,11 @@ }, "cloud": { "type": "string", - "enum": ["commercial", "gcc-high", "dod"], + "enum": [ + "commercial", + "gcc-high", + "dod" + ], "default": "commercial", "description": "Cloud environment: 'commercial' (default), 'gcc-high' for US Government GCC High, or 'dod' for Department of Defense" }, @@ -3640,6 +3761,312 @@ } }, "additionalProperties": false + }, + "provider_pricing_override": { + "type": "object", + "description": "Scoped pricing override applied at runtime by the model catalog", + "properties": { + "id": { + "type": "string", + "description": "Unique pricing override ID" + }, + "name": { + "type": "string", + "description": "Human-readable name for this override" + }, + "scope_kind": { + "type": "string", + "description": "Scope level for this override", + "enum": [ + "global", + "provider", + "provider_key", + "virtual_key", + "virtual_key_provider", + "virtual_key_provider_key" + ] + }, + "virtual_key_id": { + "type": "string", + "description": "Virtual key ID (required for virtual_key* scopes)" + }, + "provider_id": { + "type": "string", + "description": "Provider ID (required for provider* scopes)" + }, + "provider_key_id": { + "type": "string", + "description": "Provider key ID (required for provider_key and virtual_key_provider_key scopes)" + }, + "match_type": { + "type": "string", + "description": "How the pattern is matched against model names", + "enum": [ + "exact", + "wildcard" + ] + }, + "pattern": { + "type": "string", + "description": "Model name pattern to match (exact name or wildcard prefix ending with *)" + }, + "request_types": { + "type": "array", + "description": "Request types this override applies to. 
At least one value is required.", + "minItems": 1, + "items": { + "type": "string" + } + }, + "pricing_patch": { + "type": "string", + "description": "JSON-encoded pricing fields to override (e.g. '{\"input_cost_per_token\":0.000001}')" + }, + "config_hash": { + "type": "string", + "description": "Internal hash for change detection (auto-managed)" + } + }, + "required": [ + "id", + "name", + "scope_kind", + "match_type", + "pattern", + "request_types" + ], + "additionalProperties": false + }, + "pricing_override_match_type": { + "type": "string", + "enum": [ + "exact", + "wildcard" + ] + }, + "pricing_override_request_type": { + "type": "string", + "enum": [ + "chat_completion", + "text_completion", + "responses", + "embedding", + "rerank", + "speech", + "transcription", + "image_generation", + "image_variation", + "image_edit", + "video_generation", + "video_remix" + ] + }, + "custom_provider_config": { + "type": "object", + "description": "Custom provider configuration for extending or customizing provider behavior", + "properties": { + "is_key_less": { + "type": "boolean", + "description": "Whether the custom provider is keyless (true means it does not require an API key)" + }, + "base_provider_type": { + "type": "string", + "enum": [ + "openai", + "azure", + "anthropic", + "bedrock", + "cohere", + "vertex", + "mistral", + "ollama", + "groq", + "sgl", + "parasail", + "perplexity", + "cerebras", + "gemini", + "openrouter", + "elevenlabs", + "huggingface", + "nebius", + "xai", + "replicate", + "vllm", + "runway", + "fireworks" + ], + "description": "Base provider type to extend" + }, + "request_path_overrides": { + "type": "object", + "description": "Mapping of request type to custom path overriding the default provider path", + "additionalProperties": { + "type": "string" + } + }, + "allowed_requests": { + "type": "object", + "description": "Allowed request types for the custom provider", + "properties": { + "list_models": { + "type": "boolean" + }, + "text_completion": { + "type": "boolean" + }, + 
"text_completion_stream": { + "type": "boolean" + }, + "chat_completion": { + "type": "boolean" + }, + "chat_completion_stream": { + "type": "boolean" + }, + "responses": { + "type": "boolean" + }, + "responses_stream": { + "type": "boolean" + }, + "count_tokens": { + "type": "boolean" + }, + "embedding": { + "type": "boolean" + }, + "rerank": { + "type": "boolean" + }, + "ocr": { + "type": "boolean" + }, + "speech": { + "type": "boolean" + }, + "speech_stream": { + "type": "boolean" + }, + "transcription": { + "type": "boolean" + }, + "transcription_stream": { + "type": "boolean" + }, + "image_generation": { + "type": "boolean" + }, + "image_generation_stream": { + "type": "boolean" + }, + "image_edit": { + "type": "boolean" + }, + "image_edit_stream": { + "type": "boolean" + }, + "image_variation": { + "type": "boolean" + }, + "video_generation": { + "type": "boolean" + }, + "video_retrieve": { + "type": "boolean" + }, + "video_download": { + "type": "boolean" + }, + "video_delete": { + "type": "boolean" + }, + "video_list": { + "type": "boolean" + }, + "video_remix": { + "type": "boolean" + }, + "batch_create": { + "type": "boolean" + }, + "batch_list": { + "type": "boolean" + }, + "batch_retrieve": { + "type": "boolean" + }, + "batch_cancel": { + "type": "boolean" + }, + "batch_delete": { + "type": "boolean" + }, + "batch_results": { + "type": "boolean" + }, + "file_upload": { + "type": "boolean" + }, + "file_list": { + "type": "boolean" + }, + "file_retrieve": { + "type": "boolean" + }, + "file_delete": { + "type": "boolean" + }, + "file_content": { + "type": "boolean" + }, + "container_create": { + "type": "boolean" + }, + "container_list": { + "type": "boolean" + }, + "container_retrieve": { + "type": "boolean" + }, + "container_delete": { + "type": "boolean" + }, + "container_file_create": { + "type": "boolean" + }, + "container_file_list": { + "type": "boolean" + }, + "container_file_retrieve": { + "type": "boolean" + }, + "container_file_content": { + 
"type": "boolean" + }, + "container_file_delete": { + "type": "boolean" + }, + "passthrough": { + "type": "boolean" + }, + "passthrough_stream": { + "type": "boolean" + }, + "websocket_responses": { + "type": "boolean" + }, + "realtime": { + "type": "boolean" + } + }, + "additionalProperties": false + } + }, + "required": [ + "base_provider_type" + ], + "additionalProperties": false } } } \ No newline at end of file