diff --git a/docs/deployment-guides/config-json.mdx b/docs/deployment-guides/config-json.mdx
new file mode 100644
index 0000000000..413c0e5a51
--- /dev/null
+++ b/docs/deployment-guides/config-json.mdx
@@ -0,0 +1,313 @@
+---
+title: "Quick Start"
+description: "Configure Bifrost using a config.json file — GitOps-friendly, no-UI deployments, and multinode OSS setups"
+icon: "file-code"
+---
+
+
+**Full schema reference:** [`https://www.getbifrost.ai/schema`](https://www.getbifrost.ai/schema)
+
+
+`config.json` lets you configure every aspect of Bifrost through a single declarative file. It is the right choice for GitOps workflows, CI/CD pipelines, headless deployments, and multinode OSS setups where a central configuration file is shared across all replicas.
+
+---
+
+## Two Configuration Modes
+
+Bifrost supports **two mutually exclusive modes**. You cannot run both at the same time.
+
+| Mode | When | Behaviour |
+|------|------|-----------|
+| **Web UI / database** | No `config.json`, or `config.json` with `config_store` enabled | Full UI available, configuration stored in SQLite or PostgreSQL |
+| **File-based (`config.json`)** | `config.json` present, `config_store` disabled | UI disabled, all config loaded from file at startup, restart required for changes |
+
+
+See [Setting Up](/quickstart/gateway/setting-up#two-configuration-modes) for a full explanation of both modes and how `config_store` bootstrapping works.
+
+
+---
+
+## Minimal Working Example
+
+```json
+{
+ "$schema": "https://www.getbifrost.ai/schema",
+ "encryption_key": "env.BIFROST_ENCRYPTION_KEY",
+ "client": {
+ "drop_excess_requests": false,
+ "enable_logging": true
+ },
+ "providers": {
+ "openai": {
+ "keys": [
+ {
+ "name": "openai-primary",
+ "value": "env.OPENAI_API_KEY",
+ "models": ["*"],
+ "weight": 1.0
+ }
+ ]
+ }
+ },
+ "config_store": {
+ "enabled": false
+ }
+}
+```
+
+Save this as `config.json` in your app directory and start Bifrost:
+
+```bash
+# NPX
+npx -y @maximhq/bifrost -app-dir ./data
+
+# Docker
+docker run -p 8080:8080 \
+ -v $(pwd)/data:/app/data \
+ -e OPENAI_API_KEY=sk-... \
+ -e BIFROST_ENCRYPTION_KEY=your-32-byte-key \
+ maximhq/bifrost
+```
+
+Make your first call:
+
+```bash
+curl http://localhost:8080/v1/chat/completions \
+ -H "Content-Type: application/json" \
+ -d '{
+ "model": "openai/gpt-4o-mini",
+ "messages": [{"role": "user", "content": "Hello!"}]
+ }'
+```
+
+---
+
+## Environment Variable References
+
+Never put secrets directly in `config.json`. Use the `env.` prefix to reference any environment variable:
+
+```json
+{
+ "encryption_key": "env.BIFROST_ENCRYPTION_KEY",
+ "providers": {
+ "openai": {
+ "keys": [
+ {
+ "name": "primary",
+ "value": "env.OPENAI_API_KEY",
+ "weight": 1.0
+ }
+ ]
+ }
+ }
+}
+```
+
+Set the actual values through your deployment platform — shell environment, Docker `-e`, Kubernetes Secrets mounted as env vars, or a `.env` file.
+
+---
+
+## Schema Validation
+
+Add `$schema` to every `config.json` for IDE autocomplete and inline validation:
+
+```json
+{
+ "$schema": "https://www.getbifrost.ai/schema"
+}
+```
+
+Editors (VS Code, JetBrains, Neovim with LSP) will show completions and flag invalid fields as you type.
+
+---
+
+## Production Example
+
+A production-ready file with PostgreSQL storage, multi-provider setup, governance, and common plugins:
+
+```json
+{
+ "$schema": "https://www.getbifrost.ai/schema",
+ "encryption_key": "env.BIFROST_ENCRYPTION_KEY",
+
+ "client": {
+ "initial_pool_size": 500,
+ "drop_excess_requests": true,
+ "enable_logging": true,
+ "log_retention_days": 90,
+ "enforce_auth_on_inference": true,
+ "allow_direct_keys": false,
+ "allowed_origins": ["https://app.yourcompany.com"]
+ },
+
+ "providers": {
+ "openai": {
+ "keys": [
+ {
+ "name": "openai-primary",
+ "value": "env.OPENAI_API_KEY",
+ "models": ["*"],
+ "weight": 1.0
+ }
+ ],
+ "network_config": {
+ "default_request_timeout_in_seconds": 120,
+ "max_retries": 3
+ }
+ },
+ "anthropic": {
+ "keys": [
+ {
+ "name": "anthropic-primary",
+ "value": "env.ANTHROPIC_API_KEY",
+ "models": ["*"],
+ "weight": 1.0
+ }
+ ]
+ }
+ },
+
+ "config_store": {
+ "enabled": true,
+ "type": "postgres",
+ "config": {
+ "host": "env.PG_HOST",
+ "port": "5432",
+ "user": "env.PG_USER",
+ "password": "env.PG_PASSWORD",
+ "db_name": "bifrost",
+ "ssl_mode": "require"
+ }
+ },
+
+ "logs_store": {
+ "enabled": true,
+ "type": "postgres",
+ "config": {
+ "host": "env.PG_HOST",
+ "port": "5432",
+ "user": "env.PG_USER",
+ "password": "env.PG_PASSWORD",
+ "db_name": "bifrost",
+ "ssl_mode": "require"
+ }
+ }
+}
+```
+
+---
+
+## Example Configs
+
+Ready-to-use reference configurations from the [examples/configs](https://github.com/maximhq/bifrost/tree/main/examples/configs) directory on GitHub:
+
+
+
+
+
+| Example | Description |
+|---------|-------------|
+| [noconfigstorenologstore](https://github.com/maximhq/bifrost/blob/main/examples/configs/noconfigstorenologstore/config.json) | Bare-minimum file-only mode — no database, no UI, providers loaded from file |
+| [partial](https://github.com/maximhq/bifrost/blob/main/examples/configs/partial/config.json) | SQLite config store with a minimal provider setup |
+| [v1compat](https://github.com/maximhq/bifrost/blob/main/examples/configs/v1compat/config.json) | `"version": 1` for v1.4.x array semantics (empty = allow all) |
+
+
+
+
+
+| Example | Description |
+|---------|-------------|
+| [withconfigstore](https://github.com/maximhq/bifrost/blob/main/examples/configs/withconfigstore/config.json) | SQLite config store (Web UI enabled) |
+| [withconfigstorelogsstorepostgres](https://github.com/maximhq/bifrost/blob/main/examples/configs/withconfigstorelogsstorepostgres/config.json) | PostgreSQL for both config store and logs store |
+| [withlogstore](https://github.com/maximhq/bifrost/blob/main/examples/configs/withlogstore/config.json) | SQLite logs store |
+| [withobjectstorages3](https://github.com/maximhq/bifrost/blob/main/examples/configs/withobjectstorages3/config.json) | S3 object storage offload for logs |
+| [withobjectstoragegcs](https://github.com/maximhq/bifrost/blob/main/examples/configs/withobjectstoragegcs/config.json) | GCS object storage offload for logs |
+| [withvectorstoreweaviate](https://github.com/maximhq/bifrost/blob/main/examples/configs/withvectorstoreweaviate/config.json) | Weaviate vector store (with [docker-compose](https://github.com/maximhq/bifrost/blob/main/examples/configs/withvectorstoreweaviate/docker-compose.yml)) |
+
+
+
+
+
+| Example | Description |
+|---------|-------------|
+| [withsemanticcache](https://github.com/maximhq/bifrost/blob/main/examples/configs/withsemanticcache/config.json) | Semantic cache backed by Weaviate |
+| [withsemanticcachevalkey](https://github.com/maximhq/bifrost/blob/main/examples/configs/withsemanticcachevalkey/config.json) | Semantic cache backed by Valkey / Redis |
+
+
+
+
+
+| Example | Description |
+|---------|-------------|
+| [withauth](https://github.com/maximhq/bifrost/blob/main/examples/configs/withauth/config.json) | Admin username/password auth (`governance.auth_config`) |
+| [withvirtualkeys](https://github.com/maximhq/bifrost/blob/main/examples/configs/withvirtualkeys/config.json) | Virtual keys with provider/model allowlists |
+| [withteamscustomers](https://github.com/maximhq/bifrost/blob/main/examples/configs/withteamscustomers/config.json) | Teams and customers with budgets and rate limits |
+| [withroutingrules](https://github.com/maximhq/bifrost/blob/main/examples/configs/withroutingrules/config.json) | CEL-based routing rules for dynamic provider/model selection |
+| [withpricingoverridesnostore](https://github.com/maximhq/bifrost/blob/main/examples/configs/withpricingoverridesnostore/config.json) | Pricing overrides in file-only mode |
+| [withpricingoverridessqlite](https://github.com/maximhq/bifrost/blob/main/examples/configs/withpricingoverridessqlite/config.json) | Pricing overrides with SQLite config store |
+
+
+
+
+
+| Example | Description |
+|---------|-------------|
+| [withobservability](https://github.com/maximhq/bifrost/blob/main/examples/configs/withobservability/config.json) | Prometheus metrics (telemetry always active, custom labels via `client.prometheus_labels`) |
+| [withprompushgateway](https://github.com/maximhq/bifrost/blob/main/examples/configs/withprompushgateway/config.json) | Prometheus Push Gateway for multi-instance deployments |
+| [withotel](https://github.com/maximhq/bifrost/blob/main/examples/configs/withotel/config.json) | OpenTelemetry traces and metrics |
+
+
+
+
+
+| Example | Description |
+|---------|-------------|
+| [withdynamicplugin](https://github.com/maximhq/bifrost/blob/main/examples/configs/withdynamicplugin/config.json) | Loading a custom `.so` plugin at startup |
+| [withcompat](https://github.com/maximhq/bifrost/blob/main/examples/configs/withcompat/config.json) | SDK compatibility shims (`should_drop_params`, `convert_text_to_chat`) |
+| [withframework](https://github.com/maximhq/bifrost/blob/main/examples/configs/withframework/config.json) | Custom model pricing catalog URL and sync interval |
+| [withlargepayload](https://github.com/maximhq/bifrost/blob/main/examples/configs/withlargepayload/config.json) | Large payload optimization (streaming without full materialisation) |
+| [withwebsocket](https://github.com/maximhq/bifrost/blob/main/examples/configs/withwebsocket/config.json) | WebSocket / Realtime API connection pool tuning |
+| [withpostgresmcpclientsinconfig](https://github.com/maximhq/bifrost/blob/main/examples/configs/withpostgresmcpclientsinconfig/config.json) | MCP client definitions seeded from config.json with PostgreSQL store |
+| [encryptionmigration](https://github.com/maximhq/bifrost/blob/main/examples/configs/encryptionmigration/config.json) | Migrating to a new encryption key |
+
+
+
+
+
+---
+
+## Configuration Guides
+
+
+
+ Every top-level key, its type, default, and where it is documented
+
+
+ Pool size, logging, CORS, header filtering, compat shims, MCP settings
+
+
+ OpenAI, Anthropic, Azure, Bedrock, Vertex, Groq, self-hosted
+
+
+ config_store, logs_store, vector_store — SQLite, PostgreSQL, object storage
+
+
+ Semantic cache, OTel, Maxim, Datadog, custom plugins
+
+
+ Virtual keys, budgets, rate limits, routing rules, admin auth
+
+
+ Content moderation providers and CEL-based rules (enterprise)
+
+
+
+---
+
+## Next Steps
+
+1. Configure [provider keys](/providers/supported-providers/overview)
+2. Enable [plugins](/plugins/getting-started)
+3. Set up [observability](/features/observability/default)
+4. Configure [governance](/features/governance/virtual-keys)
+5. Deploy [multiple nodes](/deployment-guides/how-to/multinode) with a shared `config.json`
diff --git a/docs/deployment-guides/config-json/client.mdx b/docs/deployment-guides/config-json/client.mdx
new file mode 100644
index 0000000000..1a974df77b
--- /dev/null
+++ b/docs/deployment-guides/config-json/client.mdx
@@ -0,0 +1,276 @@
+---
+title: "Client Configuration"
+description: "Configure the Bifrost client in config.json — connection pool, logging, CORS, header filtering, compat shims, and MCP settings"
+icon: "gear"
+---
+
+The `client` block controls how Bifrost manages its internal worker pool, request logging, authentication enforcement, header policies, SDK compatibility shims, and MCP agent behaviour.
+
+---
+
+## Connection Pool
+
+| Field | Type | Default | Description |
+|-------|------|---------|-------------|
+| `initial_pool_size` | integer | `300` | Pre-allocated worker goroutines per provider queue |
+| `drop_excess_requests` | boolean | `false` | Drop requests when queue is full instead of waiting (returns HTTP 429) |
+
+A larger pool reduces latency spikes under burst load at the cost of higher baseline memory. `500–1000` is a common starting point for production workloads with multiple providers.
+
+```json
+{
+ "client": {
+ "initial_pool_size": 1000,
+ "drop_excess_requests": true
+ }
+}
+```
+
+---
+
+## Request & Response Logging
+
+| Field | Type | Default | Description |
+|-------|------|---------|-------------|
+| `enable_logging` | boolean | — | Log all LLM requests and responses |
+| `disable_content_logging` | boolean | `false` | Strip message content from logs (keeps metadata only) |
+| `log_retention_days` | integer | `365` | Days to retain log entries in the store |
+| `logging_headers` | array of strings | `[]` | HTTP request headers to capture in log metadata |
+
+Set `disable_content_logging: true` for HIPAA / PCI compliance workloads where message content must not be persisted.
+
+```json
+{
+ "client": {
+ "enable_logging": true,
+ "disable_content_logging": true,
+ "log_retention_days": 90,
+ "logging_headers": ["x-request-id", "x-user-id"]
+ }
+}
+```
+
+---
+
+## Security & CORS
+
+| Field | Type | Default | Description |
+|-------|------|---------|-------------|
+| `allowed_origins` | array | `["*"]` | CORS allowed origins (use URIs or `"*"`) |
+| `allow_direct_keys` | boolean | `false` | Allow callers to pass provider keys directly in requests |
+| `enforce_auth_on_inference` | boolean | `false` | Require auth (virtual key, API key, or user token) on `/v1/*` inference routes |
+| `max_request_body_size_mb` | integer | `100` | Maximum allowed request body size in MB |
+| `whitelisted_routes` | array of strings | `[]` | Routes that bypass auth middleware |
+| `allowed_headers` | array of strings | `[]` | Additional headers permitted for CORS and WebSocket |
+
+```json
+{
+ "client": {
+ "allowed_origins": [
+ "https://app.yourcompany.com",
+ "https://admin.yourcompany.com"
+ ],
+ "allow_direct_keys": false,
+ "enforce_auth_on_inference": true,
+ "max_request_body_size_mb": 50,
+ "whitelisted_routes": ["/health", "/metrics"]
+ }
+}
+```
+
+---
+
+## Header Filtering
+
+Controls which `x-bf-eh-*` extra headers are forwarded to upstream LLM providers.
+
+| Field | Type | Default | Description |
+|-------|------|---------|-------------|
+| `header_filter_config.allowlist` | array of strings | `[]` | Only these headers are forwarded (whitelist mode) |
+| `header_filter_config.denylist` | array of strings | `[]` | These headers are always blocked |
+| `required_headers` | array of strings | `[]` | Headers that must be present on every request (rejected with 400 if missing) |
+
+When both `allowlist` and `denylist` are empty, all `x-bf-eh-*` headers pass through. Specifying an `allowlist` enables strict whitelist mode — only listed headers are forwarded.
+
+```json
+{
+ "client": {
+ "header_filter_config": {
+ "allowlist": [
+ "x-bf-eh-anthropic-version",
+ "x-bf-eh-openai-beta"
+ ],
+ "denylist": []
+ },
+ "required_headers": ["x-request-id"]
+ }
+}
+```
+
+---
+
+## Compat Shims
+
+Compatibility flags that let Bifrost silently adapt request/response shapes for SDK integrations.
+
+| Field | Type | Default | Description |
+|-------|------|---------|-------------|
+| `compat.convert_text_to_chat` | boolean | `false` | Wrap legacy `/v1/completions` text requests as chat messages |
+| `compat.convert_chat_to_responses` | boolean | `false` | Translate chat completions to Responses API format |
+| `compat.should_drop_params` | boolean | `false` | Silently drop unsupported parameters instead of erroring |
+| `compat.should_convert_params` | boolean | `false` | Auto-convert parameter values across provider schemas |
+
+```json
+{
+ "client": {
+ "compat": {
+ "should_drop_params": true,
+ "convert_text_to_chat": true
+ }
+ }
+}
+```
+
+---
+
+## MCP Agent Settings
+
+| Field | Type | Default | Description |
+|-------|------|---------|-------------|
+| `mcp_agent_depth` | integer | `10` | Maximum tool-call recursion depth for MCP agent mode |
+| `mcp_tool_execution_timeout` | integer | `30` | Timeout per MCP tool execution in seconds |
+| `mcp_code_mode_binding_level` | string | — | Code mode binding level: `"server"` or `"tool"` |
+| `mcp_tool_sync_interval` | integer | `10` | Global tool sync interval in minutes (`0` = disabled) |
+| `mcp_disable_auto_tool_inject` | boolean | `false` | When `true`, MCP tools are not automatically injected into requests |
+
+```json
+{
+ "client": {
+ "mcp_agent_depth": 15,
+ "mcp_tool_execution_timeout": 60,
+ "mcp_tool_sync_interval": 10
+ }
+}
+```
+
+---
+
+## Async Jobs
+
+| Field | Type | Default | Description |
+|-------|------|---------|-------------|
+| `async_job_result_ttl` | integer | `3600` | TTL (seconds) for async job results |
+| `disable_db_pings_in_health` | boolean | `false` | Exclude database connectivity from `/health` endpoint checks |
+
+---
+
+## Prometheus Labels
+
+Add custom labels to every Prometheus metric emitted by Bifrost:
+
+```json
+{
+ "client": {
+ "prometheus_labels": ["environment=production", "region=us-east-1"]
+ }
+}
+```
+
+---
+
+## Authentication
+
+`governance.auth_config` protects the Bifrost dashboard and management API with username/password auth.
+
+| Field | Type | Default | Description |
+|-------|------|---------|-------------|
+| `is_enabled` | boolean | `false` | Enable username/password auth |
+| `admin_username` | string | — | Admin username |
+| `admin_password` | string | — | Admin password (use `env.` reference) |
+| `disable_auth_on_inference` | boolean | `false` | Skip auth check on `/v1/*` inference routes |
+
+```json
+{
+ "governance": {
+ "auth_config": {
+ "is_enabled": true,
+ "admin_username": "env.BIFROST_ADMIN_USERNAME",
+ "admin_password": "env.BIFROST_ADMIN_PASSWORD",
+ "disable_auth_on_inference": false
+ }
+ }
+}
+```
+
+
+A top-level `auth_config` is also accepted for backwards compatibility, but `governance.auth_config` is the preferred location.
+
+
+---
+
+## Encryption Key
+
+```json
+{
+ "encryption_key": "env.BIFROST_ENCRYPTION_KEY"
+}
+```
+
+| Notes |
+|-------|
+| Accepts any string; Bifrost derives a 32-byte AES-256 key using Argon2id |
+| Can also be set via the `BIFROST_ENCRYPTION_KEY` environment variable |
+| Once set and the database is populated, the key cannot be changed without clearing the database |
+| Omitting the key stores data in plain text — not recommended for production |
+
+---
+
+## Full Example
+
+```json
+{
+ "$schema": "https://www.getbifrost.ai/schema",
+ "encryption_key": "env.BIFROST_ENCRYPTION_KEY",
+
+ "governance": {
+ "auth_config": {
+ "is_enabled": true,
+ "admin_username": "env.BIFROST_ADMIN_USERNAME",
+ "admin_password": "env.BIFROST_ADMIN_PASSWORD",
+ "disable_auth_on_inference": false
+ }
+ },
+
+ "client": {
+ "initial_pool_size": 1000,
+ "drop_excess_requests": true,
+
+ "enable_logging": true,
+ "disable_content_logging": false,
+ "log_retention_days": 90,
+ "logging_headers": ["x-request-id", "x-user-id"],
+
+ "allowed_origins": ["https://app.yourcompany.com"],
+ "allow_direct_keys": false,
+ "enforce_auth_on_inference": true,
+ "max_request_body_size_mb": 100,
+
+ "header_filter_config": {
+ "allowlist": [],
+ "denylist": []
+ },
+ "required_headers": [],
+
+ "compat": {
+ "should_drop_params": false
+ },
+
+ "prometheus_labels": ["environment=production"],
+
+ "mcp_agent_depth": 10,
+ "mcp_tool_execution_timeout": 30,
+
+ "async_job_result_ttl": 3600
+ }
+}
+```
diff --git a/docs/deployment-guides/config-json/governance.mdx b/docs/deployment-guides/config-json/governance.mdx
new file mode 100644
index 0000000000..16ed48115e
--- /dev/null
+++ b/docs/deployment-guides/config-json/governance.mdx
@@ -0,0 +1,333 @@
+---
+title: "Governance"
+description: "Seed virtual keys, budgets, rate limits, routing rules, and admin auth in config.json"
+icon: "shield-check"
+---
+
+The `governance` block lets you seed all governance resources directly in `config.json`. On startup, Bifrost loads these into the configuration store. This is the recommended approach for GitOps workflows where governance state is managed as code.
+
+
+**Governance enforcement is always active** in OSS — you do not need a plugin entry to enable it. To require a virtual key on every inference request, set `client.enforce_auth_on_inference: true`. A more specific inference-auth flag such as `governance.auth_config.disable_auth_on_inference` takes precedence when set; otherwise the global `client.enforce_auth_on_inference` value applies.
+
+
+---
+
+## Admin Authentication
+
+Protect the Bifrost dashboard and management API with username/password auth:
+
+```json
+{
+ "governance": {
+ "auth_config": {
+ "is_enabled": true,
+ "admin_username": "env.BIFROST_ADMIN_USERNAME",
+ "admin_password": "env.BIFROST_ADMIN_PASSWORD",
+ "disable_auth_on_inference": false
+ }
+ }
+}
+```
+
+| Field | Default | Description |
+|-------|---------|-------------|
+| `is_enabled` | `false` | Enable admin username/password auth |
+| `admin_username` | — | Admin username (supports `env.` prefix) |
+| `admin_password` | — | Admin password (supports `env.` prefix) |
+| `disable_auth_on_inference` | `false` | Skip auth check on `/v1/*` inference routes |
+
+---
+
+## Virtual Keys
+
+Virtual keys are issued to clients and act as scoped API tokens. Each key specifies which providers, models, and API keys the bearer is allowed to use.
+
+```json
+{
+ "governance": {
+ "virtual_keys": [
+ {
+ "id": "vk-team-platform",
+ "name": "platform-team",
+ "value": "env.VK_PLATFORM_TEAM",
+ "is_active": true,
+ "provider_configs": [
+ {
+ "provider": "openai",
+ "allowed_models": ["gpt-4o", "gpt-4o-mini"],
+ "key_ids": ["*"],
+ "weight": 1
+ },
+ {
+ "provider": "anthropic",
+ "allowed_models": ["*"],
+ "key_ids": ["*"],
+ "weight": 1
+ }
+ ]
+ }
+ ]
+ }
+}
+```
+
+### Virtual Key Fields
+
+| Field | Required | Description |
+|-------|----------|-------------|
+| `id` | Yes | Unique virtual key ID (referenced by budgets / rate limits) |
+| `name` | Yes | Human-readable name |
+| `value` | No | The key token sent by clients (use `env.` prefix). Auto-generated if omitted |
+| `is_active` | No | Default `true`. Set `false` to disable without deleting |
+| `team_id` | No | Associate with a team (mutually exclusive with `customer_id`) |
+| `customer_id` | No | Associate with a customer |
+| `rate_limit_id` | No | Attach a rate limit |
+| `calendar_aligned` | No | Snap budget resets to day/week/month/year boundaries |
+| `provider_configs` | No | Allowed provider/model/key combinations (empty = deny all) |
+
+### Provider Config Fields
+
+| Field | Required | Description |
+|-------|----------|-------------|
+| `provider` | Yes | Provider name (e.g. `"openai"`) |
+| `allowed_models` | No | Model allow-list. `["*"]` = all models; `[]` = deny all |
+| `key_ids` | No | Provider key names allowed for this VK. `["*"]` = all keys; `[]` = deny all. Use key `name` values (not UUIDs) in `config.json` |
+| `weight` | No | Load-balancing weight when multiple provider configs are present |
+| `rate_limit_id` | No | Attach a per-provider-config rate limit |
+
+---
+
+## Budgets
+
+Budgets cap cumulative spend (in USD) for a virtual key or provider config over a rolling window:
+
+```json
+{
+ "governance": {
+ "budgets": [
+ {
+ "id": "budget-platform-monthly",
+ "max_limit": 500.00,
+ "reset_duration": "1M",
+ "virtual_key_id": "vk-team-platform"
+ }
+ ]
+ }
+}
+```
+
+| Field | Required | Description |
+|-------|----------|-------------|
+| `id` | Yes | Unique budget ID |
+| `max_limit` | Yes | Maximum spend in USD |
+| `reset_duration` | Yes | Window length: `"30s"`, `"5m"`, `"1h"`, `"1d"`, `"1w"`, `"1M"`, `"1Y"` |
+| `virtual_key_id` | No | Attach to a virtual key (mutually exclusive with `provider_config_id`) |
+| `provider_config_id` | No | Attach to a provider config ID |
+
+---
+
+## Rate Limits
+
+Rate limits cap requests or tokens over a rolling window:
+
+```json
+{
+ "governance": {
+ "rate_limits": [
+ {
+ "id": "rl-platform-hourly",
+ "request_max_limit": 1000,
+ "request_reset_duration": "1h",
+ "token_max_limit": 1000000,
+ "token_reset_duration": "1h"
+ }
+ ]
+ }
+}
+```
+
+| Field | Required | Description |
+|-------|----------|-------------|
+| `id` | Yes | Unique rate limit ID |
+| `request_max_limit` | No | Maximum requests in window |
+| `request_reset_duration` | No | Window for request counter |
+| `token_max_limit` | No | Maximum tokens (input + output) in window |
+| `token_reset_duration` | No | Window for token counter |
+
+Attach a rate limit to a virtual key via `virtual_keys[].rate_limit_id`, or to a provider config via `virtual_keys[].provider_configs[].rate_limit_id`.
+
+---
+
+## Routing Rules
+
+Routing rules dynamically select the provider and model for each request based on a [CEL](https://cel.dev) expression. They are evaluated in priority order before the request is dispatched.
+
+```json
+{
+ "governance": {
+ "routing_rules": [
+ {
+ "id": "route-gpt4-to-azure",
+ "name": "Redirect GPT-4o to Azure",
+ "cel_expression": "request.model == 'gpt-4o'",
+ "targets": [
+ { "provider": "azure", "model": "gpt-4o", "weight": 1.0 }
+ ]
+ },
+ {
+ "id": "route-cost-split",
+ "name": "Split traffic 70/30 between providers",
+ "cel_expression": "true",
+ "targets": [
+ { "provider": "openai", "weight": 0.7 },
+ { "provider": "anthropic", "weight": 0.3 }
+ ]
+ }
+ ]
+ }
+}
+```
+
+### Rule Fields
+
+| Field | Required | Description |
+|-------|----------|-------------|
+| `id` | Yes | Unique rule ID |
+| `name` | Yes | Human-readable name |
+| `cel_expression` | No | CEL expression. `"true"` matches every request |
+| `targets` | Yes | Weighted target list (weights must sum to `1.0`) |
+| `enabled` | No | Default `true` |
+| `priority` | No | Evaluation order within scope — lower numbers run first |
+| `scope` | No | `"global"` (default), `"team"`, `"customer"`, `"virtual_key"` |
+| `scope_id` | Conditional | Required when `scope` is not `"global"` |
+| `chain_rule` | No | If `true`, re-evaluates the chain after this rule matches |
+| `fallbacks` | No | Ordered fallback provider list if primary target fails |
+
+### Target Fields
+
+| Field | Required | Description |
+|-------|----------|-------------|
+| `weight` | Yes | Fraction of traffic (all weights in a rule must sum to `1.0`) |
+| `provider` | No | Target provider. Omit to keep the incoming request's provider |
+| `model` | No | Target model. Omit to keep the incoming request's model |
+| `key_id` | No | Pin a specific API key by name |
+
+---
+
+## Customers & Teams
+
+Define organizational entities and attach budgets or rate limits to them:
+
+```json
+{
+ "governance": {
+ "customers": [
+ {
+ "id": "customer-acme",
+ "name": "Acme Corp",
+ "budget_id": "budget-acme-monthly",
+ "rate_limit_id": "rl-acme-hourly"
+ }
+ ],
+ "teams": [
+ {
+ "id": "team-ml",
+ "name": "ML Team",
+ "customer_id": "customer-acme",
+ "budget_id": "budget-team-ml"
+ }
+ ]
+ }
+}
+```
+
+---
+
+## Full Governance Example
+
+```json
+{
+ "$schema": "https://www.getbifrost.ai/schema",
+ "encryption_key": "env.BIFROST_ENCRYPTION_KEY",
+
+ "client": {
+ "enforce_auth_on_inference": true
+ },
+
+ "governance": {
+ "auth_config": {
+ "is_enabled": true,
+ "admin_username": "env.BIFROST_ADMIN_USERNAME",
+ "admin_password": "env.BIFROST_ADMIN_PASSWORD"
+ },
+
+ "budgets": [
+ {
+ "id": "budget-platform",
+ "max_limit": 1000.00,
+ "reset_duration": "1M",
+ "virtual_key_id": "vk-platform"
+ }
+ ],
+
+ "rate_limits": [
+ {
+ "id": "rl-platform",
+ "request_max_limit": 5000,
+ "request_reset_duration": "1h",
+ "token_max_limit": 5000000,
+ "token_reset_duration": "1h"
+ }
+ ],
+
+ "virtual_keys": [
+ {
+ "id": "vk-platform",
+ "name": "platform-key",
+ "value": "env.VK_PLATFORM",
+ "is_active": true,
+ "rate_limit_id": "rl-platform",
+ "provider_configs": [
+ {
+ "provider": "openai",
+ "allowed_models": ["*"],
+ "key_ids": ["*"],
+ "weight": 1
+ }
+ ]
+ }
+ ],
+
+ "routing_rules": [
+ {
+ "id": "fallback-to-anthropic",
+ "name": "Fallback on error",
+ "cel_expression": "true",
+ "targets": [{ "provider": "openai", "weight": 1.0 }],
+ "fallbacks": ["anthropic"]
+ }
+ ]
+ },
+
+ "providers": {
+ "openai": {
+ "keys": [{ "name": "openai-primary", "value": "env.OPENAI_API_KEY", "models": ["*"], "weight": 1.0 }]
+ },
+ "anthropic": {
+ "keys": [{ "name": "anthropic-primary", "value": "env.ANTHROPIC_API_KEY", "models": ["*"], "weight": 1.0 }]
+ }
+ },
+
+ "config_store": {
+ "enabled": true,
+ "type": "postgres",
+ "config": {
+ "host": "env.PG_HOST",
+ "port": "5432",
+ "user": "env.PG_USER",
+ "password": "env.PG_PASSWORD",
+ "db_name": "bifrost"
+ }
+ }
+}
+```
diff --git a/docs/deployment-guides/config-json/guardrails.mdx b/docs/deployment-guides/config-json/guardrails.mdx
new file mode 100644
index 0000000000..f6258ca872
--- /dev/null
+++ b/docs/deployment-guides/config-json/guardrails.mdx
@@ -0,0 +1,291 @@
+---
+title: "Guardrails"
+description: "Configure content moderation and policy enforcement in config.json using guardrails_config"
+icon: "shield-halved"
+---
+
+
+Guardrails are an **enterprise-only** feature and require the enterprise Bifrost image.
+
+
+Guardrails are configured under `guardrails_config` in `config.json`. The configuration has two parts:
+
+- **`guardrail_providers`** — the backend that performs the check. Rules link to providers by `id`.
+- **`guardrail_rules`** — CEL expressions that control when and where providers are invoked.
+
+---
+
+## Providers
+
+
+
+
+Runs entirely in-process with no external dependency. Patterns use RE2 syntax. Supports optional per-pattern flags: `i` (case-insensitive), `m` (multiline), `s` (dot-all).
+
+```json
+{
+ "guardrails_config": {
+ "guardrail_providers": [
+ {
+ "id": 1,
+ "provider_name": "regex",
+ "policy_name": "block-secrets",
+ "enabled": true,
+ "timeout": 5,
+ "config": {
+ "patterns": [
+ { "pattern": "sk-[A-Za-z0-9]{20,}", "description": "OpenAI API key" },
+ { "pattern": "AKIA[0-9A-Z]{16}", "description": "AWS access key" },
+ { "pattern": "gh[ps]_[A-Za-z0-9]{36}", "description": "GitHub token", "flags": "i" }
+ ],
+ "mode": "block"
+ }
+ }
+ ]
+ }
+}
+```
+
+
+
+
+```json
+{
+ "guardrails_config": {
+ "guardrail_providers": [
+ {
+ "id": 2,
+ "provider_name": "bedrock",
+ "policy_name": "content-filter",
+ "enabled": true,
+ "timeout": 15,
+ "config": {
+ "guardrail_arn": "arn:aws:bedrock:us-east-1::guardrail/abc123",
+ "guardrail_version": "DRAFT",
+ "region": "us-east-1",
+ "access_key": "env.AWS_ACCESS_KEY_ID",
+ "secret_key": "env.AWS_SECRET_ACCESS_KEY"
+ }
+ }
+ ]
+ }
+}
+```
+
+
+
+
+```json
+{
+ "guardrails_config": {
+ "guardrail_providers": [
+ {
+ "id": 3,
+ "provider_name": "azure",
+ "policy_name": "azure-content-safety",
+ "enabled": true,
+ "timeout": 10,
+ "config": {
+ "endpoint": "https://your-resource.cognitiveservices.azure.com",
+ "api_key": "env.AZURE_CONTENT_SAFETY_KEY",
+ "analyze_enabled": true,
+ "analyze_severity_threshold": "medium",
+ "jailbreak_shield_enabled": true,
+ "indirect_attack_shield_enabled": true,
+ "copyright_enabled": false,
+ "text_blocklist_enabled": false,
+ "blocklist_names": []
+ }
+ }
+ ]
+ }
+}
+```
+
+`analyze_severity_threshold` accepts `"low"`, `"medium"`, or `"high"`.
+
+
+
+
+```json
+{
+ "guardrails_config": {
+ "guardrail_providers": [
+ {
+ "id": 4,
+ "provider_name": "grayswan",
+ "policy_name": "grayswan-jailbreak",
+ "enabled": true,
+ "timeout": 15,
+ "config": {
+ "api_key": "env.GRAYSWAN_API_KEY",
+ "violation_threshold": 0.7,
+ "reasoning_mode": "standard",
+ "policy_id": "",
+ "policy_ids": [],
+ "rules": {}
+ }
+ }
+ ]
+ }
+}
+```
+
+
+
+
+### Provider Fields
+
+| Field | Required | Description |
+|-------|----------|-------------|
+| `id` | Yes | Unique integer ID — referenced by rules via `provider_config_ids` |
+| `provider_name` | Yes | Backend: `"regex"`, `"bedrock"`, `"azure"`, `"grayswan"` |
+| `policy_name` | Yes | Human-readable policy label |
+| `enabled` | Yes | `true` to activate |
+| `timeout` | No | Execution timeout in seconds |
+| `config` | No | Provider-specific configuration object |
+
+---
+
+## Rules
+
+Rules are CEL expressions that fire when their condition matches. Available CEL variables:
+
+| Variable | Type | Description |
+|----------|------|-------------|
+| `model` | `string` | Model name from the request |
+| `provider` | `string` | Provider name (e.g. `"openai"`) |
+| `headers` | `map` | HTTP request headers |
+| `params` | `map` | Query parameters |
+| `customer` | `string` | Customer ID |
+| `team` | `string` | Team ID |
+| `user` | `string` | User ID |
+
+```json
+{
+ "guardrails_config": {
+ "guardrail_rules": [
+ {
+ "id": 101,
+ "name": "block-secrets-input",
+ "description": "Block prompts containing credentials",
+ "enabled": true,
+ "cel_expression": "true",
+ "apply_to": "input",
+ "sampling_rate": 100,
+ "timeout": 10,
+ "provider_config_ids": [1]
+ },
+ {
+ "id": 102,
+ "name": "content-safety-gpt4o-output",
+ "enabled": true,
+ "cel_expression": "model == 'gpt-4o'",
+ "apply_to": "output",
+ "sampling_rate": 100,
+ "timeout": 15,
+ "provider_config_ids": [3]
+ },
+ {
+ "id": 103,
+ "name": "grayswan-openai-partial",
+ "enabled": true,
+ "cel_expression": "provider == 'openai'",
+ "apply_to": "input",
+ "sampling_rate": 50,
+ "timeout": 20,
+ "provider_config_ids": [4]
+ }
+ ]
+ }
+}
+```
+
+### Rule Fields
+
+| Field | Required | Description |
+|-------|----------|-------------|
+| `id` | Yes | Unique integer ID |
+| `name` | Yes | Human-readable name |
+| `description` | No | Optional description |
+| `enabled` | Yes | `true` to activate |
+| `cel_expression` | Yes | CEL boolean expression. `"true"` matches every request |
+| `apply_to` | Yes | `"input"`, `"output"`, or `"both"` |
+| `sampling_rate` | No | `0`–`100`; percentage of requests to evaluate (default: `100`) |
+| `timeout` | No | Rule timeout in seconds |
+| `provider_config_ids` | No | `id` values of providers to invoke when this rule matches. Multiple providers run in parallel |
+
+---
+
+## Full Example
+
+```json
+{
+ "$schema": "https://www.getbifrost.ai/schema",
+ "encryption_key": "env.BIFROST_ENCRYPTION_KEY",
+
+ "providers": {
+ "openai": {
+ "keys": [{ "name": "primary", "value": "env.OPENAI_API_KEY", "models": ["*"], "weight": 1.0 }]
+ }
+ },
+
+ "guardrails_config": {
+ "guardrail_providers": [
+ {
+ "id": 1,
+ "provider_name": "regex",
+ "policy_name": "block-secrets",
+ "enabled": true,
+ "timeout": 5,
+ "config": {
+ "patterns": [
+ { "pattern": "sk-[A-Za-z0-9]{20,}", "description": "OpenAI API key" },
+ { "pattern": "AKIA[0-9A-Z]{16}", "description": "AWS access key" }
+ ],
+ "mode": "block"
+ }
+ },
+ {
+ "id": 2,
+ "provider_name": "azure",
+ "policy_name": "content-safety",
+ "enabled": true,
+ "timeout": 10,
+ "config": {
+ "endpoint": "https://your-resource.cognitiveservices.azure.com",
+ "api_key": "env.AZURE_CONTENT_SAFETY_KEY",
+ "analyze_enabled": true,
+ "analyze_severity_threshold": "medium",
+ "jailbreak_shield_enabled": true,
+ "indirect_attack_shield_enabled": false
+ }
+ }
+ ],
+ "guardrail_rules": [
+ {
+ "id": 101,
+ "name": "block-secrets-input",
+ "description": "Block prompts leaking credentials",
+ "enabled": true,
+ "cel_expression": "true",
+ "apply_to": "input",
+ "sampling_rate": 100,
+ "timeout": 10,
+ "provider_config_ids": [1]
+ },
+ {
+ "id": 102,
+ "name": "content-safety-both",
+ "description": "Azure content safety on all traffic",
+ "enabled": true,
+ "cel_expression": "true",
+ "apply_to": "both",
+ "sampling_rate": 100,
+ "timeout": 15,
+ "provider_config_ids": [2]
+ }
+ ]
+ }
+}
+```
diff --git a/docs/deployment-guides/config-json/plugins.mdx b/docs/deployment-guides/config-json/plugins.mdx
new file mode 100644
index 0000000000..847f290e02
--- /dev/null
+++ b/docs/deployment-guides/config-json/plugins.mdx
@@ -0,0 +1,318 @@
+---
+title: "Plugins"
+description: "Configure Bifrost plugins in config.json — semantic cache, OpenTelemetry, Maxim, Datadog, and custom plugins"
+icon: "puzzle-piece"
+---
+
+
+**The `plugins` array only controls explicitly opt-in plugins**: `semantic_cache`, `otel`, `maxim`, `datadog` (enterprise), and custom plugins.
+
+**Telemetry, logging, and governance are auto-loaded built-ins** — they are always active and configured via the `client` block and dedicated top-level keys, not the `plugins` array.
+
+
+---
+
+## Auto-Loaded Built-ins
+
+These plugins start automatically. You do **not** add them to the `plugins` array.
+
+| Plugin | Always active? | How to configure |
+|--------|---------------|-----------------|
+| **Telemetry** (Prometheus `/metrics`) | Yes, always | `client.prometheus_labels` for custom labels; push gateway via `plugins` entry once DB-backed mode is running |
+| **Logging** | When `client.enable_logging: true` and `logs_store` is configured | `client.enable_logging`, `client.disable_content_logging`, `client.logging_headers` |
+| **Governance** | Yes, always (OSS) | `client.enforce_auth_on_inference` for VK enforcement; `governance.*` for virtual keys / budgets / routing rules |
+
+See [Client Configuration](/deployment-guides/config-json/client) and [Governance](/deployment-guides/config-json/governance) for full details.
+
+---
+
+## Plugin Array Structure
+
+Plugin entries use the following fields. Fields marked **DB-Backed Only** are ignored in `config.json` and apply only when plugins are managed via the UI/Database:
+
+| Field | Type | Required | Description |
+|-------|------|----------|-------------|
+| `name` | string | Yes | Plugin name |
+| `enabled` | boolean | Yes | Enable or disable this plugin |
+| `config` | object | Varies | Plugin-specific configuration |
+| `path` | string | No | Path to a custom plugin binary or WASM file |
+| `version` | integer | No | 🛑 **DB-Backed Only.** Plugin metadata persisted on `TablePlugin` rather than `PluginConfig`. Ignored in `config.json`. Used in UI/DB workflows to force refresh/reload. |
+| `placement` | string | No | 🛑 **DB-Backed Only.** Execution metadata (`"pre_builtin"`, `"builtin"`, `"post_builtin"`) persisted on `TablePlugin`. Ignored in `config.json`. Relevant for dynamic plugin ordering in UI/DB mode. |
+| `order` | integer | No | 🛑 **DB-Backed Only.** Execution metadata persisted on `TablePlugin`. Ignored in `config.json`. Within a placement group, lower values run earlier. |
+
+
+`name`, `enabled`, `path`, and `config` are the core plugin config fields parsed from `config.json`. `version`, `placement`, and `order` are **not valid `config.json` keys**; they are DB-backed metadata persisted on `TablePlugin` and are only applicable when managing plugins dynamically via the UI or Database.
+
+
+---
+
+
+
+
+
+### Semantic Cache
+
+Caches LLM responses by semantic similarity. Returns a cached response when an incoming request is semantically close enough to a previous one.
+
+Requires a [vector store](/deployment-guides/config-json/storage#vector_store) to be configured.
+
+| Field | Required | Default | Description |
+|-------|----------|---------|-------------|
+| `config.dimension` | Yes | — | Embedding dimension. Use `1` for hash-based (exact) caching without an embedding provider |
+| `config.provider` | No | — | Provider for generating embeddings (required for semantic mode) |
+| `config.embedding_model` | No | — | Model for embeddings (required when `provider` is set) |
+| `config.threshold` | No | `0.8` | Cosine similarity threshold for a cache hit (0.0–1.0) |
+| `config.ttl` | No | `300` | Cache entry TTL in seconds (or a duration string like `"1h"`) |
+| `config.cache_by_model` | No | `true` | Include model in cache key |
+| `config.cache_by_provider` | No | `true` | Include provider in cache key |
+| `config.exclude_system_prompt` | No | `false` | Exclude system prompt from cache key |
+| `config.conversation_history_threshold` | No | `3` | Skip caching for requests with more messages than this |
+| `config.default_cache_key` | No | — | Default cache key when no `x-bf-cache-key` header is sent |
+
+**Semantic mode** (embedding-based similarity search):
+
+```json
+{
+ "plugins": [
+ {
+ "name": "semantic_cache",
+ "enabled": true,
+ "config": {
+ "provider": "openai",
+ "embedding_model": "text-embedding-3-small",
+ "dimension": 1536,
+ "threshold": 0.85,
+ "ttl": 300,
+ "cache_by_model": true,
+ "cache_by_provider": true
+ }
+ }
+ ]
+}
+```
+
+**Hash mode** (exact-match caching, no embedding provider needed):
+
+```json
+{
+ "plugins": [
+ {
+ "name": "semantic_cache",
+ "enabled": true,
+ "config": {
+ "dimension": 1,
+ "ttl": 1800
+ }
+ }
+ ]
+}
+```
+
+
+You must also configure a `vector_store` in `config.json`. See [Storage — vector_store](/deployment-guides/config-json/storage#vector_store).
+
+
+
+
+
+
+### OpenTelemetry (OTel)
+
+Exports distributed traces to any OTel-compatible collector (Jaeger, Zipkin, Tempo, Datadog via OTLP, etc.).
+
+| Field | Required | Default | Description |
+|-------|----------|---------|-------------|
+| `config.collector_url` | Yes | — | OTLP collector endpoint |
+| `config.trace_type` | Yes | — | Trace format: `"genai_extension"`, `"vercel"`, or `"open_inference"` |
+| `config.protocol` | Yes | — | `"http"` or `"grpc"` |
+| `config.service_name` | No | `"bifrost"` | Service name reported to the collector |
+| `config.metrics_enabled` | No | `false` | Enable push-based OTLP metrics export |
+| `config.metrics_endpoint` | No | — | OTLP metrics endpoint URL |
+| `config.metrics_push_interval` | No | `15` | Metrics push interval in seconds |
+| `config.headers` | No | — | Custom headers for the collector (supports `env.` prefix) |
+| `config.insecure` | No | `false` | Skip TLS verification |
+| `config.tls_ca_cert` | No | — | Path to TLS CA certificate |
+
+```json
+{
+ "plugins": [
+ {
+ "name": "otel",
+ "enabled": true,
+ "config": {
+ "collector_url": "http://otel-collector:4318",
+ "trace_type": "genai_extension",
+ "protocol": "http",
+ "service_name": "bifrost-gateway"
+ }
+ }
+ ]
+}
+```
+
+**With authentication headers:**
+
+```json
+{
+ "plugins": [
+ {
+ "name": "otel",
+ "enabled": true,
+ "config": {
+ "collector_url": "https://otel.example.com:4318",
+ "trace_type": "open_inference",
+ "protocol": "http",
+ "service_name": "bifrost",
+ "headers": {
+ "Authorization": "env.OTEL_AUTH_HEADER"
+ }
+ }
+ }
+ ]
+}
+```
+
+**With OTLP metrics export:**
+
+```json
+{
+ "plugins": [
+ {
+ "name": "otel",
+ "enabled": true,
+ "config": {
+ "collector_url": "http://otel-collector:4318",
+ "trace_type": "genai_extension",
+ "protocol": "http",
+ "metrics_enabled": true,
+ "metrics_endpoint": "http://otel-collector:4318/v1/metrics",
+ "metrics_push_interval": 30
+ }
+ }
+ ]
+}
+```
+
+
+
+
+
+### Maxim Observability
+
+Sends request traces to the [Maxim](https://www.getmaxim.ai) observability platform.
+
+| Field | Required | Description |
+|-------|----------|-------------|
+| `config.api_key` | Yes | Maxim API key (use `env.` prefix) |
+| `config.log_repo_id` | No | Default Maxim logger repository ID |
+
+```json
+{
+ "plugins": [
+ {
+ "name": "maxim",
+ "enabled": true,
+ "config": {
+ "api_key": "env.MAXIM_API_KEY",
+ "log_repo_id": "your-log-repo-id"
+ }
+ }
+ ]
+}
+```
+
+
+
+
+
+### Datadog
+
+
+Datadog is an **enterprise-only** plugin and is silently ignored in OSS builds.
+
+
+Sends APM traces and metrics to a Datadog Agent.
+
+| Field | Default | Description |
+|-------|---------|-------------|
+| `config.agent_addr` | `"localhost:8126"` | Datadog Agent address for APM traces |
+| `config.service_name` | `"bifrost"` | Service name in Datadog |
+| `config.env` | — | Environment tag (e.g. `"production"`, `"staging"`) |
+| `config.version` | — | Service version tag |
+| `config.enable_traces` | `true` | Enable APM trace collection |
+| `config.custom_tags` | `{}` | Additional key/value tags for all traces and metrics |
+
+```json
+{
+ "plugins": [
+ {
+ "name": "datadog",
+ "enabled": true,
+ "config": {
+ "agent_addr": "datadog-agent:8126",
+ "service_name": "bifrost",
+ "env": "production",
+ "enable_traces": true,
+ "custom_tags": {
+ "team": "platform",
+ "region": "us-east-1"
+ }
+ }
+ }
+ ]
+}
+```
+
+
+
+
+
+---
+
+## Custom / Dynamic Plugins
+
+Load a custom Go plugin binary or WASM plugin at startup using the `path` field. Custom plugins must implement one of the Bifrost plugin interfaces.
+
+```json
+{
+ "plugins": [
+ {
+ "name": "my-custom-auth",
+ "enabled": true,
+ "path": "/app/plugins/my-custom-auth.so",
+ "config": {
+ "auth_endpoint": "env.AUTH_SERVICE_URL"
+ }
+ }
+ ]
+}
+```
+
+**WASM plugin:**
+
+```json
+{
+ "plugins": [
+ {
+ "name": "my-wasm-plugin",
+ "enabled": true,
+ "path": "/app/plugins/my-plugin.wasm",
+ "config": {}
+ }
+ ]
+}
+```
+
+See [Writing Go Plugins](/plugins/writing-go-plugin) and [Writing WASM Plugins](/plugins/writing-wasm-plugin) for implementation guides.
+
+**Placement and ordering (DB-backed only):**
+
+When creating plugins dynamically via the DB/UI (rather than `config.json`), you can specify their execution order:
+
+| `placement` | When it runs |
+|-------------|-------------|
+| `pre_builtin` | Before all built-in plugins |
+| `builtin` | Alongside built-in plugins (by `order`) |
+| `post_builtin` | After all built-in plugins (default) |
+
+Within a placement group, lower `order` values run earlier.
diff --git a/docs/deployment-guides/config-json/providers.mdx b/docs/deployment-guides/config-json/providers.mdx
new file mode 100644
index 0000000000..ca07e0e5f8
--- /dev/null
+++ b/docs/deployment-guides/config-json/providers.mdx
@@ -0,0 +1,755 @@
+---
+title: "Provider Setup"
+description: "Configure LLM providers in config.json — API keys, cloud-native auth, per-provider network settings, and self-hosted endpoints"
+icon: "plug"
+---
+
+All providers are configured under `providers` in `config.json`. Each provider entry contains a `keys` array where every key has a `name`, `value`, `models`, and `weight`, plus optional provider-specific config objects.
+
+**Supplying credentials:**
+
+Use the `env.` prefix to reference environment variables — never put API keys directly in `config.json`:
+
+```json
+{
+ "providers": {
+ "openai": {
+ "keys": [
+ {
+ "name": "primary",
+ "value": "env.OPENAI_API_KEY",
+ "models": ["*"],
+ "weight": 1.0
+ }
+ ]
+ }
+ }
+}
+```
+
+---
+
+## Common Provider Fields
+
+Every key object supports these fields:
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `name` | string | Unique name for this key (used in logs and for virtual-key pinning) |
+| `value` | string | API key value or `env.VAR_NAME` reference |
+| `models` | array | Models this key serves. `["*"]` = all models |
+| `weight` | float | Load balancing weight. Higher = more traffic |
+| `aliases` | object | Map logical name → actual model name for this key |
+| `use_for_batch_api` | boolean | Mark key as eligible for batch API calls |
+
+Per-provider `network_config` options (applies to all standard providers):
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `default_request_timeout_in_seconds` | integer | Per-request timeout |
+| `max_retries` | integer | Retry attempts on transient errors |
+| `retry_backoff_initial` | integer | Initial backoff in milliseconds |
+| `retry_backoff_max` | integer | Maximum backoff in milliseconds |
+| `max_conns_per_host` | integer | Max TCP connections to the provider endpoint (default: 5000) |
+| `extra_headers` | object | Static headers added to every provider request |
+| `stream_idle_timeout_in_seconds` | integer | Idle timeout per stream chunk (default: 60) |
+| `insecure_skip_verify` | boolean | Disable TLS verification (last resort only) |
+| `ca_cert_pem` | string | PEM-encoded CA for self-signed or private CA endpoints |
+
+Concurrency and buffering per provider:
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `concurrency_and_buffer_size.concurrency` | integer | Max concurrent requests to this provider |
+| `concurrency_and_buffer_size.buffer_size` | integer | Request queue depth |
+
+---
+
+
+
+
+
+### OpenAI
+
+Supports multiple keys with weighted load balancing. Mark one key with `use_for_batch_api: true` to designate it for the Batch API.
+
+```json
+{
+ "providers": {
+ "openai": {
+ "keys": [
+ {
+ "name": "openai-primary",
+ "value": "env.OPENAI_KEY_1",
+ "models": ["*"],
+ "weight": 2.0
+ },
+ {
+ "name": "openai-secondary",
+ "value": "env.OPENAI_KEY_2",
+ "models": ["gpt-4o-mini"],
+ "weight": 1.0
+ },
+ {
+ "name": "openai-batch",
+ "value": "env.OPENAI_KEY_BATCH",
+ "models": ["*"],
+ "weight": 1.0,
+ "use_for_batch_api": true
+ }
+ ],
+ "network_config": {
+ "default_request_timeout_in_seconds": 120,
+ "max_retries": 3,
+ "retry_backoff_initial": 500,
+ "retry_backoff_max": 5000
+ }
+ }
+ }
+}
+```
+
+
+
+
+
+### Anthropic
+
+```json
+{
+ "providers": {
+ "anthropic": {
+ "keys": [
+ {
+ "name": "anthropic-primary",
+ "value": "env.ANTHROPIC_KEY_1",
+ "models": ["*"],
+ "weight": 1.0
+ },
+ {
+ "name": "anthropic-secondary",
+ "value": "env.ANTHROPIC_KEY_2",
+ "models": ["*"],
+ "weight": 1.0
+ }
+ ],
+ "network_config": {
+ "default_request_timeout_in_seconds": 180
+ }
+ }
+ }
+}
+```
+
+**Override Anthropic beta headers** (optional):
+
+```json
+{
+ "providers": {
+ "anthropic": {
+ "keys": [
+ {
+ "name": "primary",
+ "value": "env.ANTHROPIC_API_KEY",
+ "models": ["*"],
+ "weight": 1.0
+ }
+ ],
+ "network_config": {
+ "beta_header_overrides": {
+ "redact-thinking-": true
+ }
+ }
+ }
+ }
+}
+```
+
+
+
+
+
+### Azure OpenAI
+
+Azure requires `azure_key_config` on every key with `endpoint` and `api_version`. List your Azure deployment names in `models` — Bifrost routes requests using the model name as the deployment name. If your deployment names differ from the model names you use in requests, add an `aliases` map on the key.
+
+
+
+
+```json
+{
+ "providers": {
+ "azure": {
+ "keys": [
+ {
+ "name": "azure-primary",
+ "value": "env.AZURE_API_KEY",
+ "models": ["gpt-4o", "gpt-4o-mini"],
+ "weight": 1.0,
+ "azure_key_config": {
+ "endpoint": "env.AZURE_ENDPOINT",
+ "api_version": "env.AZURE_API_VERSION"
+ }
+ }
+ ]
+ }
+ }
+}
+```
+
+Set environment variables:
+
+```bash
+export AZURE_API_KEY="your-azure-api-key"
+export AZURE_ENDPOINT="https://your-resource.openai.azure.com"
+export AZURE_API_VERSION="2024-10-21"
+```
+
+
+
+
+When `value` is empty or omitted, Bifrost uses `DefaultAzureCredential` — which resolves credentials from Workload Identity, VM managed identity, or `az login`.
+
+```json
+{
+ "providers": {
+ "azure": {
+ "keys": [
+ {
+ "name": "azure-workload-identity",
+ "value": "",
+ "models": ["gpt-4o"],
+ "weight": 1.0,
+ "azure_key_config": {
+ "endpoint": "env.AZURE_ENDPOINT",
+ "api_version": "env.AZURE_API_VERSION"
+ }
+ }
+ ]
+ }
+ }
+}
+```
+
+
+
+
+**Deployment name aliases** — when your Azure deployment names differ from the model names in requests, use `aliases`:
+
+```json
+{
+ "providers": {
+ "azure": {
+ "keys": [
+ {
+ "name": "azure-primary",
+ "value": "env.AZURE_API_KEY",
+ "models": ["gpt-4o"],
+ "weight": 1.0,
+ "aliases": {
+ "gpt-4o": "gpt-4o-prod-deployment"
+ },
+ "azure_key_config": {
+ "endpoint": "env.AZURE_ENDPOINT",
+ "api_version": "env.AZURE_API_VERSION"
+ }
+ }
+ ]
+ }
+ }
+}
+```
+
+**Multi-region failover** (two keys, different regions):
+
+```json
+{
+ "providers": {
+ "azure": {
+ "keys": [
+ {
+ "name": "eastus",
+ "value": "env.AZURE_KEY_EAST",
+ "models": ["gpt-4o"],
+ "weight": 1.0,
+ "azure_key_config": {
+ "endpoint": "env.AZURE_ENDPOINT_EAST",
+ "api_version": "env.AZURE_API_VERSION"
+ }
+ },
+ {
+ "name": "westus",
+ "value": "env.AZURE_KEY_WEST",
+ "models": ["gpt-4o"],
+ "weight": 1.0,
+ "azure_key_config": {
+ "endpoint": "env.AZURE_ENDPOINT_WEST",
+ "api_version": "env.AZURE_API_VERSION"
+ }
+ }
+ ]
+ }
+ }
+}
+```
+
+
+
+
+
+### AWS Bedrock
+
+Bedrock requires `bedrock_key_config` with at minimum a `region`. Three auth modes:
+
+
+
+
+```json
+{
+ "providers": {
+ "bedrock": {
+ "keys": [
+ {
+ "name": "bedrock-static",
+ "value": "",
+ "models": ["*"],
+ "weight": 1.0,
+ "bedrock_key_config": {
+ "region": "us-east-1",
+ "access_key": "env.AWS_ACCESS_KEY_ID",
+ "secret_key": "env.AWS_SECRET_ACCESS_KEY"
+ }
+ }
+ ]
+ }
+ }
+}
+```
+
+
+
+
+When only `region` is set, Bifrost inherits credentials from the AWS SDK default chain — IRSA (IAM Roles for Service Accounts), EC2 instance profile, or `AWS_*` env vars.
+
+```json
+{
+ "providers": {
+ "bedrock": {
+ "keys": [
+ {
+ "name": "bedrock-iam",
+ "value": "",
+ "models": ["*"],
+ "weight": 1.0,
+ "bedrock_key_config": {
+ "region": "us-east-1"
+ }
+ }
+ ]
+ }
+ }
+}
+```
+
+
+
+
+```json
+{
+ "providers": {
+ "bedrock": {
+ "keys": [
+ {
+ "name": "bedrock-assumerole",
+ "value": "",
+ "models": ["*"],
+ "weight": 1.0,
+ "bedrock_key_config": {
+ "region": "us-west-2",
+ "role_arn": "env.AWS_ROLE_ARN",
+ "external_id": "env.AWS_EXTERNAL_ID",
+ "session_name": "bifrost-session"
+ }
+ }
+ ]
+ }
+ }
+}
+```
+
+
+
+
+**Model aliases** (map logical names to Bedrock inference profile IDs):
+
+```json
+{
+ "bedrock_key_config": {
+ "region": "us-east-1"
+ },
+ "aliases": {
+ "claude-sonnet": "us.anthropic.claude-3-5-sonnet-20241022-v2:0",
+ "claude-haiku": "us.anthropic.claude-3-5-haiku-20241022-v1:0"
+ }
+}
+```
+
+**Batch API — S3 configuration:**
+
+```json
+{
+ "bedrock_key_config": {
+ "region": "us-east-1",
+ "access_key": "env.AWS_ACCESS_KEY_ID",
+ "secret_key": "env.AWS_SECRET_ACCESS_KEY",
+ "batch_s3_config": {
+ "buckets": [
+ {
+ "bucket_name": "my-bedrock-batch-bucket",
+ "prefix": "batch/",
+ "is_default": true
+ }
+ ]
+ }
+ }
+}
+```
+
+
+
+
+
+### Google Vertex AI
+
+Vertex requires `vertex_key_config` with `project_id` and `region`. Two auth modes:
+
+
+
+
+```json
+{
+ "providers": {
+ "vertex": {
+ "keys": [
+ {
+ "name": "vertex-sa",
+ "value": "",
+ "models": ["*"],
+ "weight": 1.0,
+ "vertex_key_config": {
+ "project_id": "env.VERTEX_PROJECT_ID",
+ "region": "us-central1",
+ "auth_credentials": "env.VERTEX_AUTH_CREDENTIALS"
+ }
+ }
+ ]
+ }
+ }
+}
+```
+
+`VERTEX_AUTH_CREDENTIALS` should contain the base64-encoded service account JSON.
+
+
+
+
+When `auth_credentials` is omitted, Bifrost calls `google.FindDefaultCredentials` — which resolves to GKE Workload Identity, GCE metadata server, or `gcloud auth application-default login`.
+
+```json
+{
+ "providers": {
+ "vertex": {
+ "keys": [
+ {
+ "name": "vertex-workload-identity",
+ "value": "",
+ "models": ["*"],
+ "weight": 1.0,
+ "vertex_key_config": {
+ "project_id": "my-gcp-project",
+ "region": "us-central1"
+ }
+ }
+ ]
+ }
+ }
+}
+```
+
+
+
+
+
+
+
+
+### Standard API-Key Providers
+
+These providers follow the same simple pattern — one or more keys with weights. Replace the provider name and env var name accordingly.
+
+```json
+{
+ "providers": {
+ "groq": {
+ "keys": [
+ {
+ "name": "groq-primary",
+ "value": "env.GROQ_API_KEY",
+ "models": ["*"],
+ "weight": 1.0
+ }
+ ]
+ },
+ "gemini": {
+ "keys": [
+ {
+ "name": "gemini-primary",
+ "value": "env.GEMINI_API_KEY",
+ "models": ["*"],
+ "weight": 1.0
+ }
+ ]
+ },
+ "mistral": {
+ "keys": [
+ {
+ "name": "mistral-primary",
+ "value": "env.MISTRAL_API_KEY",
+ "models": ["*"],
+ "weight": 1.0
+ }
+ ]
+ },
+ "cohere": {
+ "keys": [{ "name": "cohere-main", "value": "env.COHERE_API_KEY", "models": ["*"], "weight": 1.0 }]
+ },
+ "perplexity": {
+ "keys": [{ "name": "perplexity-main", "value": "env.PERPLEXITY_API_KEY", "models": ["*"], "weight": 1.0 }]
+ },
+ "xai": {
+ "keys": [{ "name": "xai-main", "value": "env.XAI_API_KEY", "models": ["*"], "weight": 1.0 }]
+ },
+ "cerebras": {
+ "keys": [{ "name": "cerebras-main", "value": "env.CEREBRAS_API_KEY", "models": ["*"], "weight": 1.0 }]
+ },
+ "openrouter": {
+ "keys": [{ "name": "openrouter-main", "value": "env.OPENROUTER_API_KEY", "models": ["*"], "weight": 1.0 }]
+ },
+ "nebius": {
+ "keys": [{ "name": "nebius-main", "value": "env.NEBIUS_API_KEY", "models": ["*"], "weight": 1.0 }]
+ }
+ }
+}
+```
+
+
+
+
+
+### Self-Hosted Providers
+
+Self-hosted providers point to a URL you operate. No API key is typically required (`"value": ""`).
+
+
+
+
+```json
+{
+ "providers": {
+ "ollama": {
+ "keys": [
+ {
+ "name": "ollama-local",
+ "value": "",
+ "models": ["*"],
+ "weight": 1.0,
+ "ollama_key_config": {
+ "url": "http://localhost:11434"
+ }
+ }
+ ]
+ }
+ }
+}
+```
+
+Using an env var for the URL (useful across environments):
+
+```json
+{
+ "ollama_key_config": {
+ "url": "env.OLLAMA_URL"
+ }
+}
+```
+
+
+
+
+vLLM instances are model-specific — one key per served model:
+
+```json
+{
+ "providers": {
+ "vllm": {
+ "keys": [
+ {
+ "name": "vllm-llama3-70b",
+ "value": "",
+ "models": ["llama-3-70b"],
+ "weight": 1.0,
+ "vllm_key_config": {
+ "url": "http://vllm-server:8000",
+ "model_name": "meta-llama/Meta-Llama-3-70B-Instruct"
+ }
+ },
+ {
+ "name": "vllm-mistral",
+ "value": "",
+ "models": ["mistral-7b"],
+ "weight": 1.0,
+ "vllm_key_config": {
+ "url": "http://vllm-mistral:8000",
+ "model_name": "mistralai/Mistral-7B-Instruct-v0.3"
+ }
+ }
+ ]
+ }
+ }
+}
+```
+
+
+
+
+```json
+{
+ "providers": {
+ "sgl": {
+ "keys": [
+ {
+ "name": "sgl-main",
+ "value": "",
+ "models": ["*"],
+ "weight": 1.0,
+ "sgl_key_config": {
+ "url": "http://sgl-router:30000"
+ }
+ }
+ ]
+ }
+ }
+}
+```
+
+
+
+
+These providers use `aliases` to map logical model names to provider-specific IDs:
+
+```json
+{
+ "providers": {
+ "huggingface": {
+ "keys": [
+ {
+ "name": "hf-main",
+ "value": "env.HF_API_KEY",
+ "models": ["llama-3", "mixtral"],
+ "weight": 1.0,
+ "aliases": {
+ "llama-3": "meta-llama/Meta-Llama-3-8B-Instruct",
+ "mixtral": "mistralai/Mixtral-8x7B-Instruct-v0.1"
+ }
+ }
+ ]
+ },
+ "replicate": {
+ "keys": [
+ {
+ "name": "replicate-main",
+ "value": "env.REPLICATE_API_KEY",
+ "models": ["llama-3"],
+ "weight": 1.0,
+ "aliases": {
+ "llama-3": "meta/meta-llama-3-70b-instruct"
+ },
+ "replicate_key_config": {
+ "use_deployments_endpoint": false
+ }
+ }
+ ]
+ }
+ }
+}
+```
+
+
+
+
+
+
+
+
+---
+
+## Proxy Configuration
+
+Route provider traffic through an HTTP or SOCKS5 proxy:
+
+```json
+{
+ "providers": {
+ "openai": {
+ "keys": [
+ { "name": "primary", "value": "env.OPENAI_API_KEY", "models": ["*"], "weight": 1.0 }
+ ],
+ "proxy_config": {
+ "type": "http",
+ "url": "http://proxy.corp.example.com:3128",
+ "username": "env.PROXY_USER",
+ "password": "env.PROXY_PASS"
+ }
+ }
+ }
+}
+```
+
+| Field | Type | Options |
+|-------|------|---------|
+| `proxy_config.type` | string | `"none"`, `"http"`, `"socks5"`, `"environment"` |
+| `proxy_config.url` | string | Proxy server URL |
+| `proxy_config.username` | string | Proxy auth username |
+| `proxy_config.password` | string | Proxy auth password (`env.` supported) |
+| `proxy_config.ca_cert_pem` | string | PEM CA for TLS-intercepting proxies |
+
+Use `"type": "environment"` to pick up `HTTP_PROXY` / `HTTPS_PROXY` env vars automatically.
+
+---
+
+## Multi-Provider Example
+
+```json
+{
+ "$schema": "https://www.getbifrost.ai/schema",
+ "providers": {
+ "openai": {
+ "keys": [
+ { "name": "openai-primary", "value": "env.OPENAI_API_KEY", "models": ["*"], "weight": 2.0 }
+ ]
+ },
+ "anthropic": {
+ "keys": [
+ { "name": "anthropic-primary", "value": "env.ANTHROPIC_API_KEY", "models": ["*"], "weight": 1.0 }
+ ]
+ },
+ "groq": {
+ "keys": [
+ { "name": "groq-primary", "value": "env.GROQ_API_KEY", "models": ["*"], "weight": 1.0 }
+ ]
+ }
+ }
+}
+```
+
+With three providers and the weights above, traffic is distributed: 50% OpenAI, 25% Anthropic, 25% Groq. If any provider returns an error, Bifrost automatically retries on the next key or provider.
diff --git a/docs/deployment-guides/config-json/schema-reference.mdx b/docs/deployment-guides/config-json/schema-reference.mdx
new file mode 100644
index 0000000000..45b9b826ce
--- /dev/null
+++ b/docs/deployment-guides/config-json/schema-reference.mdx
@@ -0,0 +1,202 @@
+---
+title: "Schema Reference"
+description: "All top-level keys available in config.json, their types, and where each is documented"
+icon: "brackets-curly"
+---
+
+
+The live schema is published at [`https://www.getbifrost.ai/schema`](https://www.getbifrost.ai/schema). Add `"$schema": "https://www.getbifrost.ai/schema"` to your `config.json` for IDE autocomplete and inline validation.
+
+
+This page is a concise reference for every top-level key in `config.json`. Click the **Guide** links for full field-by-field documentation.
+
+---
+
+## Top-Level Keys
+
+| Key | Type | Description | Guide |
+|-----|------|-------------|-------|
+| `$schema` | string | Schema URL for IDE validation. Set to `"https://www.getbifrost.ai/schema"` | — |
+| `version` | integer | Allow-list semantics version: `2` (default) = empty arrays deny all; `1` = v1.4.x compatibility | [version](#version) |
+| `encryption_key` | string | AES-256 key (derived via Argon2id). Accepts `env.VAR` prefix. Also read from `BIFROST_ENCRYPTION_KEY` env var | [Client](/deployment-guides/config-json/client#encryption-key) |
+| `client` | object | Worker pool, logging, CORS, auth enforcement, header filtering, MCP, compat shims | [Client](/deployment-guides/config-json/client) |
+| `providers` | object | LLM provider API keys, network settings, concurrency | [Providers](/deployment-guides/config-json/providers) |
+| `governance` | object | Admin auth, virtual keys, budgets, rate limits, routing rules, customers, teams | [Governance](/deployment-guides/config-json/governance) |
+| `guardrails_config` | object | Content moderation providers and CEL-based rules *(enterprise only)* | [Guardrails](/deployment-guides/config-json/guardrails) |
+| `config_store` | object | Configuration database backend — SQLite, PostgreSQL, or disabled (file-only mode) | [Storage](/deployment-guides/config-json/storage#config_store) |
+| `logs_store` | object | Request/response log database — SQLite, PostgreSQL + optional S3/GCS offload | [Storage](/deployment-guides/config-json/storage#logs_store) |
+| `vector_store` | object | Vector database for semantic cache — Weaviate, Redis, Qdrant, Pinecone, Valkey | [Storage](/deployment-guides/config-json/storage#vector_store) |
+| `plugins` | array | Opt-in plugins: `semantic_cache`, `otel`, `maxim`, `datadog`, custom | [Plugins](/deployment-guides/config-json/plugins) |
+| `framework` | object | Model pricing catalog URL and sync interval | [Framework](#framework) |
+| `mcp` | object | MCP server and tool configuration | — |
+| `websocket` | object | WebSocket / Realtime API connection pool tuning | [WebSocket](#websocket) |
+| `auth_config` | object | **Deprecated** — use `governance.auth_config` | [Client](/deployment-guides/config-json/client#authentication) |
+
+---
+
+## `version`
+
+Controls how empty arrays in allow-list fields (`models`, `allowed_models`, `key_ids`, `tools_to_execute`) are interpreted:
+
+| Value | Behaviour |
+|-------|-----------|
+| `2` *(default, v1.5.0+)* | Empty array = **deny all**; `["*"]` = allow all |
+| `1` *(v1.4.x compat)* | Empty array = **allow all** |
+
+Omitting `version` uses v2 semantics. Set `"version": 1` only if you are migrating from v1.4.x and need the old behaviour temporarily.
+
+---
+
+## `client`
+
+Controls the worker pool, logging pipeline, security, and SDK shims. All fields are optional.
+
+| Field | Type | Default | Description |
+|-------|------|---------|-------------|
+| `initial_pool_size` | integer | `300` | Pre-allocated goroutines per provider queue |
+| `drop_excess_requests` | boolean | `false` | Return HTTP 429 when queue is full |
+| `enable_logging` | boolean | `true`* | Persist request/response logs (`*` auto-enabled when `logs_store` is set) |
+| `disable_content_logging` | boolean | `false` | Strip message content from logs |
+| `log_retention_days` | integer | `365` | Days to retain log entries |
+| `logging_headers` | array | `[]` | HTTP headers to capture in log metadata |
+| `enforce_auth_on_inference` | boolean | `false` | Require a virtual key on every `/v1/*` request |
+| `allow_direct_keys` | boolean | `false` | Allow callers to pass provider API keys directly |
+| `allowed_origins` | array | `["*"]` | CORS allowed origins |
+| `max_request_body_size_mb` | integer | `100` | Maximum request body in MB |
+| `whitelisted_routes` | array | `[]` | Routes that bypass auth middleware |
+| `allowed_headers` | array | `[]` | Additional headers permitted for CORS/WebSocket |
+| `required_headers` | array | `[]` | Headers that must be present on every request |
+| `header_filter_config` | object | — | `allowlist` / `denylist` for `x-bf-eh-*` forwarded headers |
+| `prometheus_labels` | array | `[]` | Custom labels for all Prometheus metrics |
+| `compat` | object | — | SDK compatibility shims (`should_drop_params`, `convert_text_to_chat`, etc.) |
+| `mcp_agent_depth` | integer | `10` | Max tool-call recursion depth |
+| `mcp_tool_execution_timeout` | integer | `30` | Per-tool execution timeout in seconds |
+| `mcp_tool_sync_interval` | integer | `10` | Tool sync interval in minutes (`0` = disabled) |
+| `mcp_disable_auto_tool_inject` | boolean | `false` | Disable automatic MCP tool injection |
+| `async_job_result_ttl` | integer | `3600` | TTL for async job results in seconds |
+| `disable_db_pings_in_health` | boolean | `false` | Exclude DB connectivity from `/health` |
+| `routing_chain_max_depth` | integer | `10` | Max routing rule chain evaluation depth |
+
+Full documentation: [Client Configuration](/deployment-guides/config-json/client).
+
+---
+
+## `providers`
+
+Keyed by provider name. Each entry contains a `keys` array and optional `network_config`, `concurrency_and_buffer_size`, `proxy_config`.
+
+Supported provider keys: `openai`, `anthropic`, `azure`, `bedrock`, `vertex`, `gemini`, `mistral`, `groq`, `cohere`, `perplexity`, `xai`, `cerebras`, `openrouter`, `nebius`, `fireworks`, `parasail`, `huggingface`, `replicate`, `ollama`, `vllm`, `sgl`, `elevenlabs`, `runway`.
+
+Full documentation: [Provider Setup](/deployment-guides/config-json/providers).
+
+---
+
+## `governance`
+
+Seeds governance resources at startup. All sub-keys are optional arrays.
+
+| Sub-key | Description |
+|---------|-------------|
+| `auth_config` | Admin username/password auth for the dashboard |
+| `virtual_keys` | Scoped API tokens with provider/model allowlists |
+| `budgets` | Spend caps in USD over a rolling window |
+| `rate_limits` | Request and token rate limits |
+| `customers` | Customer entities (attach budgets/rate limits) |
+| `teams` | Team entities (attach to customers, budgets, rate limits) |
+| `routing_rules` | CEL-based dynamic provider/model routing |
+| `pricing_overrides` | Scoped per-model pricing overrides |
+| `model_configs` | Per-model rate limit and budget configurations |
+
+Full documentation: [Governance](/deployment-guides/config-json/governance).
+
+---
+
+## `guardrails_config`
+
+Enterprise-only. Two sub-keys: `guardrail_providers` (array) and `guardrail_rules` (array).
+
+Full documentation: [Guardrails](/deployment-guides/config-json/guardrails).
+
+---
+
+## `config_store`, `logs_store`, `vector_store`
+
+Storage backends. Each has `enabled` (boolean), `type` (string), and `config` (object).
+
+| Store | Types |
+|-------|-------|
+| `config_store` | `"sqlite"`, `"postgres"` |
+| `logs_store` | `"sqlite"`, `"postgres"` (+ optional `object_storage`) |
+| `vector_store` | `"weaviate"`, `"redis"`, `"qdrant"`, `"pinecone"` (`"redis"` also covers Valkey-compatible endpoints) |
+
+Full documentation: [Storage](/deployment-guides/config-json/storage).
+
+---
+
+## `framework`
+
+Controls model pricing catalog sync:
+
+```json
+{
+ "framework": {
+ "pricing": {
+ "pricing_url": "https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json",
+ "pricing_sync_interval": 86400
+ }
+ }
+}
+```
+
+| Field | Default | Description |
+|-------|---------|-------------|
+| `pricing.pricing_url` | LiteLLM catalog | URL of a model pricing JSON file |
+| `pricing.pricing_sync_interval` | `86400` | Sync interval in seconds (minimum: `3600`) |
+
+---
+
+## `websocket`
+
+Optional tuning for the WebSocket gateway (Responses API WebSocket mode, Realtime API). WebSocket is always enabled.
+
+```json
+{
+ "websocket": {
+ "max_connections_per_user": 100,
+ "transcript_buffer_size": 100,
+ "pool": {
+ "max_idle_per_key": 50,
+ "max_total_connections": 1000,
+ "idle_timeout_seconds": 600,
+ "max_connection_lifetime_seconds": 7200
+ }
+ }
+}
+```
+
+| Field | Default | Description |
+|-------|---------|-------------|
+| `max_connections_per_user` | `100` | Max concurrent WebSocket connections per user |
+| `transcript_buffer_size` | `100` | Transcript entries buffered for Realtime API mid-session fallback |
+| `pool.max_idle_per_key` | `50` | Max idle upstream connections per provider/key |
+| `pool.max_total_connections` | `1000` | Max total idle upstream connections |
+| `pool.idle_timeout_seconds` | `600` | Evict idle connections after this many seconds |
+| `pool.max_connection_lifetime_seconds` | `7200` | Max lifetime of any upstream connection |
+
+---
+
+## Minimal Valid Config
+
+```json
+{
+ "$schema": "https://www.getbifrost.ai/schema",
+ "encryption_key": "env.BIFROST_ENCRYPTION_KEY",
+ "providers": {
+ "openai": {
+ "keys": [
+ { "name": "primary", "value": "env.OPENAI_API_KEY", "models": ["*"], "weight": 1.0 }
+ ]
+ }
+ },
+ "config_store": { "enabled": false }
+}
+```
diff --git a/docs/deployment-guides/config-json/storage.mdx b/docs/deployment-guides/config-json/storage.mdx
new file mode 100644
index 0000000000..fd4bdbde97
--- /dev/null
+++ b/docs/deployment-guides/config-json/storage.mdx
@@ -0,0 +1,540 @@
+---
+title: "Storage"
+description: "Configure Bifrost storage backends in config.json — config_store, logs_store, vector_store, and object storage for logs"
+icon: "database"
+---
+
+Bifrost persists two types of data — **config** (providers, virtual keys, governance rules) and **logs** (request/response records) — each in its own store. In addition, a **vector store** is required for semantic caching.
+
+| Store | Purpose | Backends |
+|-------|---------|---------|
+| `config_store` | Provider configs, virtual keys, governance rules | SQLite, PostgreSQL |
+| `logs_store` | Request/response logs shown in UI | SQLite, PostgreSQL + optional S3/GCS offload |
+| `vector_store` | Semantic response caching | Weaviate, Redis, Valkey, Qdrant, Pinecone |
+
+
+If you use PostgreSQL for any store, the target database must be **UTF8 encoded**. See [PostgreSQL UTF8 Requirement](/quickstart/gateway/setting-up#postgresql-utf8-requirement).
+
+
+---
+
+## config_store
+
+
+When `config_store` is disabled (or absent), all configuration is loaded from `config.json` at startup only — the Web UI is disabled and changes require a restart. See [Two Configuration Modes](/deployment-guides/config-json#two-configuration-modes).
+
+
+
+
+
+
+### SQLite (Default)
+
+Simplest setup — no external database required. Bifrost stores configuration in a local SQLite file.
+
+```json
+{
+ "config_store": {
+ "enabled": true,
+ "type": "sqlite",
+ "config": {
+ "path": "./config.db"
+ }
+ }
+}
+```
+
+| Field | Description |
+|-------|-------------|
+| `config.path` | Path to the SQLite file (relative to app-dir, or absolute) |
+
+
+
+
+
+### PostgreSQL
+
+Production-grade storage suitable for high-availability and high-throughput deployments.
+
+```json
+{
+ "config_store": {
+ "enabled": true,
+ "type": "postgres",
+ "config": {
+ "host": "env.PG_HOST",
+ "port": "5432",
+ "user": "env.PG_USER",
+ "password": "env.PG_PASSWORD",
+ "db_name": "bifrost",
+ "ssl_mode": "require",
+ "max_idle_conns": 5,
+ "max_open_conns": 50
+ }
+ }
+}
+```
+
+| Field | Default | Description |
+|-------|---------|-------------|
+| `host` | — | PostgreSQL host (supports `env.` prefix) |
+| `port` | — | PostgreSQL port (as string) |
+| `user` | — | Database user (supports `env.` prefix) |
+| `password` | — | Database password (supports `env.` prefix). Leave empty for IAM role auth. |
+| `db_name` | — | Database name |
+| `ssl_mode` | — | `"disable"`, `"require"`, `"verify-ca"`, `"verify-full"` |
+| `max_idle_conns` | `5` | Maximum idle connections in the pool |
+| `max_open_conns` | `50` | Maximum open connections to the database |
+
+
+
+
+
+### Disabled (file-only mode)
+
+Use this when you want Bifrost to read all configuration from `config.json` only — no database, no Web UI.
+
+```json
+{
+ "config_store": {
+ "enabled": false
+ }
+}
+```
+
+This is the recommended setup for [multinode OSS deployments](/deployment-guides/how-to/multinode) where a shared `config.json` is the single source of truth.
+
+
+
+
+
+---
+
+## logs_store
+
+
+
+
+
+### SQLite
+
+```json
+{
+ "logs_store": {
+ "enabled": true,
+ "type": "sqlite",
+ "config": {
+ "path": "./logs.db"
+ }
+ }
+}
+```
+
+
+
+
+
+### PostgreSQL
+
+```json
+{
+ "logs_store": {
+ "enabled": true,
+ "type": "postgres",
+ "config": {
+ "host": "env.PG_HOST",
+ "port": "5432",
+ "user": "env.PG_USER",
+ "password": "env.PG_PASSWORD",
+ "db_name": "bifrost",
+ "ssl_mode": "require",
+ "max_idle_conns": 10,
+ "max_open_conns": 100
+ }
+ }
+}
+```
+
+For high log volumes, increase `max_open_conns` and set `retention_days` to keep table growth bounded:
+
+```json
+{
+ "logs_store": {
+ "enabled": true,
+ "type": "postgres",
+ "config": {
+ "host": "env.PG_HOST",
+ "port": "5432",
+ "user": "env.PG_USER",
+ "password": "env.PG_PASSWORD",
+ "db_name": "bifrost",
+ "ssl_mode": "require",
+ "max_idle_conns": 10,
+ "max_open_conns": 200
+ },
+ "retention_days": 90
+ }
+}
+```
+
+
+
+
+
+```json
+{
+ "logs_store": {
+ "enabled": false
+ }
+}
+```
+
+
+
+
+
+### Log Retention
+
+Set `retention_days` to automatically purge old log entries. `0` disables retention-based cleanup.
+
+```json
+{
+ "logs_store": {
+ "enabled": true,
+ "type": "postgres",
+ "config": { "...": "..." },
+ "retention_days": 90
+ }
+}
+```
+
+### Object Storage for Logs
+
+Offload large request/response payloads from the database to S3 or GCS. The database retains only lightweight index records; payloads are fetched on demand.
+
+
+
+
+```json
+{
+ "logs_store": {
+ "enabled": true,
+ "type": "postgres",
+ "config": { "...": "..." },
+ "object_storage": {
+ "type": "s3",
+ "bucket": "env.S3_BUCKET",
+ "prefix": "bifrost",
+ "compress": true,
+ "region": "us-east-1",
+ "access_key_id": "env.S3_ACCESS_KEY_ID",
+ "secret_access_key": "env.S3_SECRET_ACCESS_KEY"
+ }
+ }
+}
+```
+
+**IAM role (instance profile / IRSA)** — omit `access_key_id` and `secret_access_key`:
+
+```json
+{
+ "object_storage": {
+ "type": "s3",
+ "bucket": "bifrost-logs",
+ "region": "us-east-1",
+ "compress": true,
+ "role_arn": "arn:aws:iam::123456789012:role/BifrostS3Role"
+ }
+}
+```
+
+| Field | Description |
+|-------|-------------|
+| `bucket` | S3 bucket name (supports `env.` prefix) |
+| `prefix` | Key prefix for stored objects (default: `"bifrost"`) |
+| `compress` | Enable gzip compression (default: `false`) |
+| `region` | AWS region |
+| `access_key_id` | AWS access key ID (omit for default credential chain) |
+| `secret_access_key` | AWS secret access key |
+| `session_token` | STS temporary credentials session token |
+| `role_arn` | IAM role ARN for STS AssumeRole |
+| `endpoint` | Custom endpoint for MinIO / Cloudflare R2 |
+| `force_path_style` | Use path-style URLs (required for MinIO, default: `false`) |
+
+
+
+
+```json
+{
+ "logs_store": {
+ "enabled": true,
+ "type": "postgres",
+ "config": { "...": "..." },
+ "object_storage": {
+ "type": "gcs",
+ "bucket": "bifrost-logs",
+ "prefix": "bifrost",
+ "compress": true,
+ "project_id": "env.GCP_PROJECT_ID",
+ "credentials_json": "env.GCS_CREDENTIALS_JSON"
+ }
+ }
+}
+```
+
+Omit `credentials_json` to use Application Default Credentials (Workload Identity, GCE metadata, `gcloud auth`).
+
+| Field | Description |
+|-------|-------------|
+| `project_id` | GCP project ID (supports `env.` prefix) |
+| `credentials_json` | Service account JSON or path — omit for ADC |
+
+
+
+
+```json
+{
+ "object_storage": {
+ "type": "s3",
+ "bucket": "bifrost-logs",
+ "prefix": "bifrost",
+ "compress": false,
+ "region": "us-east-1",
+ "endpoint": "http://minio.internal:9000",
+ "access_key_id": "env.MINIO_ACCESS_KEY",
+ "secret_access_key": "env.MINIO_SECRET_KEY",
+ "force_path_style": true
+ }
+}
+```
+
+
+
+
+---
+
+## vector_store
+
+A vector store is required for [semantic caching](/features/semantic-caching). Choose from Weaviate, Redis/Valkey, Qdrant, or Pinecone.
+
+
+
+
+
+```json
+{
+ "vector_store": {
+ "enabled": true,
+ "type": "weaviate",
+ "config": {
+ "scheme": "http",
+ "host": "localhost:8080",
+ "api_key": "env.WEAVIATE_API_KEY",
+ "grpc_config": {
+ "host": "localhost:50051",
+ "secured": false
+ }
+ }
+ }
+}
+```
+
+| Field | Required | Description |
+|-------|----------|-------------|
+| `scheme` | Yes | `"http"` or `"https"` |
+| `host` | Yes | Weaviate server host and port |
+| `api_key` | No | Weaviate API key (supports `env.` prefix) |
+| `grpc_config.host` | No | gRPC host for faster vector operations |
+| `grpc_config.secured` | No | Use TLS for gRPC connection |
+
+
+
+
+
+```json
+{
+ "vector_store": {
+ "enabled": true,
+ "type": "redis",
+ "config": {
+ "addr": "env.REDIS_ADDR",
+ "password": "env.REDIS_PASSWORD",
+ "db": 0,
+ "use_tls": false
+ }
+ }
+}
+```
+
+**AWS MemoryDB (cluster mode):**
+
+```json
+{
+ "vector_store": {
+ "enabled": true,
+ "type": "redis",
+ "config": {
+ "addr": "env.MEMORYDB_ENDPOINT",
+ "password": "env.MEMORYDB_PASSWORD",
+ "use_tls": true,
+ "cluster_mode": true
+ }
+ }
+}
+```
+
+| Field | Default | Description |
+|-------|---------|-------------|
+| `addr` | — | Redis/Valkey address `host:port` (supports `env.` prefix) |
+| `password` | — | Redis AUTH password (supports `env.` prefix) |
+| `db` | `0` | Redis database number |
+| `use_tls` | `false` | Enable TLS |
+| `cluster_mode` | `false` | Enable cluster mode (required for MemoryDB; `db` must be `0`) |
+| `pool_size` | — | Maximum socket connections |
+
+
+
+
+
+```json
+{
+ "vector_store": {
+ "enabled": true,
+ "type": "qdrant",
+ "config": {
+ "host": "env.QDRANT_HOST",
+ "port": 6334,
+ "api_key": "env.QDRANT_API_KEY",
+ "use_tls": false
+ }
+ }
+}
+```
+
+| Field | Default | Description |
+|-------|---------|-------------|
+| `host` | — | Qdrant server host (supports `env.` prefix) |
+| `port` | `6334` | gRPC port |
+| `api_key` | — | API key (supports `env.` prefix) |
+| `use_tls` | `false` | Enable TLS |
+
+
+
+
+
+Pinecone is external-only — there is no self-hosted option. Create an index in the Pinecone console first, then point Bifrost at its host.
+
+```json
+{
+ "vector_store": {
+ "enabled": true,
+ "type": "pinecone",
+ "config": {
+ "api_key": "env.PINECONE_API_KEY",
+ "index_host": "env.PINECONE_INDEX_HOST"
+ }
+ }
+}
+```
+
+| Field | Description |
+|-------|-------------|
+| `api_key` | Pinecone API key (supports `env.` prefix) |
+| `index_host` | Index host from Pinecone console (e.g. `your-index.svc.us-east1-gcp.pinecone.io`) |
+
+
+
+
+
+---
+
+## Mixed Backend Example
+
+Run the config store on PostgreSQL (for UI) while keeping logs on SQLite (simpler, cheaper for append-heavy workloads):
+
+```json
+{
+ "$schema": "https://www.getbifrost.ai/schema",
+ "encryption_key": "env.BIFROST_ENCRYPTION_KEY",
+
+ "config_store": {
+ "enabled": true,
+ "type": "postgres",
+ "config": {
+ "host": "env.PG_HOST",
+ "port": "5432",
+ "user": "env.PG_USER",
+ "password": "env.PG_PASSWORD",
+ "db_name": "bifrost",
+ "ssl_mode": "require"
+ }
+ },
+
+ "logs_store": {
+ "enabled": true,
+ "type": "sqlite",
+ "config": {
+ "path": "./logs.db"
+ }
+ }
+}
+```
+
+---
+
+## Full Storage Example
+
+```json
+{
+ "$schema": "https://www.getbifrost.ai/schema",
+ "encryption_key": "env.BIFROST_ENCRYPTION_KEY",
+
+ "config_store": {
+ "enabled": true,
+ "type": "postgres",
+ "config": {
+ "host": "env.PG_HOST",
+ "port": "5432",
+ "user": "env.PG_USER",
+ "password": "env.PG_PASSWORD",
+ "db_name": "bifrost",
+ "ssl_mode": "require",
+ "max_idle_conns": 5,
+ "max_open_conns": 50
+ }
+ },
+
+ "logs_store": {
+ "enabled": true,
+ "type": "postgres",
+ "config": {
+ "host": "env.PG_HOST",
+ "port": "5432",
+ "user": "env.PG_USER",
+ "password": "env.PG_PASSWORD",
+ "db_name": "bifrost",
+ "ssl_mode": "require",
+ "max_idle_conns": 10,
+ "max_open_conns": 100
+ },
+ "retention_days": 90,
+ "object_storage": {
+ "type": "s3",
+ "bucket": "env.S3_BUCKET",
+ "region": "us-east-1",
+ "compress": true,
+ "access_key_id": "env.S3_ACCESS_KEY_ID",
+ "secret_access_key": "env.S3_SECRET_ACCESS_KEY"
+ }
+ },
+
+ "vector_store": {
+ "enabled": true,
+ "type": "weaviate",
+ "config": {
+ "scheme": "http",
+ "host": "weaviate:8080"
+ }
+ }
+}
+```
diff --git a/docs/deployment-guides/helm/guardrails.mdx b/docs/deployment-guides/helm/guardrails.mdx
new file mode 100644
index 0000000000..4604b426e4
--- /dev/null
+++ b/docs/deployment-guides/helm/guardrails.mdx
@@ -0,0 +1,262 @@
+---
+title: "Guardrails"
+description: "Configure guardrails providers and rules in Bifrost Helm deployments"
+icon: "shield-halved"
+---
+
+
+Guardrails are an **enterprise-only** feature. They require the enterprise Bifrost image.
+
+
+Guardrails are configured under `bifrost.guardrails` in your values file. The configuration has two parts:
+
+- **`providers`** — the backend that performs the check. Rules link to providers by `id`.
+- **`rules`** — CEL expressions that control when and where providers are invoked.
+
+---
+
+## Providers
+
+
+
+
+Runs entirely in-process with no external dependency. Patterns use RE2 syntax. Supports optional per-pattern flags: `i` (case-insensitive), `m` (multiline), `s` (dot-all).
+
+```yaml
+bifrost:
+ guardrails:
+ providers:
+ - id: 1
+ provider_name: "regex"
+ policy_name: "block-secrets"
+ enabled: true
+ timeout: 5
+ config:
+ patterns:
+ - pattern: "sk-[A-Za-z0-9]{20,}"
+ description: "OpenAI API key"
+ - pattern: "AKIA[0-9A-Z]{16}"
+ description: "AWS access key"
+ flags: "i"
+ - pattern: "gh[ps]_[A-Za-z0-9]{36}"
+ description: "GitHub token"
+```
+
+
+
+
+```yaml
+bifrost:
+ guardrails:
+ providers:
+ - id: 2
+ provider_name: "bedrock"
+ policy_name: "content-filter"
+ enabled: true
+ timeout: 15
+ config:
+ guardrail_arn: "arn:aws:bedrock:us-east-1::guardrail/abc123"
+ guardrail_version: "DRAFT" # or a published version number
+ region: "us-east-1"
+ access_key: "env.AWS_ACCESS_KEY_ID" # omit to use instance role
+ secret_key: "env.AWS_SECRET_ACCESS_KEY"
+```
+
+
+
+
+```yaml
+bifrost:
+ guardrails:
+ providers:
+ - id: 3
+ provider_name: "azure"
+ policy_name: "azure-content-safety"
+ enabled: true
+ timeout: 10
+ config:
+ endpoint: "https://your-resource.cognitiveservices.azure.com"
+ api_key: "env.AZURE_CONTENT_SAFETY_KEY"
+ analyze_enabled: true
+ analyze_severity_threshold: "medium" # low | medium | high
+ jailbreak_shield_enabled: true
+ indirect_attack_shield_enabled: true
+ copyright_enabled: false
+ text_blocklist_enabled: false
+ blocklist_names: []
+```
+
+
+
+
+```yaml
+bifrost:
+ guardrails:
+ providers:
+ - id: 4
+ provider_name: "grayswan"
+ policy_name: "grayswan-jailbreak"
+ enabled: true
+ timeout: 15
+ config:
+ api_key: "env.GRAYSWAN_API_KEY"
+ violation_threshold: 0.7 # 0.0–1.0; higher = more permissive
+ reasoning_mode: "standard" # standard | fast
+ policy_id: "" # optional: single policy ID
+ policy_ids: [] # optional: multiple policy IDs
+ rules: {} # optional: inline rule map
+```
+
+
+
+
+---
+
+## Rules
+
+Each rule carries a CEL expression; the rule fires when that expression evaluates to `true` for a request. Available CEL variables:
+
+| Variable | Type | Description |
+|----------|------|-------------|
+| `model` | `string` | Model name from the request |
+| `provider` | `string` | Provider name (e.g. `"openai"`) |
+| `headers` | `map` | HTTP request headers |
+| `params` | `map` | Query parameters |
+| `customer` | `string` | Customer ID |
+| `team` | `string` | Team ID |
+| `user` | `string` | User ID |
+
+Rule fields:
+
+| Field | Required | Description |
+|-------|----------|-------------|
+| `id` | Yes | Unique integer ID |
+| `name` | Yes | Human-readable name |
+| `description` | No | Optional description |
+| `enabled` | Yes | `true` to activate |
+| `cel_expression` | Yes | CEL boolean expression; `"true"` matches all requests |
+| `apply_to` | Yes | `"input"`, `"output"`, or `"both"` |
+| `sampling_rate` | No | `0`–`100`; percentage of requests to check (default: 100) |
+| `timeout` | No | Rule timeout in seconds |
+| `provider_config_ids` | No | Provider `id`s to invoke when this rule matches |
+
+```yaml
+bifrost:
+ guardrails:
+ rules:
+ - id: 101
+ name: "block-secrets-input"
+ description: "Block prompts containing API keys"
+ enabled: true
+ cel_expression: "true"
+ apply_to: "input"
+ sampling_rate: 100
+ timeout: 10
+ provider_config_ids: [1]
+
+ - id: 102
+ name: "azure-output-gpt4o"
+ description: "Scan GPT-4o responses"
+ enabled: true
+ cel_expression: "model == 'gpt-4o'"
+ apply_to: "output"
+ sampling_rate: 100
+ timeout: 15
+ provider_config_ids: [3]
+
+ - id: 103
+ name: "grayswan-openai-input"
+ enabled: true
+ cel_expression: "provider == 'openai'"
+ apply_to: "input"
+ sampling_rate: 50
+ timeout: 20
+ provider_config_ids: [4]
+
+ - id: 104
+ name: "strict-team-check"
+ enabled: true
+ cel_expression: "team == 'team-platform'"
+ apply_to: "both"
+ sampling_rate: 100
+ timeout: 30
+ provider_config_ids: [1, 3] # multiple providers run in parallel
+```
+
+---
+
+## Full example
+
+```yaml
+# guardrails-values.yaml
+image:
+ tag: "latest"
+
+bifrost:
+ encryptionKeySecret:
+ name: "bifrost-encryption"
+ key: "encryption-key"
+
+ guardrails:
+ providers:
+ - id: 1
+ provider_name: "regex"
+ policy_name: "block-secrets"
+ enabled: true
+ timeout: 5
+ config:
+ patterns:
+ - pattern: "sk-[A-Za-z0-9]{20,}"
+ description: "OpenAI API key"
+ - pattern: "AKIA[0-9A-Z]{16}"
+ description: "AWS access key"
+ - pattern: "gh[ps]_[A-Za-z0-9]{36}"
+ description: "GitHub token"
+
+ - id: 2
+ provider_name: "azure"
+ policy_name: "content-safety"
+ enabled: true
+ timeout: 10
+ config:
+ endpoint: "https://your-resource.cognitiveservices.azure.com"
+ api_key: "env.AZURE_CONTENT_SAFETY_KEY"
+ analyze_enabled: true
+ analyze_severity_threshold: "medium"
+ jailbreak_shield_enabled: true
+ indirect_attack_shield_enabled: false
+ copyright_enabled: false
+ text_blocklist_enabled: false
+
+ rules:
+ - id: 101
+ name: "block-secrets-input"
+ description: "Block prompts leaking credentials"
+ enabled: true
+ cel_expression: "true"
+ apply_to: "input"
+ sampling_rate: 100
+ timeout: 10
+ provider_config_ids: [1]
+
+ - id: 102
+ name: "content-safety-both"
+ description: "Azure content safety on input and output"
+ enabled: true
+ cel_expression: "true"
+ apply_to: "both"
+ sampling_rate: 100
+ timeout: 15
+ provider_config_ids: [2]
+```
+
+```bash
+kubectl create secret generic azure-content-safety \
+ --from-literal=key='your-azure-content-safety-api-key'
+
+helm install bifrost bifrost/bifrost \
+ -f guardrails-values.yaml \
+ --set env[0].name=AZURE_CONTENT_SAFETY_KEY \
+ --set env[0].valueFrom.secretKeyRef.name=azure-content-safety \
+ --set env[0].valueFrom.secretKeyRef.key=key
+```
\ No newline at end of file
diff --git a/docs/deployment-guides/helm/plugins.mdx b/docs/deployment-guides/helm/plugins.mdx
index f02303120b..79a4c4f788 100644
--- a/docs/deployment-guides/helm/plugins.mdx
+++ b/docs/deployment-guides/helm/plugins.mdx
@@ -6,15 +6,15 @@ icon: "puzzle-piece"
Plugins are configured under `bifrost.plugins`. Each plugin is independently enabled/disabled. Pre-hooks run in registration order; post-hooks run in reverse order.
+
+**Telemetry, logging, and governance are auto-loaded built-ins** — they are always active and do not need to be explicitly enabled. Their configuration lives in `bifrost.client.*` and `bifrost.governance.*`, not in the `plugins` block.
+
+The `plugins` block controls the opt-in plugins: `semanticCache`, `otel`, `datadog`, `maxim`, and custom plugins.
+
+
```yaml
bifrost:
plugins:
- telemetry:
- enabled: true
- logging:
- enabled: true
- governance:
- enabled: true
semanticCache:
enabled: false
otel:
@@ -24,17 +24,15 @@ bifrost:
```
```bash
-# Enable plugins at install time
+# Enable an opt-in plugin at install time
helm install bifrost bifrost/bifrost \
--set image.tag=v1.4.11 \
- --set bifrost.plugins.telemetry.enabled=true \
- --set bifrost.plugins.logging.enabled=true \
- --set bifrost.plugins.governance.enabled=true
+ --set bifrost.plugins.otel.enabled=true
# Or upgrade to enable a plugin without touching other values
helm upgrade bifrost bifrost/bifrost \
--reuse-values \
- --set bifrost.plugins.otel.enabled=true
+ --set bifrost.plugins.semanticCache.enabled=true
```
---
@@ -45,39 +43,21 @@ helm upgrade bifrost bifrost/bifrost \
### Telemetry (Prometheus)
-Exposes Prometheus metrics at `GET /metrics`.
-
-| Parameter | Description | Default |
-|-----------|-------------|---------|
-| `bifrost.plugins.telemetry.enabled` | Enable Prometheus metrics | `false` |
-| `bifrost.plugins.telemetry.config.custom_labels` | Extra labels attached to every metric | `[]` |
-| `bifrost.plugins.telemetry.config.push_gateway.enabled` | Push metrics to a Prometheus Push Gateway | `false` |
-| `bifrost.plugins.telemetry.config.push_gateway.push_gateway_url` | Push Gateway URL | `""` |
-| `bifrost.plugins.telemetry.config.push_gateway.job_name` | Job label | `"bifrost"` |
-| `bifrost.plugins.telemetry.config.push_gateway.push_interval` | Push interval in seconds | `15` |
+
+Telemetry is **always active** — it cannot be disabled. You do not need to set `bifrost.plugins.telemetry.enabled`.
+
-**Basic setup:**
+Exposes Prometheus metrics at `GET /metrics`. Custom labels are set via `bifrost.client.prometheusLabels`:
```yaml
-# telemetry-values.yaml
-image:
- tag: "v1.4.11"
-
bifrost:
- plugins:
- telemetry:
- enabled: true
- config:
- custom_labels:
- - name: "environment"
- value: "production"
- - name: "region"
- value: "us-east-1"
+ client:
+ prometheusLabels:
+ - "environment=production"
+ - "region=us-east-1"
```
```bash
-helm upgrade bifrost bifrost/bifrost --reuse-values -f telemetry-values.yaml
-
# Verify metrics are exposed
kubectl port-forward svc/bifrost 8080:8080 &
curl http://localhost:8080/metrics | head -30
@@ -118,81 +98,60 @@ serviceMonitor:
### Request/Response Logging
-Persists full request and response data to the configured log store.
+
+Logging is **auto-loaded** when `bifrost.client.enableLogging: true` and a log store is configured. You do not need to set `bifrost.plugins.logging.enabled`.
+
+
+Configure logging via the `client` block:
| Parameter | Description | Default |
|-----------|-------------|---------|
-| `bifrost.plugins.logging.enabled` | Enable request/response logging | `false` |
-| `bifrost.plugins.logging.config.disable_content_logging` | Strip message body from logs | `false` |
-| `bifrost.plugins.logging.config.logging_headers` | HTTP headers to capture in log metadata | `[]` |
+| `bifrost.client.enableLogging` | Enable request/response logging | `true` |
+| `bifrost.client.disableContentLogging` | Strip message body from logs (HIPAA/PCI) | `false` |
+| `bifrost.client.loggingHeaders` | HTTP headers to capture in log metadata | `[]` |
```yaml
-# logging-values.yaml
-image:
- tag: "v1.4.11"
-
bifrost:
- plugins:
- logging:
- enabled: true
- config:
- disable_content_logging: false # set true for HIPAA/compliance
- logging_headers:
- - "x-request-id"
- - "x-user-id"
- - "x-team-id"
+ client:
+ enableLogging: true
+ disableContentLogging: false # set true for HIPAA/compliance
+ loggingHeaders:
+ - "x-request-id"
+ - "x-user-id"
+ - "x-team-id"
```
```bash
-helm upgrade bifrost bifrost/bifrost --reuse-values -f logging-values.yaml
-```
-
-**Verify logs are being written:**
-
-```bash
+# Verify logs are being written
kubectl port-forward svc/bifrost 8080:8080 &
-# Make a test request, then query logs
curl -s "http://localhost:8080/api/logs?limit=5" | jq .
```
-
-`bifrost.plugins.logging` controls the *plugin* (which hooks into every request). `bifrost.client.enableLogging` / `disableContentLogging` controls the *client-level* defaults. Both must be configured consistently — see the [Client Configuration](/deployment-guides/helm/client) page.
-
+See [Client Configuration](/deployment-guides/helm/client) for the full reference.
-### Governance Plugin
+### Governance
+
+
+Governance is **always active** for OSS deployments. You do not need to set `bifrost.plugins.governance.enabled`.
+
-Enforces budget caps, rate limits, and virtual key policies on every request. Must be enabled alongside `bifrost.governance` resource definitions.
+Virtual key enforcement is controlled by the `client` block:
| Parameter | Description | Default |
|-----------|-------------|---------|
-| `bifrost.plugins.governance.enabled` | Enable governance enforcement | `false` |
-| `bifrost.plugins.governance.config.is_vk_mandatory` | Reject requests without a virtual key | `false` |
-| `bifrost.plugins.governance.config.required_headers` | Additional headers required on every request | `[]` |
-| `bifrost.plugins.governance.config.is_enterprise` | Enable enterprise governance features | `false` |
+| `bifrost.client.enforceAuthOnInference` | Require a virtual key (`x-bf-vk`) on every inference request | `false` |
```yaml
-# governance-plugin-values.yaml
-image:
- tag: "v1.4.11"
-
bifrost:
- plugins:
- governance:
- enabled: true
- config:
- is_vk_mandatory: true # require virtual key on all inference requests
- required_headers: []
-```
-
-```bash
-helm upgrade bifrost bifrost/bifrost --reuse-values -f governance-plugin-values.yaml
+ client:
+ enforceAuthOnInference: true # require virtual key on all inference requests
```
-See the [Governance](/deployment-guides/helm/governance) page for defining budgets, rate limits, and virtual keys.
+Define virtual keys, budgets, rate limits, and routing rules in `bifrost.governance.*`. See the [Governance](/deployment-guides/helm/governance) page.
diff --git a/docs/docs.json b/docs/docs.json
index 63a911e1b6..1e3bfc72fd 100644
--- a/docs/docs.json
+++ b/docs/docs.json
@@ -381,9 +381,24 @@
"deployment-guides/helm/storage",
"deployment-guides/helm/plugins",
"deployment-guides/helm/governance",
+ "deployment-guides/helm/guardrails",
"deployment-guides/helm/cluster",
"deployment-guides/helm/troubleshooting"
]
+ },
+ {
+ "group": "config.json",
+ "icon": "file-code",
+ "pages": [
+ "deployment-guides/config-json",
+ "deployment-guides/config-json/schema-reference",
+ "deployment-guides/config-json/client",
+ "deployment-guides/config-json/providers",
+ "deployment-guides/config-json/storage",
+ "deployment-guides/config-json/plugins",
+ "deployment-guides/config-json/governance",
+ "deployment-guides/config-json/guardrails"
+ ]
}
]
},
diff --git a/helm-charts/bifrost/Chart.yaml b/helm-charts/bifrost/Chart.yaml
index b42c5f7533..73ebaad194 100644
--- a/helm-charts/bifrost/Chart.yaml
+++ b/helm-charts/bifrost/Chart.yaml
@@ -2,8 +2,8 @@ apiVersion: v2
name: bifrost
description: A Helm chart for deploying Bifrost - AI Gateway with unified interface for multiple providers
type: application
-version: 2.1.0-prerelease1
-appVersion: "1.4.11"
+version: 2.1.1
+appVersion: "1.5.0"
keywords:
- ai
- gateway
diff --git a/transports/config.schema.json b/transports/config.schema.json
index 9459d18552..7b64e12e51 100644
--- a/transports/config.schema.json
+++ b/transports/config.schema.json
@@ -1045,20 +1045,20 @@
"version": {
"type": "integer",
"minimum": 1,
- "description": "Version of the plugin (default: 1). Increment in this number will force a reload of the plugin and DB update.",
+ "description": "DB-Backed Only. Version of the plugin (default: 1). Ignored in config.json. Increment in this number will force a reload of the plugin and DB update.",
"optional": true,
"default": 1
},
"placement": {
"type": "string",
"enum": ["pre_builtin", "post_builtin", "builtin"],
- "description": "Whether this plugin runs before, after, or as a built-in. Default: post_builtin",
+ "description": "DB-Backed Only. Whether this plugin runs before, after, or as a built-in. Default: post_builtin. Ignored in config.json.",
"optional": true,
"default": "post_builtin"
},
"order": {
"type": "integer",
- "description": "Position within placement group. Lower values execute earlier. Default: 0",
+ "description": "DB-Backed Only. Position within placement group. Lower values execute earlier. Default: 0. Ignored in config.json.",
"optional": true,
"default": 0
}
diff --git a/ui/package-lock.json b/ui/package-lock.json
index 0aa5b05ee4..7bb1bce8a3 100644
--- a/ui/package-lock.json
+++ b/ui/package-lock.json
@@ -11127,24 +11127,6 @@
"dev": true,
"license": "ISC"
},
- "node_modules/yaml": {
- "version": "2.8.3",
- "resolved": "https://registry.npmjs.org/yaml/-/yaml-2.8.3.tgz",
- "integrity": "sha512-AvbaCLOO2Otw/lW5bmh9d/WEdcDFdQp2Z2ZUH3pX9U2ihyUY0nvLv7J6TrWowklRGPYbB/IuIMfYgxaCPg5Bpg==",
- "dev": true,
- "license": "ISC",
- "optional": true,
- "peer": true,
- "bin": {
- "yaml": "bin.mjs"
- },
- "engines": {
- "node": ">= 14.6"
- },
- "funding": {
- "url": "https://github.com/sponsors/eemeli"
- }
- },
"node_modules/zod": {
"version": "4.2.1",
"resolved": "https://registry.npmjs.org/zod/-/zod-4.2.1.tgz",