diff --git a/docs/deployment-guides/helm.mdx b/docs/deployment-guides/helm.mdx index cc09723b8a..3be4c5aa5c 100644 --- a/docs/deployment-guides/helm.mdx +++ b/docs/deployment-guides/helm.mdx @@ -1,740 +1,103 @@ --- -title: "Helm" -description: "Deploy Bifrost on Kubernetes using Helm charts with flexible configuration options" -icon: "helicopter-symbol" +title: "Quick Start" +description: "Deploy Bifrost on Kubernetes using the official Helm chart — quickstart for OSS and Enterprise" +icon: "server" --- -Deploy Bifrost on Kubernetes using the official Helm chart. This is the recommended way to deploy Bifrost on Kubernetes with production-ready defaults and flexible configuration. - -**Latest Chart Version:** 1.5.0 | [View on Artifact Hub](https://artifacthub.io/packages/helm/bifrost/bifrost) +**Latest Chart Version:** 2.1.0 | [View on Artifact Hub](https://artifacthub.io/packages/helm/bifrost/bifrost) + + + + ## Prerequisites - Kubernetes cluster (v1.19+) - `kubectl` configured - Helm 3.2.0+ installed -- (Optional) Persistent Volume provisioner -- (Optional) Ingress controller +- Persistent Volume provisioner (required for SQLite and for the bundled PostgreSQL; optional when using only an external PostgreSQL) If you use PostgreSQL for Bifrost storage, ensure the database is UTF8 encoded. See [PostgreSQL UTF8 Requirement](../quickstart/gateway/setting-up#postgresql-utf8-requirement). -## Quick Start - -### Add Helm Repository +## Step 1 — Add the Helm Repository ```bash helm repo add bifrost https://maximhq.github.io/bifrost/helm-charts helm repo update ``` -### Install Bifrost - -```bash -helm install bifrost bifrost/bifrost --set image.tag=1.3.45 -``` +## Step 2 — Install -The `image.tag` parameter is required. Check [Docker Hub](https://hub.docker.com/r/maximhq/bifrost/tags) for available versions. +The Helm chart ships ready-made values files under `helm-charts/bifrost/values-examples/`. +For example: `sqlite-only.yaml`, `production-ha.yaml`, `external-postgres.yaml`, and `secrets-from-k8s.yaml`. 
+See the full list here: https://github.com/maximhq/bifrost/tree/main/helm-charts/bifrost/values-examples -This deploys Bifrost with: -- SQLite storage (10Gi PVC) -- Single replica -- ClusterIP service - -### Access Bifrost - -```bash -kubectl port-forward svc/bifrost 8080:8080 -curl http://localhost:8080/metrics -``` - -## Deployment Patterns - - - - -### Development Setup - -Simple setup for local testing and development. - -```bash -helm install bifrost bifrost/bifrost \ - --set image.tag=1.3.45 \ - --set bifrost.providers.openai.keys[0].value="sk-your-key" \ - --set bifrost.providers.openai.keys[0].weight=1 -``` - -**Features:** -- SQLite storage -- Single replica -- No auto-scaling -- ClusterIP service - -**Access:** -```bash -kubectl port-forward svc/bifrost 8080:8080 -``` - - - - - -### Production Setup - -High-availability setup with PostgreSQL and auto-scaling. - -```yaml -# production.yaml -image: - tag: "1.3.45" # Required: specify the Bifrost version - -replicaCount: 3 - -storage: - mode: postgres - -postgresql: - enabled: true - auth: - password: "your-secure-password" - primary: - persistence: - size: 50Gi - resources: - requests: - cpu: 500m - memory: 1Gi - limits: - cpu: 2000m - memory: 2Gi - -autoscaling: - enabled: true - minReplicas: 3 - maxReplicas: 10 - targetCPUUtilizationPercentage: 70 - targetMemoryUtilizationPercentage: 80 - -ingress: - enabled: true - className: nginx - annotations: - cert-manager.io/cluster-issuer: letsencrypt-prod - hosts: - - host: bifrost.yourdomain.com - paths: - - path: / - pathType: Prefix - tls: - - secretName: bifrost-tls - hosts: - - bifrost.yourdomain.com - -resources: - requests: - cpu: 500m - memory: 1Gi - limits: - cpu: 2000m - memory: 2Gi - -bifrost: - encryptionKey: "your-32-byte-encryption-key" - logLevel: info - - client: - dropExcessRequests: true - enableLogging: true - - providers: - openai: - keys: - - value: "sk-..." 
- weight: 1 - - plugins: - telemetry: - enabled: true - logging: - enabled: true - governance: - enabled: true -``` - -**Install:** -```bash -helm install bifrost bifrost/bifrost -f production.yaml -``` - -**Features:** -- 3 initial replicas (scales 3-10) -- PostgreSQL database -- Ingress with TLS -- Monitoring enabled - - - - - -### AI Workloads with Semantic Caching - -Optimized for high-volume AI inference with caching. - -```yaml -# ai-workload.yaml -image: - tag: "1.3.45" # Required: specify the Bifrost version - -storage: - mode: postgres - -postgresql: - enabled: true - auth: - password: "secure-password" - primary: - persistence: - size: 50Gi - -vectorStore: - enabled: true - type: weaviate - weaviate: - enabled: true - persistence: - size: 50Gi - resources: - requests: - cpu: 500m - memory: 1Gi - limits: - cpu: 2000m - memory: 2Gi - -bifrost: - encryptionKey: "your-encryption-key" - - providers: - openai: - keys: - - value: "sk-..." - weight: 1 - - plugins: - semanticCache: - enabled: true - config: - provider: "openai" - embedding_model: "text-embedding-3-small" - dimension: 1536 - threshold: 0.8 - ttl: "5m" - cache_by_model: true - cache_by_provider: true -``` - -**Install:** -```bash -helm install bifrost bifrost/bifrost -f ai-workload.yaml -``` - -**Features:** -- PostgreSQL for config/logs -- Weaviate for vector storage -- Semantic caching enabled -- Optimized for AI workloads - - - - - -### Multi-Provider Setup - -Support multiple LLM providers with load balancing. - -```yaml -# multi-provider.yaml -image: - tag: "1.3.45" # Required: specify the Bifrost version - -bifrost: - encryptionKey: "your-encryption-key" - - client: - enableLogging: true - allowDirectKeys: false - - providers: - openai: - keys: - - value: "sk-..." - weight: 2 - anthropic: - keys: - - value: "sk-ant-..." - weight: 1 - gemini: - keys: - - value: "..." - weight: 1 - cohere: - keys: - - value: "..." 
- weight: 1 - - plugins: - telemetry: - enabled: true - logging: - enabled: true -``` - -**Install:** -```bash -helm install bifrost bifrost/bifrost -f multi-provider.yaml -``` - -**Features:** -- Multiple provider support -- Weighted load balancing -- Request/response logging -- Telemetry enabled - - - - - -### External Database - -Use existing PostgreSQL instance. - -```yaml -# external-db.yaml -image: - tag: "1.3.45" # Required: specify the Bifrost version - -storage: - mode: postgres - -postgresql: - enabled: false - external: - enabled: true - host: "postgres.example.com" - port: 5432 - user: "bifrost" - password: "your-password" - database: "bifrost" - sslMode: "require" - -bifrost: - encryptionKey: "your-encryption-key" - - providers: - openai: - keys: - - value: "sk-..." - weight: 1 -``` - -**Install:** -```bash -helm install bifrost bifrost/bifrost -f external-db.yaml -``` - -**Features:** -- Uses external PostgreSQL -- No embedded database -- SSL connection support - - - - - -### Using Kubernetes Secrets - -Store all sensitive values in Kubernetes secrets instead of values files. - -**Prerequisites:** Create Kubernetes secrets first: - -```bash -# PostgreSQL password -kubectl create secret generic postgres-credentials \ - --from-literal=password='your-postgres-password' - -# Encryption key -kubectl create secret generic bifrost-encryption \ - --from-literal=key='your-encryption-key' - -# Provider API keys -kubectl create secret generic provider-api-keys \ - --from-literal=openai-api-key='sk-...' \ - --from-literal=anthropic-api-key='sk-ant-...' 
- -# Qdrant API key (if using) -kubectl create secret generic qdrant-credentials \ - --from-literal=api-key='your-qdrant-api-key' -``` - -```yaml -# secrets-config.yaml -image: - tag: "1.3.45" - -storage: - mode: postgres - -# External PostgreSQL with secret reference -postgresql: - enabled: false - external: - enabled: true - host: "postgres.example.com" - port: 5432 - user: "bifrost" - database: "bifrost" - sslMode: "require" - existingSecret: "postgres-credentials" - passwordKey: "password" - -# Vector store with secret reference -vectorStore: - enabled: true - type: qdrant - qdrant: - external: - enabled: true - host: "qdrant.example.com" - port: 6334 - existingSecret: "qdrant-credentials" - apiKeyKey: "api-key" - -bifrost: - # Encryption key from secret - encryptionKeySecret: - name: "bifrost-encryption" - key: "key" - - # Provider configs using env var references - providers: - openai: - keys: - - value: "env.OPENAI_API_KEY" - weight: 1 - anthropic: - keys: - - value: "env.ANTHROPIC_API_KEY" - weight: 1 - - # Inject provider secrets as env vars - providerSecrets: - openai: - existingSecret: "provider-api-keys" - key: "openai-api-key" - envVar: "OPENAI_API_KEY" - anthropic: - existingSecret: "provider-api-keys" - key: "anthropic-api-key" - envVar: "ANTHROPIC_API_KEY" -``` - -**Install:** -```bash -helm install bifrost bifrost/bifrost -f secrets-config.yaml -``` - -**Features:** -- No sensitive values in values files -- Secrets managed by Kubernetes -- Works with external secret managers (Vault, AWS Secrets Manager via External Secrets Operator) - - - - -## Configuration - -### Key Parameters - -| Parameter | Description | Default | -|-----------|-------------|---------| -| `image.tag` | **Required.** Bifrost image version (e.g., 1.3.45) | `""` | -| `replicaCount` | Number of replicas | `1` | -| `storage.mode` | Storage backend (sqlite/postgres) | `sqlite` | -| `storage.persistence.size` | PVC size for SQLite | `10Gi` | -| `postgresql.enabled` | Deploy 
PostgreSQL | `false` | -| `vectorStore.enabled` | Enable vector store | `false` | -| `vectorStore.type` | Vector store type (weaviate/redis/qdrant). Use `redis` for Redis or Valkey-compatible services | `none` | -| `bifrost.encryptionKey` | Encryption key | `""` | -| `ingress.enabled` | Enable ingress | `false` | -| `autoscaling.enabled` | Enable HPA | `false` | - -### Secret Reference Parameters - -Use existing Kubernetes secrets instead of plain-text values: - -| Parameter | Description | Default | -|-----------|-------------|---------| -| `bifrost.encryptionKeySecret.name` | Secret name for encryption key | `""` | -| `bifrost.encryptionKeySecret.key` | Key within the secret | `""` | -| `postgresql.external.existingSecret` | Secret name for PostgreSQL password | `""` | -| `postgresql.external.passwordKey` | Key within the secret | `"password"` | -| `vectorStore.redis.external.existingSecret` | Secret name for Redis password | `""` | -| `vectorStore.redis.external.passwordKey` | Key within the secret | `"password"` | -| `vectorStore.weaviate.external.existingSecret` | Secret name for Weaviate API key | `""` | -| `vectorStore.weaviate.external.apiKeyKey` | Key within the secret | `"api-key"` | -| `vectorStore.qdrant.external.existingSecret` | Secret name for Qdrant API key | `""` | -| `vectorStore.qdrant.external.apiKeyKey` | Key within the secret | `"api-key"` | -| `bifrost.plugins.maxim.secretRef.name` | Secret name for Maxim API key | `""` | -| `bifrost.plugins.maxim.secretRef.key` | Key within the secret | `"api-key"` | -| `bifrost.providerSecrets..existingSecret` | Secret name for provider API key | `""` | -| `bifrost.providerSecrets..key` | Key within the secret | `"api-key"` | -| `bifrost.providerSecrets..envVar` | Environment variable name to inject | `""` | - -### Provider Configuration - -Add provider keys via values file: - -```yaml -bifrost: - providers: - openai: - keys: - - value: "sk-..." - weight: 1 - anthropic: - keys: - - value: "sk-ant-..." 
- weight: 1 -``` - -Or via command line: - -```bash -helm install bifrost bifrost/bifrost \ - --set image.tag=1.3.45 \ - --set bifrost.providers.openai.keys[0].value="sk-..." \ - --set bifrost.providers.openai.keys[0].weight=1 -``` - -#### Using Environment Variables for Provider Keys - -Bifrost supports `env.VAR_NAME` syntax to reference environment variables. Combined with `providerSecrets`, you can keep API keys in Kubernetes secrets: - -```yaml -bifrost: - providers: - openai: - keys: - - value: "env.OPENAI_API_KEY" # References environment variable - weight: 1 - - # Inject secrets as environment variables - providerSecrets: - openai: - existingSecret: "my-openai-secret" - key: "api-key" - envVar: "OPENAI_API_KEY" -``` - -This pattern: -1. Creates a Kubernetes secret with the API key -2. Injects the secret as an environment variable (`OPENAI_API_KEY`) -3. Bifrost resolves `env.OPENAI_API_KEY` at runtime - -### Plugin Configuration - -Enable and configure plugins: - -```yaml -bifrost: - plugins: - telemetry: - enabled: true - config: {} - - logging: - enabled: true - config: {} - - governance: - enabled: true - config: - is_vk_mandatory: false - - semanticCache: - enabled: true - config: - provider: "openai" - embedding_model: "text-embedding-3-small" - dimension: 1536 - threshold: 0.8 - ttl: "5m" - cache_by_model: true - cache_by_provider: true -``` - -## Operations - -### Upgrade - -```bash -# Update repository -helm repo update - -# Upgrade with same values -helm upgrade bifrost bifrost/bifrost --reuse-values - -# Upgrade with new values -helm upgrade bifrost bifrost/bifrost -f your-values.yaml -``` - -### Rollback - -```bash -# View release history -helm history bifrost - -# Rollback to previous version -helm rollback bifrost - -# Rollback to specific revision -helm rollback bifrost 2 -``` - -### Uninstall - -```bash -# Uninstall release -helm uninstall bifrost - -# Delete PVCs (if you want to remove data) -kubectl delete pvc -l 
app.kubernetes.io/instance=bifrost -``` - -### Scale - -```bash -# Scale manually -kubectl scale deployment bifrost --replicas=5 - -# Or update via Helm -helm upgrade bifrost bifrost/bifrost \ - --set replicaCount=5 \ - --reuse-values -``` - -## Monitoring - -### Prometheus Metrics - -Bifrost exposes Prometheus metrics at `/metrics`. - -Enable ServiceMonitor for automatic scraping: - -```yaml -serviceMonitor: - enabled: true - interval: 30s - scrapeTimeout: 10s -``` - -### Health Checks - -Check pod health: - -```bash -# View pod status -kubectl get pods -l app.kubernetes.io/name=bifrost - -# Check logs -kubectl logs -l app.kubernetes.io/name=bifrost --tail=100 - -# Describe pod -kubectl describe pod -l app.kubernetes.io/name=bifrost -``` - -### Metrics Endpoints - -```bash -# Port forward -kubectl port-forward svc/bifrost 8080:8080 - -# Check metrics -curl http://localhost:8080/metrics - -# Check health -curl http://localhost:8080/health -``` - -## Troubleshooting + + -### Pod Not Starting +This is the fastest way to get Bifrost running. Bifrost deploys as a StatefulSet with a 10Gi PVC for SQLite. 
```bash -# Check events -kubectl describe pod -l app.kubernetes.io/name=bifrost - -# Check logs -kubectl logs -l app.kubernetes.io/name=bifrost +kubectl create secret generic bifrost-encryption-key \ + --from-literal=encryption-key="$(openssl rand -base64 32)" -# Common issues: -# - Image pull errors: Check repository access -# - PVC binding: Check PVC status -# - Config errors: Validate ConfigMap +helm install bifrost bifrost/bifrost \ + --set image.tag=v1.4.11 \ + --set bifrost.encryptionKeySecret.name="bifrost-encryption-key" \ + --set bifrost.encryptionKeySecret.key="encryption-key" ``` -### Database Connection Issues + + + +Add your first provider key at install time: ```bash -# For embedded PostgreSQL -kubectl exec -it deployment/bifrost-postgresql -- psql -U bifrost +kubectl create secret generic bifrost-encryption-key \ + --from-literal=encryption-key="$(openssl rand -base64 32)" -# Check connectivity from pod -kubectl exec -it deployment/bifrost -- nc -zv bifrost-postgresql 5432 +kubectl create secret generic provider-keys \ + --from-literal=openai-api-key='sk-your-key' -# Check secret -kubectl get secret bifrost-config -o yaml +helm install bifrost bifrost/bifrost \ + --set image.tag=v1.4.11 \ + --set bifrost.encryptionKeySecret.name="bifrost-encryption-key" \ + --set bifrost.encryptionKeySecret.key="encryption-key" \ + --set 'bifrost.providers.openai.keys[0].name=primary' \ + --set 'bifrost.providers.openai.keys[0].value=env.OPENAI_API_KEY' \ + --set 'bifrost.providers.openai.keys[0].weight=1' \ + --set bifrost.providerSecrets.openai.existingSecret="provider-keys" \ + --set bifrost.providerSecrets.openai.key="openai-api-key" \ + --set bifrost.providerSecrets.openai.envVar="OPENAI_API_KEY" ``` -### High Memory Usage - -```bash -# Check resource usage -kubectl top pods -l app.kubernetes.io/name=bifrost - -# Increase limits -helm upgrade bifrost bifrost/bifrost \ - --set resources.limits.memory=4Gi \ - --reuse-values -``` + + -### Ingress Not Working 
+High-availability setup — 3 replicas, PostgreSQL, autoscaling, ingress. ```bash -# Check ingress status -kubectl describe ingress bifrost +# 1. Create secrets +kubectl create secret generic bifrost-encryption-key \ + --from-literal=encryption-key="$(openssl rand -base64 32)" -# Check ingress controller logs -kubectl logs -n ingress-nginx -l app.kubernetes.io/name=ingress-nginx +kubectl create secret generic postgres-credentials \ + --from-literal=password="$(openssl rand -base64 32)" -# Verify DNS -nslookup bifrost.yourdomain.com +kubectl create secret generic provider-keys \ + --from-literal=openai-api-key='sk-...' ``` -## Advanced Configuration - -### Custom Values File - -Create `my-values.yaml`: - ```yaml +# production.yaml image: - tag: "1.3.45" # Required: specify the Bifrost version + tag: "v1.4.11" replicaCount: 3 @@ -744,105 +107,157 @@ storage: postgresql: enabled: true auth: - password: "secure-password" + username: bifrost + database: bifrost + existingSecret: "postgres-credentials" + secretKeys: + adminPasswordKey: "password" + primary: + persistence: + size: 50Gi + resources: + requests: + cpu: 500m + memory: 1Gi + limits: + cpu: 2000m + memory: 2Gi autoscaling: enabled: true minReplicas: 3 maxReplicas: 10 + targetCPUUtilizationPercentage: 70 + targetMemoryUtilizationPercentage: 80 ingress: enabled: true className: nginx + annotations: + cert-manager.io/cluster-issuer: letsencrypt-prod hosts: - - host: bifrost.example.com + - host: bifrost.yourdomain.com paths: - path: / pathType: Prefix + tls: + - secretName: bifrost-tls + hosts: + - bifrost.yourdomain.com + +resources: + requests: + cpu: 500m + memory: 1Gi + limits: + cpu: 2000m + memory: 2Gi bifrost: - encryptionKey: "your-32-byte-key" + encryptionKeySecret: + name: "bifrost-encryption-key" + key: "encryption-key" + + client: + initialPoolSize: 500 + dropExcessRequests: true + enableLogging: true + providers: openai: keys: - - value: "sk-..." 
+ - name: "openai-primary" + value: "env.OPENAI_API_KEY" weight: 1 -``` -Install: + providerSecrets: + openai: + existingSecret: "provider-keys" + key: "openai-api-key" + envVar: "OPENAI_API_KEY" + + plugins: + telemetry: + enabled: true + logging: + enabled: true + governance: + enabled: true +``` ```bash -helm install bifrost bifrost/bifrost -f my-values.yaml +# 2. Install +helm install bifrost bifrost/bifrost -f production.yaml ``` -### Environment Variables + + -Add custom environment variables: + +`image.tag` is required — the chart will not start without it. Check [Docker Hub](https://hub.docker.com/r/maximhq/bifrost/tags) for available versions. + -```yaml -env: - - name: CUSTOM_VAR - value: "custom-value" - -envFrom: - - secretRef: - name: bifrost-secrets - - configMapRef: - name: bifrost-config -``` +## Step 3 — Verify -### Node Affinity +```bash +# Check pods are running +kubectl get pods -l app.kubernetes.io/name=bifrost -Deploy to specific nodes: +# Port forward and hit the health endpoint +kubectl port-forward svc/bifrost 8080:8080 +curl http://localhost:8080/health -```yaml -nodeSelector: - node-type: ai-workload +# Check Prometheus metrics +curl http://localhost:8080/metrics +``` -affinity: - podAntiAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - - labelSelector: - matchLabels: - app.kubernetes.io/name: bifrost - topologyKey: kubernetes.io/hostname +## Step 4 — Configure Providers & Plugins -tolerations: - - key: "gpu" - operator: "Equal" - value: "true" - effect: "NoSchedule" +```bash +# Make your first inference call +curl http://localhost:8080/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{ + "model": "gpt-4o-mini", + "messages": [{"role": "user", "content": "Hello from Bifrost!"}] + }' ``` -## Enterprise Deployment +Once Bifrost is running, continue to [Next Steps](#next-steps). 
+ + + + -For enterprise customers, Bifrost provides dedicated container images hosted in private registries with additional features, support, and SLAs. +Enterprise customers receive dedicated container images in a private registry, along with additional features, SLAs, and compliance documentation. [Book a demo](https://calendly.com/maximai/bifrost-demo) to know more about our enterprise features. -### Private Container Registry - -Enterprise customers receive access to Bifrost images in a private container registry. To use your enterprise registry, override the `image.repository` with your provided registry URL: +## Prerequisites - - +- Kubernetes cluster (v1.19+) +- `kubectl` configured +- Helm 3.2.0+ installed +- Enterprise registry credentials (provided by Maxim) -```yaml -# enterprise-gcp.yaml -image: - repository: us-west1-docker.pkg.dev/bifrost-enterprise/your-org/bifrost - tag: "latest" +## Step 1 — Add the Helm Repository -imagePullSecrets: - - name: gcr-secret +```bash +helm repo add bifrost https://maximhq.github.io/bifrost/helm-charts +helm repo update ``` -**Create the pull secret:** +## Step 2 — Create Pull Secret + +Create a Kubernetes image pull secret for our private enterprise registry: + + + ```bash -kubectl create secret docker-registry gcr-secret \ +kubectl create secret docker-registry enterprise-registry-secret \ --docker-server=us-west1-docker.pkg.dev \ --docker-username=_json_key \ --docker-password="$(cat service-account-key.json)" \ @@ -852,46 +267,22 @@ kubectl create secret docker-registry gcr-secret \ -```yaml -# enterprise-aws.yaml -image: - repository: 123456789.dkr.ecr.us-east-1.amazonaws.com/bifrost - tag: "latest" - -imagePullSecrets: - - name: ecr-secret -``` - -**Create the pull secret:** - ```bash -kubectl create secret docker-registry ecr-secret \ +kubectl create secret docker-registry enterprise-registry-secret \ --docker-server=123456789.dkr.ecr.us-east-1.amazonaws.com \ --docker-username=AWS \ --docker-password=$(aws ecr 
get-login-password --region us-east-1) ``` -ECR tokens expire after 12 hours. Consider using [ECR Credential Helper](https://github.com/awslabs/amazon-ecr-credential-helper) or an operator like [ECR Registry Creds](https://github.com/upmc-enterprises/registry-creds) for automatic token refresh. +ECR tokens expire after 12 hours. Use the [ECR Credential Helper](https://github.com/awslabs/amazon-ecr-credential-helper) or [ECR Registry Creds operator](https://github.com/upmc-enterprises/registry-creds) for automatic refresh. -```yaml -# enterprise-azure.yaml -image: - repository: yourregistry.azurecr.io/bifrost - tag: "latest" - -imagePullSecrets: - - name: acr-secret -``` - -**Create the pull secret:** - ```bash -kubectl create secret docker-registry acr-secret \ +kubectl create secret docker-registry enterprise-registry-secret \ --docker-server=yourregistry.azurecr.io \ --docker-username= \ --docker-password= @@ -900,20 +291,8 @@ kubectl create secret docker-registry acr-secret \ -```yaml -# enterprise-self-hosted.yaml -image: - repository: registry.yourcompany.com/ai/bifrost - tag: "latest" - -imagePullSecrets: - - name: private-registry-secret -``` - -**Create the pull secret:** - ```bash -kubectl create secret docker-registry private-registry-secret \ +kubectl create secret docker-registry enterprise-registry-secret \ --docker-server=registry.yourcompany.com \ --docker-username= \ --docker-password= @@ -922,14 +301,30 @@ kubectl create secret docker-registry private-registry-secret \ -### Full Enterprise Configuration +## Step 3 — Create Required Secrets + +```bash +# Encryption key +kubectl create secret generic bifrost-encryption \ + --from-literal=key="$(openssl rand -base64 32)" + +# Provider API keys +kubectl create secret generic provider-keys \ + --from-literal=openai-api-key='sk-...' \ + --from-literal=anthropic-api-key='sk-ant-...' 
+ +# Admin credentials (for dashboard + governance) +kubectl create secret generic bifrost-admin-credentials \ + --from-literal=username='admin' \ + --from-literal=password='secure-admin-password' +``` -Complete example for enterprise deployments with all recommended settings: +## Step 4 — Install ```yaml -# enterprise-full.yaml +# enterprise.yaml image: - # Your enterprise registry URL (provided by Maxim) + # Registry URL provided by Maxim repository: us-west1-docker.pkg.dev/bifrost-enterprise/your-org/bifrost tag: "latest" @@ -938,7 +333,6 @@ imagePullSecrets: replicaCount: 3 -# Production-grade resources resources: requests: cpu: 1000m @@ -947,7 +341,6 @@ resources: cpu: 4000m memory: 8Gi -# Auto-scaling for high availability autoscaling: enabled: true minReplicas: 3 @@ -955,14 +348,13 @@ autoscaling: targetCPUUtilizationPercentage: 70 targetMemoryUtilizationPercentage: 80 -# PostgreSQL storage storage: mode: postgres postgresql: enabled: true auth: - password: "secure-password" # Use existingSecret in production + password: "secure-password" # use existingSecret in production primary: persistence: size: 100Gi @@ -974,7 +366,6 @@ postgresql: cpu: 4000m memory: 8Gi -# Vector store for semantic caching vectorStore: enabled: true type: weaviate @@ -983,7 +374,6 @@ vectorStore: persistence: size: 100Gi -# Ingress with TLS ingress: enabled: true className: nginx @@ -1000,17 +390,16 @@ ingress: hosts: - bifrost.yourcompany.com -# Bifrost configuration bifrost: encryptionKeySecret: name: "bifrost-encryption" key: "key" - + client: initialPoolSize: 1000 dropExcessRequests: true enableLogging: true - disableContentLogging: false # Set to true for compliance + disableContentLogging: false # set true for HIPAA/compliance logRetentionDays: 365 enforceGovernanceHeader: true allowDirectKeys: false @@ -1018,29 +407,29 @@ bifrost: allowedOrigins: - "https://yourcompany.com" - "https://*.yourcompany.com" - - # Use secrets for provider keys + providers: openai: keys: - - value: 
"env.OPENAI_API_KEY" + - name: "openai-primary" + value: "env.OPENAI_API_KEY" weight: 1 anthropic: keys: - - value: "env.ANTHROPIC_API_KEY" + - name: "anthropic-primary" + value: "env.ANTHROPIC_API_KEY" weight: 1 - + providerSecrets: openai: - existingSecret: "provider-api-keys" + existingSecret: "provider-keys" key: "openai-api-key" envVar: "OPENAI_API_KEY" anthropic: - existingSecret: "provider-api-keys" + existingSecret: "provider-keys" key: "anthropic-api-key" envVar: "ANTHROPIC_API_KEY" - - # Governance with authentication + governance: authConfig: isEnabled: true @@ -1048,8 +437,7 @@ bifrost: existingSecret: "bifrost-admin-credentials" usernameKey: "username" passwordKey: "password" - - # Enable all plugins + plugins: telemetry: enabled: true @@ -1068,7 +456,6 @@ bifrost: threshold: 0.85 ttl: "1h" -# Pod distribution affinity: podAntiAffinity: requiredDuringSchedulingIgnoredDuringExecution: @@ -1078,52 +465,159 @@ affinity: topologyKey: kubernetes.io/hostname ``` -### Enterprise Prerequisites +```bash +helm install bifrost bifrost/bifrost -f enterprise.yaml +``` + +Next steps: jump to [Next Steps](#next-steps). + +## Enterprise Support + +Enterprise customers have access to: +- Dedicated Slack channel for support +- Priority bug fixes and feature requests +- Custom feature development +- SLA guarantees +- Compliance documentation (SOC2, HIPAA, etc.) + +Contact [support@getmaxim.ai](mailto:support@getmaxim.ai) for support. + + + + + +--- + +## Operations -Before deploying, create the required secrets: +### Upgrade ```bash -# 1. Registry pull secret (see registry-specific instructions above) +helm repo update -# 2. Encryption key -kubectl create secret generic bifrost-encryption \ - --from-literal=key='your-32-byte-encryption-key' +# Upgrade reusing all existing values +helm upgrade bifrost bifrost/bifrost --reuse-values -# 3. Provider API keys -kubectl create secret generic provider-api-keys \ - --from-literal=openai-api-key='sk-...' 
\ - --from-literal=anthropic-api-key='sk-ant-...' +# Upgrade with new values +helm upgrade bifrost bifrost/bifrost -f your-values.yaml -# 4. Admin credentials (for governance) -kubectl create secret generic bifrost-admin-credentials \ - --from-literal=username='admin' \ - --from-literal=password='secure-admin-password' +# Upgrade and override a single field +helm upgrade bifrost bifrost/bifrost \ + --reuse-values \ + --set image.tag=v1.4.11 ``` -### Install Enterprise Build +### Rollback ```bash -helm install bifrost bifrost/bifrost -f enterprise-full.yaml +helm history bifrost +helm rollback bifrost # to previous revision +helm rollback bifrost 2 # to specific revision ``` -### Enterprise Support +### Scale -Enterprise customers have access to: -- Dedicated Slack channel for support -- Priority bug fixes and feature requests -- Custom feature development -- SLA guarantees -- Compliance documentation (SOC2, HIPAA, etc.) +```bash +kubectl scale deployment bifrost --replicas=5 + +# Or via Helm +helm upgrade bifrost bifrost/bifrost \ + --reuse-values \ + --set replicaCount=5 +``` + +### Uninstall + +```bash +helm uninstall bifrost + +# Also remove PVCs (permanently deletes all data) +kubectl delete pvc -l app.kubernetes.io/instance=bifrost +``` + +--- + +## Monitoring + +### Prometheus Metrics + +Bifrost exposes Prometheus metrics at `/metrics`. + +Enable ServiceMonitor for automatic scraping: + +```yaml +serviceMonitor: + enabled: true + interval: 30s + scrapeTimeout: 10s +``` -Contact [support@getmaxim.ai](mailto:support@getmaxim.ai) for enterprise support. 
+### Health Checks + +Check pod health: + +```bash +# View pod status +kubectl get pods -l app.kubernetes.io/name=bifrost + +# Check logs +kubectl logs -l app.kubernetes.io/name=bifrost --tail=100 + +# Describe pod +kubectl describe pod -l app.kubernetes.io/name=bifrost +``` + +### Metrics Endpoints + +```bash +# Port forward +kubectl port-forward svc/bifrost 8080:8080 + +# Check metrics +curl http://localhost:8080/metrics + +# Check health +curl http://localhost:8080/health +``` + +--- + +## Configuration Guides + + + + All parameters, secret references, advanced config, example patterns + + + Pool size, logging, CORS, header filtering, compat shims, MCP settings + + + OpenAI, Anthropic, Azure, Bedrock, Vertex, Groq, self-hosted + + + SQLite, PostgreSQL, object storage for logs, vector stores + + + Telemetry, logging, semantic cache, OTel, Datadog, governance + + + Budgets, rate limits, virtual keys, routing rules + + + Multi-replica HA, gossip, peer discovery + + + Pod startup, database, ingress, PVC, secrets, performance + + + +--- ## Resources - [Helm Chart Repository](https://github.com/maximhq/bifrost/tree/main/helm-charts) - [Artifact Hub](https://artifacthub.io/packages/helm/bifrost/bifrost) -- [Complete Installation Guide](https://github.com/maximhq/bifrost/blob/main/helm-charts/INSTALL.md) - [Example Configurations](https://github.com/maximhq/bifrost/tree/main/helm-charts/bifrost/values-examples) -- [Kubernetes Secrets Example](https://github.com/maximhq/bifrost/blob/main/helm-charts/bifrost/values-examples/secrets-from-k8s.yaml) - [GitHub Issues](https://github.com/maximhq/bifrost/issues) ## Next Steps diff --git a/docs/deployment-guides/helm/client.mdx b/docs/deployment-guides/helm/client.mdx new file mode 100644 index 0000000000..b3fd2dc968 --- /dev/null +++ b/docs/deployment-guides/helm/client.mdx @@ -0,0 +1,316 @@ +--- +title: "Client Configuration" +description: "Configure the Bifrost client: connection pool, logging, CORS, header filtering, compat 
shims, and MCP settings" +icon: "gear" +--- + +The `bifrost.client` block controls how Bifrost manages its internal worker pool, request logging, authentication enforcement, header policies, SDK compatibility shims, and MCP agent behaviour. All settings map directly to the `client` section of the rendered `config.json`. + +--- + +## Connection Pool + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `bifrost.client.initialPoolSize` | Pre-allocated worker goroutines per provider queue | `300` | +| `bifrost.client.dropExcessRequests` | Drop requests when queue is full instead of waiting | `false` | + +A larger pool reduces latency spikes under burst load at the cost of higher baseline memory. For production workloads with multiple providers, `1000` is a common starting point. + +```yaml +# client-pool.yaml +image: + tag: "v1.4.11" + +bifrost: + client: + initialPoolSize: 1000 + dropExcessRequests: true # Return 429 instead of queuing indefinitely +``` + +```bash +helm install bifrost bifrost/bifrost -f client-pool.yaml + +# Or set inline +helm upgrade bifrost bifrost/bifrost \ + --reuse-values \ + --set bifrost.client.initialPoolSize=1000 \ + --set bifrost.client.dropExcessRequests=true +``` + +--- + +## Request & Response Logging + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `bifrost.client.enableLogging` | Log all LLM requests and responses | `true` | +| `bifrost.client.disableContentLogging` | Strip message content from logs (keeps metadata) | `false` | +| `bifrost.client.logRetentionDays` | Days to retain log entries in the store | `365` | +| `bifrost.client.loggingHeaders` | HTTP request headers to capture in log metadata | `[]` | + +Set `disableContentLogging: true` for HIPAA / PCI compliance workloads where message content must not be persisted. 
+ +```yaml +bifrost: + client: + enableLogging: true + disableContentLogging: true # PII / compliance: store metadata only + logRetentionDays: 90 + loggingHeaders: + - "x-request-id" + - "x-user-id" +``` + +```bash +helm upgrade bifrost bifrost/bifrost \ + --reuse-values \ + --set bifrost.client.disableContentLogging=true \ + --set bifrost.client.logRetentionDays=90 +``` + +--- + +## Security & CORS + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `bifrost.client.allowedOrigins` | CORS allowed origins | `["*"]` | +| `bifrost.client.allowDirectKeys` | Allow callers to pass provider keys directly in requests | `false` | +| `bifrost.client.enforceGovernanceHeader` | Require `x-bf-vk` virtual-key header on every request | `false` | +| `bifrost.client.maxRequestBodySizeMb` | Maximum allowed request body size | `100` | +| `bifrost.client.whitelistedRoutes` | Routes that bypass auth middleware | `[]` | + +```yaml +bifrost: + client: + allowedOrigins: + - "https://app.yourdomain.com" + - "https://admin.yourdomain.com" + allowDirectKeys: false # Prevent callers from supplying raw provider keys + enforceGovernanceHeader: true # Every request must carry a virtual key + maxRequestBodySizeMb: 50 + whitelistedRoutes: + - "/health" + - "/metrics" +``` + +```bash +helm install bifrost bifrost/bifrost \ + --set image.tag=v1.4.11 \ + --set bifrost.client.enforceGovernanceHeader=true \ + --set bifrost.client.allowDirectKeys=false +``` + +--- + +## Header Filtering + +Controls which `x-bf-eh-*` headers are forwarded to upstream LLM providers. 
+ +| Parameter | Description | Default | +|-----------|-------------|---------| +| `bifrost.client.headerFilterConfig.allowlist` | Only these headers are forwarded (whitelist mode) | `[]` | +| `bifrost.client.headerFilterConfig.denylist` | These headers are always blocked | `[]` | +| `bifrost.client.requiredHeaders` | Headers that must be present on every request | `[]` | +| `bifrost.client.allowedHeaders` | Additional headers permitted for CORS and WebSocket | `[]` | + +When both lists are empty, all `x-bf-eh-*` headers pass through. Specifying an `allowlist` enables strict whitelist mode — only listed headers are forwarded. + +```yaml +bifrost: + client: + headerFilterConfig: + allowlist: + - "x-bf-eh-anthropic-version" + - "x-bf-eh-openai-beta" + denylist: [] + requiredHeaders: + - "x-request-id" +``` + +--- + +## Authentication + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `bifrost.authConfig.isEnabled` | Enable username/password auth for the API and dashboard | `false` | +| `bifrost.authConfig.adminUsername` | Admin username (plain text, prefer secret) | `""` | +| `bifrost.authConfig.adminPassword` | Admin password (plain text, prefer secret) | `""` | +| `bifrost.authConfig.existingSecret` | Kubernetes Secret name for credentials | `""` | +| `bifrost.authConfig.usernameKey` | Key within the secret for username | `"username"` | +| `bifrost.authConfig.passwordKey` | Key within the secret for password | `"password"` | +| `bifrost.authConfig.disableAuthOnInference` | Skip auth check on `/v1/*` inference routes | `false` | + +```bash +# Create secret first +kubectl create secret generic bifrost-admin \ + --from-literal=username='admin' \ + --from-literal=password='your-secure-password' +``` + +```yaml +bifrost: + authConfig: + isEnabled: true + disableAuthOnInference: false + existingSecret: "bifrost-admin" + usernameKey: "username" + passwordKey: "password" +``` + +```bash +helm upgrade bifrost bifrost/bifrost \ + 
--reuse-values \ + -f auth-values.yaml +``` + +--- + +## Encryption + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `bifrost.encryptionKey` | 32-byte encryption key (plain text — use secret in production) | `""` | +| `bifrost.encryptionKeySecret.name` | Kubernetes Secret name containing the key | `""` | +| `bifrost.encryptionKeySecret.key` | Key within the secret | `"encryption-key"` | + +Always use a Kubernetes Secret in production: + +```bash +kubectl create secret generic bifrost-encryption \ + --from-literal=encryption-key='your-32-byte-encryption-key-here' +``` + +```yaml +bifrost: + encryptionKeySecret: + name: "bifrost-encryption" + key: "encryption-key" +``` + +```bash +helm install bifrost bifrost/bifrost \ + --set image.tag=v1.4.11 \ + -f encryption-values.yaml +``` + +--- + +## Async Jobs & Database Pings + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `bifrost.client.disableDbPingsInHealth` | Exclude DB connectivity from `/health` checks | `false` | +| `bifrost.client.asyncJobResultTTL` | TTL (seconds) for async job results | `3600` | + +--- + +## Compat Shims + +Compatibility flags that let Bifrost silently adapt request/response shapes for SDK integrations: + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `bifrost.client.compat.convertTextToChat` | Wrap legacy text completions as chat messages | `false` | +| `bifrost.client.compat.convertChatToResponses` | Translate chat completions to Responses API format | `false` | +| `bifrost.client.compat.shouldDropParams` | Silently drop unsupported parameters instead of erroring | `false` | +| `bifrost.client.compat.shouldConvertParams` | Auto-convert parameter names across provider schemas | `false` | + +```yaml +bifrost: + client: + compat: + shouldDropParams: true # Useful when proxying mixed SDK traffic + convertTextToChat: true # For clients using the legacy /v1/completions endpoint +``` + +--- + +## 
Prometheus Labels + +Add custom labels to every Prometheus metric emitted by Bifrost: + +```yaml +bifrost: + client: + prometheusLabels: + - name: "environment" + value: "production" + - name: "region" + value: "us-east-1" +``` + +--- + +## MCP Agent Settings + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `bifrost.client.mcpAgentDepth` | Maximum tool-call recursion depth for MCP agent mode | `10` | +| `bifrost.client.mcpToolExecutionTimeout` | Timeout per tool execution in seconds | `30` | +| `bifrost.client.mcpCodeModeBindingLevel` | Code mode binding level (`server` or `tool`) | `""` | +| `bifrost.client.mcpToolSyncInterval` | Global tool sync interval in minutes (`0` = disabled) | `0` | + +```yaml +bifrost: + client: + mcpAgentDepth: 15 + mcpToolExecutionTimeout: 60 +``` + +--- + +## Full Example + +```yaml +# client-full.yaml +image: + tag: "v1.4.11" + +bifrost: + encryptionKeySecret: + name: "bifrost-encryption" + key: "encryption-key" + + authConfig: + isEnabled: true + disableAuthOnInference: false + existingSecret: "bifrost-admin" + usernameKey: "username" + passwordKey: "password" + + client: + initialPoolSize: 1000 + dropExcessRequests: true + allowedOrigins: + - "https://app.yourdomain.com" + enableLogging: true + disableContentLogging: false + logRetentionDays: 90 + enforceGovernanceHeader: true + allowDirectKeys: false + maxRequestBodySizeMb: 100 + headerFilterConfig: + allowlist: [] + denylist: [] + prometheusLabels: + - name: "environment" + value: "production" + mcpAgentDepth: 10 + mcpToolExecutionTimeout: 30 +``` + +```bash +# Create prerequisites +kubectl create secret generic bifrost-encryption \ + --from-literal=encryption-key='your-32-byte-encryption-key-here' + +kubectl create secret generic bifrost-admin \ + --from-literal=username='admin' \ + --from-literal=password='your-secure-password' + +# Install +helm install bifrost bifrost/bifrost -f client-full.yaml +``` diff --git 
a/docs/deployment-guides/helm/cluster.mdx b/docs/deployment-guides/helm/cluster.mdx new file mode 100644 index 0000000000..ea86536e5c --- /dev/null +++ b/docs/deployment-guides/helm/cluster.mdx @@ -0,0 +1,513 @@ +--- +title: "Cluster Mode & HA" +description: "Run Bifrost in a multi-replica cluster with gossip-based peer discovery, distributed state sync, and high-availability configuration" +icon: "network-wired" +--- + +Cluster mode enables multiple Bifrost replicas to share state — rate limits, budget counters, and governance data — across pods. When `bifrost.cluster.enabled` is `false` (the default), each replica operates independently and state is only shared via the database. + + +Cluster mode requires **PostgreSQL** as the storage backend. SQLite is single-node only. + + +## When to Use Cluster Mode + +| Scenario | Recommendation | +|----------|---------------| +| Single replica | Not needed | +| Multiple replicas, shared DB only | Optional — DB provides eventual consistency | +| Multiple replicas with strict per-minute rate limiting | **Enable cluster mode** — in-memory counters are synced via gossip | +| Geographic multi-region | Enable cluster mode with DNS or Consul discovery | + +--- + +## Basic Cluster Setup + +```yaml +# cluster-values.yaml +image: + tag: "v1.4.11" + +replicaCount: 3 + +storage: + mode: postgres + +postgresql: + external: + enabled: true + host: "your-postgres-host.example.com" + port: 5432 + user: bifrost + database: bifrost + sslMode: require + existingSecret: "postgres-credentials" + passwordKey: "password" + +bifrost: + encryptionKeySecret: + name: "bifrost-encryption" + key: "encryption-key" + + cluster: + enabled: true + gossip: + port: 7946 + config: + timeoutSeconds: 10 + successThreshold: 3 + failureThreshold: 3 + +# Spread replicas across nodes for true HA +affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + app.kubernetes.io/name: bifrost + topologyKey: 
kubernetes.io/hostname + +# Conservative scale-down: avoid killing pods mid-stream +autoscaling: + enabled: true + minReplicas: 3 + maxReplicas: 10 + targetCPUUtilizationPercentage: 70 + behavior: + scaleDown: + stabilizationWindowSeconds: 300 + policies: + - type: Pods + value: 1 + periodSeconds: 120 + +# Give in-flight SSE streams time to drain +terminationGracePeriodSeconds: 90 +lifecycle: + preStop: + exec: + command: ["sh", "-c", "sleep 20"] +``` + +```bash +kubectl create secret generic postgres-credentials \ + --from-literal=password='your-postgres-password' + +kubectl create secret generic bifrost-encryption \ + --from-literal=encryption-key='your-32-byte-encryption-key' + +helm install bifrost bifrost/bifrost -f cluster-values.yaml +``` + +--- + +## Peer Discovery + +Bifrost uses a gossip protocol (memberlist) for peer-to-peer state sync. Configure how peers find each other: + + + + + +Bifrost queries the Kubernetes API to find other Bifrost pods by label selector. No static peer list needed — works with HPA. 
+ +```yaml +bifrost: + cluster: + enabled: true + discovery: + enabled: true + type: kubernetes + k8sNamespace: "default" # namespace where Bifrost runs + k8sLabelSelector: "app.kubernetes.io/name=bifrost" + gossip: + port: 7946 +``` + +The service account needs permission to list pods: + +```yaml +serviceAccount: + create: true + annotations: {} +``` + +```bash +# Create a namespaced Role and RoleBinding for pod discovery (apply once) +kubectl apply -f - <<'EOF' +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: bifrost-pod-discovery + namespace: default +rules: + - apiGroups: [""] + resources: ["pods"] + verbs: ["list", "get", "watch"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: bifrost-pod-discovery + namespace: default +subjects: + - kind: ServiceAccount + name: bifrost + namespace: default +roleRef: + kind: Role + name: bifrost-pod-discovery + apiGroup: rbac.authorization.k8s.io +EOF +``` + +```bash +helm install bifrost bifrost/bifrost -f cluster-k8s-discovery-values.yaml +``` + + + + + +Uses a headless service DNS name to resolve peer IPs. Works well with StatefulSets (predictable pod DNS names). 
+ +```yaml +bifrost: + cluster: + enabled: true + peers: + - "bifrost-0.bifrost-headless.default.svc.cluster.local:7946" + - "bifrost-1.bifrost-headless.default.svc.cluster.local:7946" + - "bifrost-2.bifrost-headless.default.svc.cluster.local:7946" + gossip: + port: 7946 +``` + + +Static peers require StatefulSet pod names to be stable. This approach doesn't adapt to HPA-driven scaling — use Kubernetes or DNS discovery for dynamic replica counts. + + + + + + +```yaml +bifrost: + cluster: + enabled: true + discovery: + enabled: true + type: consul + consulAddress: "consul.consul.svc.cluster.local:8500" + gossip: + port: 7946 +``` + +```bash +helm install bifrost bifrost/bifrost -f cluster-consul-discovery-values.yaml +``` + + + + + +```yaml +bifrost: + cluster: + enabled: true + discovery: + enabled: true + type: etcd + etcdEndpoints: + - "http://etcd-0.etcd.default.svc.cluster.local:2379" + - "http://etcd-1.etcd.default.svc.cluster.local:2379" + - "http://etcd-2.etcd.default.svc.cluster.local:2379" + gossip: + port: 7946 +``` + + + + + +Best for local development or bare-metal clusters where multicast is available. 
+ +```yaml +bifrost: + cluster: + enabled: true + discovery: + enabled: true + type: mdns + mdnsService: "_bifrost._tcp" + gossip: + port: 7946 +``` + + + + + +--- + +## Allowed Address Space + +Restrict gossip to a specific subnet (useful in multi-tenant clusters): + +```yaml +bifrost: + cluster: + discovery: + enabled: true + type: kubernetes + k8sNamespace: "default" + k8sLabelSelector: "app.kubernetes.io/name=bifrost" + allowedAddressSpace: + - "10.0.0.0/8" + - "172.16.0.0/12" +``` + +--- + +## Region-Aware Routing + +Tag replicas with a region identifier for latency-aware routing: + +```yaml +bifrost: + cluster: + enabled: true + region: "us-east-1" +``` + +--- + +## Full HA Production Example + +```yaml +# ha-production-values.yaml +image: + tag: "v1.4.11" + +replicaCount: 3 + +resources: + requests: + cpu: 1000m + memory: 1Gi + limits: + cpu: 4000m + memory: 4Gi + +autoscaling: + enabled: true + minReplicas: 3 + maxReplicas: 15 + targetCPUUtilizationPercentage: 70 + targetMemoryUtilizationPercentage: 75 + behavior: + scaleDown: + stabilizationWindowSeconds: 300 + policies: + - type: Pods + value: 1 + periodSeconds: 120 + scaleUp: + stabilizationWindowSeconds: 30 + +terminationGracePeriodSeconds: 90 +lifecycle: + preStop: + exec: + command: ["sh", "-c", "sleep 20"] + +ingress: + enabled: true + className: nginx + annotations: + cert-manager.io/cluster-issuer: letsencrypt-prod + nginx.ingress.kubernetes.io/proxy-body-size: "100m" + nginx.ingress.kubernetes.io/proxy-read-timeout: "300" + hosts: + - host: bifrost.yourdomain.com + paths: + - path: / + pathType: Prefix + tls: + - secretName: bifrost-tls + hosts: + - bifrost.yourdomain.com + +storage: + mode: postgres + +postgresql: + external: + enabled: true + host: "rds.us-east-1.amazonaws.com" + port: 5432 + user: bifrost + database: bifrost + sslMode: require + existingSecret: "postgres-credentials" + passwordKey: "password" + +bifrost: + encryptionKeySecret: + name: "bifrost-encryption" + key: 
"encryption-key" + + client: + initialPoolSize: 1000 + dropExcessRequests: true + enableLogging: true + enforceGovernanceHeader: true + + cluster: + enabled: true + region: "us-east-1" + discovery: + enabled: true + type: kubernetes + k8sNamespace: "default" + k8sLabelSelector: "app.kubernetes.io/name=bifrost" + gossip: + port: 7946 + config: + timeoutSeconds: 10 + successThreshold: 3 + failureThreshold: 3 + + plugins: + telemetry: + enabled: true + config: + push_gateway: + enabled: true + push_gateway_url: "http://prometheus-pushgateway.monitoring.svc.cluster.local:9091" + push_interval: 15 + logging: + enabled: true + governance: + enabled: true + config: + is_vk_mandatory: true + +affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + app.kubernetes.io/name: bifrost + topologyKey: kubernetes.io/hostname + +serviceAccount: + create: true + annotations: {} +``` + +```bash +# Prerequisites +kubectl create secret generic postgres-credentials \ + --from-literal=password='your-secure-postgres-password' + +kubectl create secret generic bifrost-encryption \ + --from-literal=encryption-key='your-32-byte-encryption-key' + +# RBAC for Kubernetes pod discovery +kubectl apply -f - <<'EOF' +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: bifrost-pod-discovery + namespace: default +rules: + - apiGroups: [""] + resources: ["pods"] + verbs: ["list", "get", "watch"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: bifrost-pod-discovery + namespace: default +subjects: + - kind: ServiceAccount + name: bifrost + namespace: default +roleRef: + kind: Role + name: bifrost-pod-discovery + apiGroup: rbac.authorization.k8s.io +EOF + +# Install +helm install bifrost bifrost/bifrost -f ha-production-values.yaml + +# Verify all peers have found each other (check logs) +kubectl logs -l app.kubernetes.io/name=bifrost --tail=50 | grep -i gossip +``` + +--- + +## Verifying 
Cluster Health + +```bash +# Check all pods are running +kubectl get pods -l app.kubernetes.io/name=bifrost + +# Check gossip port is reachable between pods +kubectl exec -it bifrost-0 -- nc -zv bifrost-1.bifrost-headless 7946 + +# Check health endpoint +kubectl port-forward svc/bifrost 8080:8080 & +curl http://localhost:8080/health + +# View HPA status +kubectl get hpa bifrost + +# Scale manually during maintenance +kubectl scale deployment bifrost --replicas=5 +``` diff --git a/docs/deployment-guides/helm/governance.mdx b/docs/deployment-guides/helm/governance.mdx new file mode 100644 index 0000000000..3679d214d4 --- /dev/null +++ b/docs/deployment-guides/helm/governance.mdx @@ -0,0 +1,422 @@ +--- +title: "Governance" +description: "Configure Bifrost governance in Helm — budgets, rate limits, virtual keys, routing rules, and admin authentication" +icon: "shield" +--- + +Governance lets you control who can call which providers, how much they can spend, how fast they can go, and how traffic is routed. Everything is declared under `bifrost.governance` in your values file and seeded into the database at startup. + + +The governance **plugin** must also be enabled for enforcement to take effect: + +```yaml +bifrost: + plugins: + governance: + enabled: true +``` + +See the [Plugins](/deployment-guides/helm/plugins) page for plugin configuration details. + + +--- + +## Admin Authentication + +Protect the Bifrost dashboard and management API with username/password auth. 
+ +```bash +kubectl create secret generic bifrost-admin-credentials \ + --from-literal=username='admin' \ + --from-literal=password='your-secure-admin-password' +``` + +```yaml +bifrost: + governance: + authConfig: + isEnabled: true + disableAuthOnInference: false # keep auth on inference routes + existingSecret: "bifrost-admin-credentials" + usernameKey: "username" + passwordKey: "password" +``` + +```bash +helm upgrade bifrost bifrost/bifrost --reuse-values -f governance-auth-values.yaml +``` + +--- + +## Budgets + +Spending caps that reset on a configurable period. Budgets are referenced by ID from virtual keys, teams, customers, or providers. + +| Reset duration | Syntax | +|----------------|--------| +| 30 seconds | `"30s"` | +| 5 minutes | `"5m"` | +| 1 hour | `"1h"` | +| 1 day | `"1d"` | +| 1 week | `"1w"` | +| 1 month | `"1M"` | +| 1 year | `"1Y"` | + +```yaml +bifrost: + governance: + budgets: + - id: "budget-dev" + max_limit: 50 # $50 per month + reset_duration: "1M" + + - id: "budget-production" + max_limit: 500 # $500 per month + reset_duration: "1M" + + - id: "budget-testing" + max_limit: 10 # $10 per day + reset_duration: "1d" + + - id: "budget-enterprise" + max_limit: 5000 # $5000 per month + reset_duration: "1M" +``` + +--- + +## Rate Limits + +Token and request-count caps per time window. Referenced by ID from virtual keys, teams, customers, or providers. 
+ +```yaml +bifrost: + governance: + rateLimits: + - id: "rate-limit-standard" + token_max_limit: 100000 # 100K tokens per hour + token_reset_duration: "1h" + request_max_limit: 1000 # 1000 requests per hour + request_reset_duration: "1h" + + - id: "rate-limit-high" + token_max_limit: 500000 # 500K tokens per hour + token_reset_duration: "1h" + request_max_limit: 5000 + request_reset_duration: "1h" + + - id: "rate-limit-burst" + token_max_limit: 50000 # 50K tokens per minute (burst) + token_reset_duration: "1m" + request_max_limit: 500 + request_reset_duration: "1m" + + - id: "rate-limit-testing" + token_max_limit: 10000 + token_reset_duration: "1h" + request_max_limit: 100 + request_reset_duration: "1h" +``` + +--- + +## Customers & Teams + +Optional organizational hierarchy. Virtual keys can be assigned to customers or teams, inheriting their budgets and rate limits. + +```yaml +bifrost: + governance: + customers: + - id: "customer-acme" + name: "Acme Corp" + budget_id: "budget-production" + rate_limit_id: "rate-limit-high" + + - id: "customer-startup" + name: "Startup Inc" + budget_id: "budget-dev" + rate_limit_id: "rate-limit-standard" + + teams: + - id: "team-platform" + name: "Platform Team" + customer_id: "customer-acme" + budget_id: "budget-enterprise" + rate_limit_id: "rate-limit-high" + + - id: "team-ml" + name: "ML Team" + customer_id: "customer-acme" + budget_id: "budget-production" + rate_limit_id: "rate-limit-standard" +``` + +--- + +## Virtual Keys + +Virtual keys are the primary access tokens issued to callers. They scope which providers, models, and underlying API keys are accessible. + +```yaml +bifrost: + governance: + virtualKeys: + # 1. Unrestricted dev key — access to every provider + - id: "vk-dev-all" + name: "Dev: all providers" + value: "vk-dev-all-secret-token" + is_active: true + budget_id: "budget-dev" + rate_limit_id: "rate-limit-standard" + # No provider_configs → all providers allowed + + # 2. 
OpenAI only — restricted to two models + - id: "vk-openai-prod" + name: "OpenAI Production" + value: "vk-openai-prod-secret-token" + is_active: true + budget_id: "budget-production" + rate_limit_id: "rate-limit-high" + provider_configs: + - provider: "openai" + weight: 1 + allowed_models: ["gpt-4o", "gpt-4o-mini"] + # No keys[] → all configured OpenAI keys allowed + + # 3. Multi-provider with weighted routing + - id: "vk-multi" + name: "Multi-provider weighted" + value: "vk-multi-secret-token" + is_active: true + budget_id: "budget-production" + rate_limit_id: "rate-limit-high" + provider_configs: + - provider: "openai" + weight: 2 # 50% + allowed_models: ["*"] + - provider: "anthropic" + weight: 1 # 25% + allowed_models: ["*"] + - provider: "groq" + weight: 1 # 25% + allowed_models: ["*"] + + # 4. Team-scoped key + - id: "vk-platform-team" + name: "Platform Team Key" + value: "vk-platform-team-token" + is_active: true + team_id: "team-platform" # inherits team budget/rate-limit + provider_configs: + - provider: "openai" + weight: 1 + allowed_models: ["*"] + keys: + - name: "openai-primary" # pin to specific configured key + + # 5. Restricted testing key + - id: "vk-testing" + name: "Testing (gpt-4o-mini only)" + value: "vk-testing-token" + is_active: true + budget_id: "budget-testing" + rate_limit_id: "rate-limit-testing" + provider_configs: + - provider: "openai" + weight: 1 + allowed_models: ["gpt-4o-mini"] + + # 6. 
Batch API key + - id: "vk-batch" + name: "Batch API workloads" + value: "vk-batch-token" + is_active: true + budget_id: "budget-production" + rate_limit_id: "rate-limit-burst" + provider_configs: + - provider: "openai" + weight: 1 + allowed_models: ["*"] + keys: + - name: "openai-batch" # only the batch-flagged key +``` + +**Use a virtual key in API calls:** + +```bash +curl http://localhost:8080/v1/chat/completions \ + -H "x-bf-vk: vk-openai-prod-secret-token" \ + -H "Content-Type: application/json" \ + -d '{"model":"gpt-4o","messages":[{"role":"user","content":"Hello"}]}' +``` + +--- + +## Model Configs + +Apply budgets and rate limits at the model level, independent of virtual keys: + +```yaml +bifrost: + governance: + modelConfigs: + - id: "model-gpt4o" + model_name: "gpt-4o" + provider: "openai" + budget_id: "budget-production" + rate_limit_id: "rate-limit-high" + + - id: "model-claude" + model_name: "claude-3-5-sonnet-20241022" + provider: "anthropic" + rate_limit_id: "rate-limit-standard" +``` + +--- + +## Provider Governance + +Apply budgets and rate limits at the provider level: + +```yaml +bifrost: + governance: + providers: + - name: "openai" + budget_id: "budget-production" + rate_limit_id: "rate-limit-high" + send_back_raw_request: false + send_back_raw_response: false + + - name: "anthropic" + budget_id: "budget-production" + rate_limit_id: "rate-limit-standard" +``` + +--- + +## Routing Rules + +CEL-expression-based routing rules redirect requests to different providers or models based on request attributes. 
+ +| Field | Description | +|-------|-------------| +| `cel_expression` | CEL expression evaluated against the request; if it evaluates to `true`, the rule fires | +| `targets` | Provider/model targets with weights | +| `fallbacks` | Providers to try if all targets fail | +| `scope` | `global`, `team`, `customer`, or `virtual_key` | +| `scope_id` | Required for non-global scopes | +| `priority` | Lower number = evaluated first | + +```yaml +bifrost: + governance: + routingRules: + # Route all GPT requests to Azure + - id: "route-gpt-to-azure" + name: "GPT → Azure" + description: "Route all GPT model requests to Azure OpenAI" + enabled: true + cel_expression: "model.startsWith('gpt-')" + targets: + - provider: "azure" + model: "" # empty = use original model name + weight: 1.0 + fallbacks: ["openai"] + scope: "global" + priority: 0 + + # Route gpt-4o requests with max_tokens above 4000 to Groq + - id: "route-heavy-to-groq" + name: "Large context → Groq" + enabled: true + cel_expression: "model == 'gpt-4o' && request_body.max_tokens > 4000" + targets: + - provider: "groq" + model: "llama-3.3-70b-versatile" + weight: 1.0 + fallbacks: ["openai"] + scope: "global" + priority: 1 + + # Team-scoped rule + - id: "route-ml-team-bedrock" + name: "ML Team → Bedrock" + enabled: true + cel_expression: "true" # match all requests for this scope + targets: + - provider: "bedrock" + model: "" + weight: 1.0 + fallbacks: ["openai"] + scope: "team" + scope_id: "team-ml" + priority: 0 +``` + +--- + +## Full Example + +```yaml +# governance-full-values.yaml +image: + tag: "v1.4.11" + +bifrost: + encryptionKeySecret: + name: "bifrost-encryption" + key: "encryption-key" + + plugins: + governance: + enabled: true + config: + is_vk_mandatory: true + + governance: + authConfig: + isEnabled: true + existingSecret: "bifrost-admin-credentials" + usernameKey: "username" + passwordKey: "password" + + budgets: + - id: "budget-production" + max_limit: 500 + reset_duration: "1M" + - id: "budget-dev" + max_limit: 50 + 
reset_duration: "1M" + + rateLimits: + - id: "rate-limit-standard" + token_max_limit: 100000 + token_reset_duration: "1h" + request_max_limit: 1000 + request_reset_duration: "1h" + + virtualKeys: + - id: "vk-production" + name: "Production" + value: "vk-prod-secret-token" + is_active: true + budget_id: "budget-production" + rate_limit_id: "rate-limit-standard" + provider_configs: + - provider: "openai" + weight: 1 + allowed_models: ["gpt-4o", "gpt-4o-mini"] +``` + +```bash +kubectl create secret generic bifrost-encryption \ + --from-literal=encryption-key='your-32-byte-key' + +kubectl create secret generic bifrost-admin-credentials \ + --from-literal=username='admin' \ + --from-literal=password='secure-admin-password' + +helm install bifrost bifrost/bifrost -f governance-full-values.yaml +``` diff --git a/docs/deployment-guides/helm/plugins.mdx b/docs/deployment-guides/helm/plugins.mdx new file mode 100644 index 0000000000..f02303120b --- /dev/null +++ b/docs/deployment-guides/helm/plugins.mdx @@ -0,0 +1,578 @@ +--- +title: "Plugins" +description: "Configure Bifrost plugins in Helm — telemetry, logging, semantic cache, OpenTelemetry, Datadog, governance, and custom plugins" +icon: "puzzle-piece" +--- + +Plugins are configured under `bifrost.plugins`. Each plugin is independently enabled/disabled. Pre-hooks run in registration order; post-hooks run in reverse order. 
+ +```yaml +bifrost: + plugins: + telemetry: + enabled: true + logging: + enabled: true + governance: + enabled: true + semanticCache: + enabled: false + otel: + enabled: false + datadog: + enabled: false +``` + +```bash +# Enable plugins at install time +helm install bifrost bifrost/bifrost \ + --set image.tag=v1.4.11 \ + --set bifrost.plugins.telemetry.enabled=true \ + --set bifrost.plugins.logging.enabled=true \ + --set bifrost.plugins.governance.enabled=true + +# Or upgrade to enable a plugin without touching other values +helm upgrade bifrost bifrost/bifrost \ + --reuse-values \ + --set bifrost.plugins.otel.enabled=true +``` + +--- + + + + + +### Telemetry (Prometheus) + +Exposes Prometheus metrics at `GET /metrics`. + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `bifrost.plugins.telemetry.enabled` | Enable Prometheus metrics | `false` | +| `bifrost.plugins.telemetry.config.custom_labels` | Extra labels attached to every metric | `[]` | +| `bifrost.plugins.telemetry.config.push_gateway.enabled` | Push metrics to a Prometheus Push Gateway | `false` | +| `bifrost.plugins.telemetry.config.push_gateway.push_gateway_url` | Push Gateway URL | `""` | +| `bifrost.plugins.telemetry.config.push_gateway.job_name` | Job label | `"bifrost"` | +| `bifrost.plugins.telemetry.config.push_gateway.push_interval` | Push interval in seconds | `15` | + +**Basic setup:** + +```yaml +# telemetry-values.yaml +image: + tag: "v1.4.11" + +bifrost: + plugins: + telemetry: + enabled: true + config: + custom_labels: + - name: "environment" + value: "production" + - name: "region" + value: "us-east-1" +``` + +```bash +helm upgrade bifrost bifrost/bifrost --reuse-values -f telemetry-values.yaml + +# Verify metrics are exposed +kubectl port-forward svc/bifrost 8080:8080 & +curl http://localhost:8080/metrics | head -30 +``` + +**With Prometheus Push Gateway** (recommended for multi-replica / HA setups where pull-based scraping can miss pods): + +```yaml 
+bifrost: + plugins: + telemetry: + enabled: true + config: + push_gateway: + enabled: true + push_gateway_url: "http://prometheus-pushgateway.monitoring.svc.cluster.local:9091" + job_name: "bifrost" + instance_id: "" # auto-derived from pod name if empty + push_interval: 15 + basic_auth: + username: "" + password: "" +``` + +**ServiceMonitor for Prometheus Operator:** + +```yaml +serviceMonitor: + enabled: true + interval: 30s + scrapeTimeout: 10s + namespace: monitoring # namespace where Prometheus is deployed +``` + + + + + +### Request/Response Logging + +Persists full request and response data to the configured log store. + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `bifrost.plugins.logging.enabled` | Enable request/response logging | `false` | +| `bifrost.plugins.logging.config.disable_content_logging` | Strip message body from logs | `false` | +| `bifrost.plugins.logging.config.logging_headers` | HTTP headers to capture in log metadata | `[]` | + +```yaml +# logging-values.yaml +image: + tag: "v1.4.11" + +bifrost: + plugins: + logging: + enabled: true + config: + disable_content_logging: false # set true for HIPAA/compliance + logging_headers: + - "x-request-id" + - "x-user-id" + - "x-team-id" +``` + +```bash +helm upgrade bifrost bifrost/bifrost --reuse-values -f logging-values.yaml +``` + +**Verify logs are being written:** + +```bash +kubectl port-forward svc/bifrost 8080:8080 & +# Make a test request, then query logs +curl -s "http://localhost:8080/api/logs?limit=5" | jq . +``` + + +`bifrost.plugins.logging` controls the *plugin* (which hooks into every request). `bifrost.client.enableLogging` and `bifrost.client.disableContentLogging` control the *client-level* defaults. Both must be configured consistently — see the [Client Configuration](/deployment-guides/helm/client) page. + + + + + + +### Governance Plugin + +Enforces budget caps, rate limits, and virtual key policies on every request. 
Must be enabled alongside `bifrost.governance` resource definitions. + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `bifrost.plugins.governance.enabled` | Enable governance enforcement | `false` | +| `bifrost.plugins.governance.config.is_vk_mandatory` | Reject requests without a virtual key | `false` | +| `bifrost.plugins.governance.config.required_headers` | Additional headers required on every request | `[]` | +| `bifrost.plugins.governance.config.is_enterprise` | Enable enterprise governance features | `false` | + +```yaml +# governance-plugin-values.yaml +image: + tag: "v1.4.11" + +bifrost: + plugins: + governance: + enabled: true + config: + is_vk_mandatory: true # require virtual key on all inference requests + required_headers: [] +``` + +```bash +helm upgrade bifrost bifrost/bifrost --reuse-values -f governance-plugin-values.yaml +``` + +See the [Governance](/deployment-guides/helm/governance) page for defining budgets, rate limits, and virtual keys. + + + + + +### Semantic Cache + +Caches LLM responses using vector similarity so semantically equivalent prompts return cached answers. 
+ +Two modes: +- **Semantic mode** (`dimension > 1`): uses an embedding model + vector store for similarity search +- **Direct / hash mode** (`dimension: 1`): exact-match hash-based caching, no embedding model needed + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `bifrost.plugins.semanticCache.enabled` | Enable semantic caching | `false` | +| `bifrost.plugins.semanticCache.config.provider` | Embedding provider | `"openai"` | +| `bifrost.plugins.semanticCache.config.embedding_model` | Embedding model name | `"text-embedding-3-small"` | +| `bifrost.plugins.semanticCache.config.dimension` | Embedding dimension (`1` = direct/hash mode) | `1536` | +| `bifrost.plugins.semanticCache.config.threshold` | Cosine similarity threshold (0–1) | `0.8` | +| `bifrost.plugins.semanticCache.config.ttl` | Cache entry TTL (Go duration) | `"5m"` | +| `bifrost.plugins.semanticCache.config.conversation_history_threshold` | Number of past messages to include in cache key | `3` | +| `bifrost.plugins.semanticCache.config.cache_by_model` | Include model name in cache key | `true` | +| `bifrost.plugins.semanticCache.config.cache_by_provider` | Include provider name in cache key | `true` | +| `bifrost.plugins.semanticCache.config.exclude_system_prompt` | Exclude system prompt from cache key | `false` | +| `bifrost.plugins.semanticCache.config.cleanup_on_shutdown` | Delete cache data on pod shutdown | `false` | + +**Semantic mode (with OpenAI embeddings + Weaviate):** + +```bash +kubectl create secret generic semantic-cache-secret \ + --from-literal=openai-key='sk-your-openai-embedding-key' +``` + +```yaml +# semantic-cache-values.yaml +image: + tag: "v1.4.11" + +vectorStore: + enabled: true + type: weaviate + weaviate: + enabled: true + persistence: + size: 20Gi + +bifrost: + plugins: + semanticCache: + enabled: true + config: + provider: "openai" + keys: + - value: "env.SEMANTIC_CACHE_OPENAI_KEY" + weight: 1 + embedding_model: "text-embedding-3-small" + 
dimension: 1536 + threshold: 0.85 + ttl: "1h" + conversation_history_threshold: 5 + cache_by_model: true + cache_by_provider: true + + providerSecrets: + semantic-cache-key: + existingSecret: "semantic-cache-secret" + key: "openai-key" + envVar: "SEMANTIC_CACHE_OPENAI_KEY" +``` + +```bash +helm install bifrost bifrost/bifrost -f semantic-cache-values.yaml +``` + +**Direct / hash mode** (no embedding provider needed): + +```yaml +bifrost: + plugins: + semanticCache: + enabled: true + config: + dimension: 1 # triggers hash-based exact matching + ttl: "30m" + cache_by_model: true + cache_by_provider: true +``` + + +The vector store (`vectorStore.*`) must be configured and enabled for semantic mode. Direct/hash mode works without a vector store but still requires a storage backend. + + + + + + +### OpenTelemetry (OTel) + +Sends distributed traces and push-based metrics to any OTLP-compatible collector (Jaeger, Tempo, Honeycomb, etc.). + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `bifrost.plugins.otel.enabled` | Enable OTel tracing | `false` | +| `bifrost.plugins.otel.config.service_name` | Service name in traces | `"bifrost"` | +| `bifrost.plugins.otel.config.collector_url` | OTLP collector endpoint | `""` | +| `bifrost.plugins.otel.config.trace_type` | Trace type (`genai_extension` or `default`) | `"genai_extension"` | +| `bifrost.plugins.otel.config.protocol` | Transport protocol (`grpc` or `http`) | `"grpc"` | +| `bifrost.plugins.otel.config.metrics_enabled` | Enable OTLP push-based metrics | `false` | +| `bifrost.plugins.otel.config.metrics_endpoint` | OTLP metrics endpoint | `""` | +| `bifrost.plugins.otel.config.metrics_push_interval` | Push interval in seconds | `15` | +| `bifrost.plugins.otel.config.headers` | Custom headers for the collector | `{}` | +| `bifrost.plugins.otel.config.insecure` | Skip TLS verification | `false` | +| `bifrost.plugins.otel.config.tls_ca_cert` | Path to CA cert for TLS | `""` | + +```yaml +# 
otel-values.yaml +image: + tag: "v1.4.11" + +bifrost: + plugins: + otel: + enabled: true + config: + service_name: "bifrost-production" + collector_url: "otel-collector.observability.svc.cluster.local:4317" + trace_type: "genai_extension" + protocol: "grpc" + insecure: true # set false in production with a proper cert + metrics_enabled: true + metrics_endpoint: "otel-collector.observability.svc.cluster.local:4317" + metrics_push_interval: 15 + headers: + x-honeycomb-team: "env.HONEYCOMB_API_KEY" +``` + +```bash +helm upgrade bifrost bifrost/bifrost --reuse-values -f otel-values.yaml +``` + +**With authentication headers from a Kubernetes Secret:** + +```bash +kubectl create secret generic otel-credentials \ + --from-literal=api-key='your-honeycomb-or-grafana-key' +``` + +```yaml +bifrost: + plugins: + otel: + enabled: true + config: + collector_url: "api.honeycomb.io:443" + protocol: "grpc" + headers: + x-honeycomb-team: "env.OTEL_API_KEY" + + providerSecrets: + otel-key: + existingSecret: "otel-credentials" + key: "api-key" + envVar: "OTEL_API_KEY" +``` + + + + + +### Datadog APM + +Sends traces to a Datadog Agent running in the cluster. + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `bifrost.plugins.datadog.enabled` | Enable Datadog tracing | `false` | +| `bifrost.plugins.datadog.config.service_name` | Service name | `"bifrost"` | +| `bifrost.plugins.datadog.config.agent_addr` | Datadog Agent address | `"localhost:8126"` | +| `bifrost.plugins.datadog.config.env` | Deployment environment tag | `""` | +| `bifrost.plugins.datadog.config.version` | Version tag | `""` | +| `bifrost.plugins.datadog.config.enable_traces` | Enable trace collection | `true` | +| `bifrost.plugins.datadog.config.custom_tags` | Extra tags on all spans | `{}` | + +The Datadog Agent is typically deployed via the [Datadog Helm chart](https://docs.datadoghq.com/containers/kubernetes/installation/) as a DaemonSet, making it available at the node's hostIP. 
+ +```yaml +# datadog-values.yaml +image: + tag: "v1.4.11" + +bifrost: + plugins: + datadog: + enabled: true + config: + service_name: "bifrost" + agent_addr: "$(HOST_IP):8126" # uses Datadog DaemonSet pattern + env: "production" + version: "v1.4.11" + enable_traces: true + custom_tags: + team: "platform" + region: "us-east-1" + +# Inject HOST_IP so Bifrost can reach the DaemonSet agent on the same node +env: + - name: HOST_IP + valueFrom: + fieldRef: + fieldPath: status.hostIP +``` + +```bash +helm upgrade bifrost bifrost/bifrost --reuse-values -f datadog-values.yaml +``` + + + + + +### Maxim Observability + +Sends LLM request/response data to [Maxim](https://getmaxim.ai) for tracing, evaluation, and observability. + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `bifrost.plugins.maxim.enabled` | Enable Maxim plugin | `false` | +| `bifrost.plugins.maxim.config.api_key` | Maxim API key (plain text, prefer secret) | `""` | +| `bifrost.plugins.maxim.config.log_repo_id` | Maxim log repository ID | `""` | +| `bifrost.plugins.maxim.secretRef.name` | Kubernetes Secret name for API key | `""` | +| `bifrost.plugins.maxim.secretRef.key` | Key within the secret | `"api-key"` | + +```bash +kubectl create secret generic maxim-credentials \ + --from-literal=api-key='your-maxim-api-key' +``` + +```yaml +# maxim-values.yaml +image: + tag: "v1.4.11" + +bifrost: + plugins: + maxim: + enabled: true + config: + log_repo_id: "your-log-repo-id" + secretRef: + name: "maxim-credentials" + key: "api-key" +``` + +```bash +helm upgrade bifrost bifrost/bifrost --reuse-values -f maxim-values.yaml +``` + + + + + +### Custom / Dynamic Plugins + +Load a custom Go plugin (compiled `.so` file) at runtime. 
+ +```yaml +bifrost: + plugins: + custom: + - name: "my-custom-plugin" + enabled: true + path: "/plugins/my-plugin.so" + version: 1 + config: + api_endpoint: "https://my-service.example.com" + timeout: 5000 +``` + +Mount the `.so` file via a volume: + +```yaml +volumes: + - name: custom-plugins + configMap: + name: bifrost-custom-plugins + +volumeMounts: + - name: custom-plugins + mountPath: /plugins +``` + +Or use an init container to download the plugin binary: + +```yaml +initContainers: + - name: download-plugin + image: curlimages/curl:8.6.0 + command: + - sh + - -c + - | + curl -fsSL https://plugins.example.com/my-plugin.so \ + -o /plugins/my-plugin.so + volumeMounts: + - name: plugin-dir + mountPath: /plugins + +volumes: + - name: plugin-dir + emptyDir: {} + +volumeMounts: + - name: plugin-dir + mountPath: /plugins +``` + +```bash +helm upgrade bifrost bifrost/bifrost --reuse-values -f custom-plugin-values.yaml +``` + + + + + +--- + +## All Plugins Together + +```yaml +# all-plugins-values.yaml +image: + tag: "v1.4.11" + +bifrost: + encryptionKeySecret: + name: "bifrost-encryption" + key: "encryption-key" + + plugins: + telemetry: + enabled: true + config: + custom_labels: + - name: "environment" + value: "production" + + logging: + enabled: true + config: + disable_content_logging: false + logging_headers: + - "x-request-id" + + governance: + enabled: true + config: + is_vk_mandatory: true + + semanticCache: + enabled: true + config: + provider: "openai" + keys: + - value: "env.CACHE_OPENAI_KEY" + weight: 1 + embedding_model: "text-embedding-3-small" + dimension: 1536 + threshold: 0.85 + ttl: "1h" + + otel: + enabled: true + config: + service_name: "bifrost" + collector_url: "otel-collector.observability.svc.cluster.local:4317" + protocol: "grpc" + insecure: true +``` + +```bash +helm install bifrost bifrost/bifrost -f all-plugins-values.yaml +``` diff --git a/docs/deployment-guides/helm/providers.mdx b/docs/deployment-guides/helm/providers.mdx new file 
mode 100644 index 0000000000..8a4e0ccc4c --- /dev/null +++ b/docs/deployment-guides/helm/providers.mdx @@ -0,0 +1,941 @@ +--- +title: "Provider Setup" +description: "Configure LLM providers in the Bifrost Helm chart — API keys, cloud-native auth, and self-hosted endpoints" +icon: "plug" +--- + +All providers are configured under `bifrost.providers` in your values file. Each provider entry contains a `keys` list where each key has a `name`, `value`, `weight`, and optional provider-specific config. + +**Two ways to supply credentials:** + +- **Direct value** — `value: "sk-..."` (fine for dev; avoid in production) +- **Kubernetes Secret + env var** — store the key in a Secret, inject as an env var, and reference it with `value: "env.VAR_NAME"` + +The `providerSecrets` block handles the Secret → env var injection automatically: + +```yaml +bifrost: + providers: + openai: + keys: + - name: "primary" + value: "env.OPENAI_API_KEY" # resolved at runtime + weight: 1 + + providerSecrets: + openai: + existingSecret: "my-openai-secret" + key: "api-key" + envVar: "OPENAI_API_KEY" # injected into the pod +``` + +--- + + + + + +### OpenAI + +Supports multiple keys with weighted load balancing. The key with `use_for_batch_api: true` is eligible for the Batch API. 
+ +**Step 1 — Create secret** + +```bash +kubectl create secret generic openai-credentials \ + --from-literal=api-key-1='sk-your-primary-key' \ + --from-literal=api-key-2='sk-your-secondary-key' \ + --from-literal=api-key-batch='sk-your-batch-key' +``` + +**Step 2 — Values file** + +```yaml +# openai-values.yaml +image: + tag: "v1.4.11" + +bifrost: + providers: + openai: + keys: + - name: "openai-primary" + value: "env.OPENAI_KEY_1" + weight: 2 # 50% of traffic + models: ["*"] + - name: "openai-secondary" + value: "env.OPENAI_KEY_2" + weight: 1 # 25% + models: ["gpt-4o-mini"] # restrict to cheaper model + - name: "openai-batch" + value: "env.OPENAI_KEY_BATCH" + weight: 1 # 25% + models: ["*"] + use_for_batch_api: true + + providerSecrets: + openai-key-1: + existingSecret: "openai-credentials" + key: "api-key-1" + envVar: "OPENAI_KEY_1" + openai-key-2: + existingSecret: "openai-credentials" + key: "api-key-2" + envVar: "OPENAI_KEY_2" + openai-key-batch: + existingSecret: "openai-credentials" + key: "api-key-batch" + envVar: "OPENAI_KEY_BATCH" +``` + +**Step 3 — Install** + +```bash +helm install bifrost bifrost/bifrost -f openai-values.yaml +``` + +**Optional — per-provider network config** + +```yaml +bifrost: + providers: + openai: + keys: + - name: "primary" + value: "env.OPENAI_KEY_1" + weight: 1 + network_config: + default_request_timeout_in_seconds: 120 + max_retries: 3 + retry_backoff_initial_ms: 500 + retry_backoff_max_ms: 5000 + max_conns_per_host: 5000 +``` + + + + + +### Anthropic + +```bash +kubectl create secret generic anthropic-credentials \ + --from-literal=api-key-1='sk-ant-your-primary-key' \ + --from-literal=api-key-2='sk-ant-your-secondary-key' +``` + +```yaml +# anthropic-values.yaml +image: + tag: "v1.4.11" + +bifrost: + providers: + anthropic: + keys: + - name: "anthropic-primary" + value: "env.ANTHROPIC_KEY_1" + weight: 1 + models: ["*"] + - name: "anthropic-secondary" + value: "env.ANTHROPIC_KEY_2" + weight: 1 + models: ["*"] + + 
providerSecrets: + anthropic-key-1: + existingSecret: "anthropic-credentials" + key: "api-key-1" + envVar: "ANTHROPIC_KEY_1" + anthropic-key-2: + existingSecret: "anthropic-credentials" + key: "api-key-2" + envVar: "ANTHROPIC_KEY_2" +``` + +```bash +helm install bifrost bifrost/bifrost -f anthropic-values.yaml +``` + +**Override Anthropic beta headers** (optional): + +```yaml +bifrost: + providers: + anthropic: + keys: + - name: "primary" + value: "env.ANTHROPIC_KEY_1" + weight: 1 + network_config: + beta_header_overrides: + redact-thinking-: true +``` + + + + + +### Azure OpenAI + +Azure requires `azure_key_config` on every key with `endpoint`, `api_version`, and a `deployments` map (logical model name → Azure deployment name). + +Two auth modes are supported: + + + + +**Step 1 — Create secret** + +```bash +kubectl create secret generic azure-credentials \ + --from-literal=api-key='your-azure-openai-api-key' \ + --from-literal=endpoint='https://your-resource.openai.azure.com' +``` + +**Step 2 — Values file** + +```yaml +# azure-apikey-values.yaml +image: + tag: "v1.4.11" + +bifrost: + providers: + azure: + keys: + - name: "azure-primary" + value: "env.AZURE_API_KEY" + weight: 1 + models: ["gpt-4o", "gpt-4o-mini", "text-embedding-3-small"] + azure_key_config: + endpoint: "env.AZURE_ENDPOINT" + api_version: "2024-10-21" + deployments: + gpt-4o: "gpt-4o-prod" + gpt-4o-mini: "gpt-4o-mini-prod" + text-embedding-3-small: "embeddings-prod" + + providerSecrets: + azure-api-key: + existingSecret: "azure-credentials" + key: "api-key" + envVar: "AZURE_API_KEY" + azure-endpoint: + existingSecret: "azure-credentials" + key: "endpoint" + envVar: "AZURE_ENDPOINT" +``` + +**Step 3 — Install** + +```bash +helm install bifrost bifrost/bifrost -f azure-apikey-values.yaml +``` + + + + +When `value` is empty, Bifrost uses `DefaultAzureCredential` — which automatically resolves credentials from: +- AKS Workload Identity (recommended for production) +- Azure VM managed identity +- `az 
login` (developer machines) + +**Step 1 — Annotate the service account** (AKS Workload Identity) + +```bash +# Associate the Kubernetes service account with your Azure managed identity +kubectl annotate serviceaccount bifrost \ + azure.workload.identity/client-id="<managed-identity-client-id>" +``` + +```yaml +serviceAccount: + annotations: + azure.workload.identity/client-id: "<managed-identity-client-id>" +``` + +**Step 2 — Values file** + +```bash +kubectl create secret generic azure-config \ + --from-literal=endpoint='https://your-resource.openai.azure.com' +``` + +```yaml +# azure-msi-values.yaml +image: + tag: "v1.4.11" + +serviceAccount: + annotations: + azure.workload.identity/client-id: "<managed-identity-client-id>" + +bifrost: + providers: + azure: + keys: + - name: "azure-workload-identity" + value: "" # empty = DefaultAzureCredential + weight: 1 + models: ["gpt-4o"] + azure_key_config: + endpoint: "env.AZURE_ENDPOINT" + api_version: "2024-10-21" + deployments: + gpt-4o: "gpt-4o-prod" + + providerSecrets: + azure-endpoint: + existingSecret: "azure-config" + key: "endpoint" + envVar: "AZURE_ENDPOINT" +``` + +**Step 3 — Install** + +```bash +helm install bifrost bifrost/bifrost -f azure-msi-values.yaml +``` + + + + +**Multi-region failover** (two deployments, different regions): + +```yaml +bifrost: + providers: + azure: + keys: + - name: "eastus" + value: "env.AZURE_KEY_EAST" + weight: 1 + azure_key_config: + endpoint: "env.AZURE_ENDPOINT_EAST" + api_version: "2024-10-21" + deployments: + gpt-4o: "gpt-4o-eastus" + - name: "westus" + value: "env.AZURE_KEY_WEST" + weight: 1 + azure_key_config: + endpoint: "env.AZURE_ENDPOINT_WEST" + api_version: "2024-10-21" + deployments: + gpt-4o: "gpt-4o-westus" +``` + + + + + +### AWS Bedrock + +Bedrock requires `bedrock_key_config` with at minimum a `region`. 
Three auth modes: + + + + +```bash +kubectl create secret generic aws-credentials \ + --from-literal=access-key-id='AKIAIOSFODNN7EXAMPLE' \ + --from-literal=secret-access-key='wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY' +``` + +```yaml +# bedrock-static-values.yaml +image: + tag: "v1.4.11" + +bifrost: + providers: + bedrock: + keys: + - name: "bedrock-static" + value: "" + weight: 1 + models: ["*"] + bedrock_key_config: + region: "us-east-1" + access_key: "env.AWS_ACCESS_KEY_ID" + secret_key: "env.AWS_SECRET_ACCESS_KEY" + deployments: + # Logical name -> Bedrock inference profile + anthropic.claude-3-5-sonnet: "us.anthropic.claude-3-5-sonnet-20240620-v1:0" + + providerSecrets: + aws-access-key: + existingSecret: "aws-credentials" + key: "access-key-id" + envVar: "AWS_ACCESS_KEY_ID" + aws-secret-key: + existingSecret: "aws-credentials" + key: "secret-access-key" + envVar: "AWS_SECRET_ACCESS_KEY" +``` + +```bash +helm install bifrost bifrost/bifrost -f bedrock-static-values.yaml +``` + + + + +When only `region` is set, Bifrost inherits credentials from the AWS SDK default chain — IRSA (IAM Roles for Service Accounts), EC2 instance profile, or `AWS_*` env vars. 
+ +**Step 1 — Annotate the service account with the IAM role** + +```bash +kubectl annotate serviceaccount bifrost \ + eks.amazonaws.com/role-arn="arn:aws:iam::123456789012:role/BifrostBedrockRole" +``` + +```yaml +serviceAccount: + annotations: + eks.amazonaws.com/role-arn: "arn:aws:iam::123456789012:role/BifrostBedrockRole" +``` + +**Step 2 — Values file** + +```yaml +# bedrock-irsa-values.yaml +image: + tag: "v1.4.11" + +serviceAccount: + annotations: + eks.amazonaws.com/role-arn: "arn:aws:iam::123456789012:role/BifrostBedrockRole" + +bifrost: + providers: + bedrock: + keys: + - name: "bedrock-irsa" + value: "" + weight: 1 + models: ["*"] + bedrock_key_config: + region: "us-east-1" + # No access_key / secret_key — SDK uses IRSA token automatically +``` + +```bash +helm install bifrost bifrost/bifrost -f bedrock-irsa-values.yaml +``` + + + + +Assumes a cross-account role on top of the default credential chain. + +```yaml +# bedrock-assumerole-values.yaml +image: + tag: "v1.4.11" + +bifrost: + providers: + bedrock: + keys: + - name: "bedrock-assumerole" + value: "" + weight: 1 + models: ["*"] + bedrock_key_config: + region: "us-west-2" + # Source identity from pod's default chain, then assume this role + role_arn: "env.AWS_ROLE_ARN" + external_id: "env.AWS_EXTERNAL_ID" + session_name: "bifrost-session" +``` + +```bash +kubectl create secret generic aws-role-config \ + --from-literal=role-arn='arn:aws:iam::999999999999:role/CrossAccountBedrockRole' \ + --from-literal=external-id='your-external-id' +``` + +```yaml + providerSecrets: + aws-role-arn: + existingSecret: "aws-role-config" + key: "role-arn" + envVar: "AWS_ROLE_ARN" + aws-external-id: + existingSecret: "aws-role-config" + key: "external-id" + envVar: "AWS_EXTERNAL_ID" +``` + +```bash +helm install bifrost bifrost/bifrost -f bedrock-assumerole-values.yaml +``` + + + + +**Batch API — S3 configuration** + +```yaml +bedrock_key_config: + region: "us-east-1" + access_key: "env.AWS_ACCESS_KEY_ID" + secret_key: 
"env.AWS_SECRET_ACCESS_KEY" + batch_s3_config: + buckets: + - bucket_name: "my-bedrock-batch-bucket" + prefix: "batch/" + is_default: true +``` + + + + + +### Google Vertex AI + +Vertex requires `vertex_key_config` with `project_id` and `region`. Two auth modes: + + + + +```bash +# Base64-encode the service account JSON +SA_JSON=$(cat service-account-key.json | base64 -w 0) + +kubectl create secret generic gcp-credentials \ + --from-literal=project-id='my-gcp-project' \ + --from-literal=service-account-json="${SA_JSON}" +``` + +```yaml +# vertex-sa-values.yaml +image: + tag: "v1.4.11" + +bifrost: + providers: + vertex: + keys: + - name: "vertex-sa-key" + value: "" + weight: 1 + models: ["*"] + vertex_key_config: + project_id: "env.VERTEX_PROJECT_ID" + region: "us-central1" + auth_credentials: "env.VERTEX_AUTH_CREDENTIALS" + + providerSecrets: + vertex-project-id: + existingSecret: "gcp-credentials" + key: "project-id" + envVar: "VERTEX_PROJECT_ID" + vertex-sa: + existingSecret: "gcp-credentials" + key: "service-account-json" + envVar: "VERTEX_AUTH_CREDENTIALS" +``` + +```bash +helm install bifrost bifrost/bifrost -f vertex-sa-values.yaml +``` + + + + +When `auth_credentials` is omitted, Bifrost calls `google.FindDefaultCredentials` — which resolves to: +- GKE Workload Identity (recommended) +- GCE metadata server (on Compute Engine / Cloud Run) +- `GOOGLE_APPLICATION_CREDENTIALS` path +- `gcloud auth application-default login` (developer machines) + +**Step 1 — Annotate the service account** (GKE Workload Identity) + +```bash +gcloud iam service-accounts add-iam-policy-binding \ + bifrost-sa@my-project.iam.gserviceaccount.com \ + --role roles/iam.workloadIdentityUser \ + --member "serviceAccount:my-project.svc.id.goog[default/bifrost]" +``` + +```yaml +serviceAccount: + annotations: + iam.gke.io/gcp-service-account: "bifrost-sa@my-project.iam.gserviceaccount.com" +``` + +**Step 2 — Values file** + +```yaml +# vertex-wli-values.yaml +image: + tag: "v1.4.11" + 
+serviceAccount: + annotations: + 
iam.gke.io/gcp-service-account: "bifrost-sa@my-project.iam.gserviceaccount.com" + +bifrost: + providers: + vertex: + keys: + - name: "vertex-workload-identity" + value: "" + weight: 1 + models: ["*"] + vertex_key_config: + project_id: "my-gcp-project" + region: "us-central1" + # auth_credentials intentionally omitted → ADC lookup +``` + +```bash +helm install bifrost bifrost/bifrost -f vertex-wli-values.yaml +``` + + + + + + + + +### Standard API-Key Providers + +These providers follow the same simple pattern — one or more keys with weights. + + + + +```bash +kubectl create secret generic groq-credentials \ + --from-literal=api-key='gsk_your_groq_api_key' +``` + +```yaml +bifrost: + providers: + groq: + keys: + - name: "groq-primary" + value: "env.GROQ_API_KEY" + weight: 1 + models: ["*"] + + providerSecrets: + groq-key: + existingSecret: "groq-credentials" + key: "api-key" + envVar: "GROQ_API_KEY" +``` + + + + +```bash +kubectl create secret generic gemini-credentials \ + --from-literal=api-key='your-gemini-api-key' +``` + +```yaml +bifrost: + providers: + gemini: + keys: + - name: "gemini-main" + value: "env.GEMINI_API_KEY" + weight: 1 + models: ["*"] + + providerSecrets: + gemini-key: + existingSecret: "gemini-credentials" + key: "api-key" + envVar: "GEMINI_API_KEY" +``` + + + + +```bash +kubectl create secret generic mistral-credentials \ + --from-literal=api-key='your-mistral-api-key' +``` + +```yaml +bifrost: + providers: + mistral: + keys: + - name: "mistral-main" + value: "env.MISTRAL_API_KEY" + weight: 1 + models: ["*"] + + providerSecrets: + mistral-key: + existingSecret: "mistral-credentials" + key: "api-key" + envVar: "MISTRAL_API_KEY" +``` + + + + +All standard API-key providers follow the same pattern. 
Replace the provider name and env var name accordingly: + +```yaml +bifrost: + providers: + cohere: + keys: + - name: "cohere-main" + value: "env.COHERE_API_KEY" + weight: 1 + perplexity: + keys: + - name: "perplexity-main" + value: "env.PERPLEXITY_API_KEY" + weight: 1 + xai: + keys: + - name: "xai-main" + value: "env.XAI_API_KEY" + weight: 1 + cerebras: + keys: + - name: "cerebras-main" + value: "env.CEREBRAS_API_KEY" + weight: 1 + openrouter: + keys: + - name: "openrouter-main" + value: "env.OPENROUTER_API_KEY" + weight: 1 + nebius: + keys: + - name: "nebius-main" + value: "env.NEBIUS_API_KEY" + weight: 1 +``` + + + + +**Install command (any of the above)** + +```bash +helm install bifrost bifrost/bifrost \ + --set image.tag=v1.4.11 \ + -f provider-values.yaml +``` + + + + + +### Self-Hosted Providers + +Self-hosted providers point to a URL you operate. No API key is typically required (`value: ""`). + + + + +```yaml +# ollama-values.yaml +image: + tag: "v1.4.11" + +bifrost: + providers: + ollama: + keys: + - name: "ollama-local" + value: "" + weight: 1 + models: ["*"] + ollama_key_config: + url: "http://ollama.default.svc.cluster.local:11434" +``` + +```bash +helm install bifrost bifrost/bifrost -f ollama-values.yaml +``` + +Using an env var for the URL (useful across environments): + +```bash +kubectl create secret generic ollama-config \ + --from-literal=url='http://ollama.default.svc.cluster.local:11434' +``` + +```yaml + ollama_key_config: + url: "env.OLLAMA_URL" + + providerSecrets: + ollama-url: + existingSecret: "ollama-config" + key: "url" + envVar: "OLLAMA_URL" +``` + + + + +vLLM instances are model-specific — one key per served model. 
+ +```yaml +# vllm-values.yaml +image: + tag: "v1.4.11" + +bifrost: + providers: + vllm: + keys: + - name: "vllm-llama3-70b" + value: "" + weight: 1 + models: ["llama-3-70b"] + vllm_key_config: + url: "http://vllm.default.svc.cluster.local:8000" + model_name: "meta-llama/Meta-Llama-3-70B-Instruct" + - name: "vllm-mistral" + value: "" + weight: 1 + models: ["mistral-7b"] + vllm_key_config: + url: "http://vllm-mistral.default.svc.cluster.local:8000" + model_name: "mistralai/Mistral-7B-Instruct-v0.3" +``` + +```bash +helm install bifrost bifrost/bifrost -f vllm-values.yaml +``` + + + + +```yaml +# sgl-values.yaml +image: + tag: "v1.4.11" + +bifrost: + providers: + sgl: + keys: + - name: "sgl-main" + value: "" + weight: 1 + models: ["*"] + sgl_key_config: + url: "http://sgl-router.default.svc.cluster.local:30000" +``` + +```bash +helm install bifrost bifrost/bifrost -f sgl-values.yaml +``` + + + + +These providers use `aliases` to map logical model names to provider-specific IDs. + +```yaml +bifrost: + providers: + huggingface: + keys: + - name: "hf-main" + value: "env.HF_API_KEY" + weight: 1 + models: ["llama-3", "mixtral"] + aliases: + llama-3: "meta-llama/Meta-Llama-3-8B-Instruct" + mixtral: "mistralai/Mixtral-8x7B-Instruct-v0.1" + + replicate: + keys: + - name: "replicate-main" + value: "env.REPLICATE_API_KEY" + weight: 1 + models: ["llama-3"] + aliases: + llama-3: "meta/meta-llama-3-70b-instruct" + replicate_key_config: + use_deployments_endpoint: false +``` + + + + + + + + +--- + +## Multi-Provider Example + +Combine providers in a single values file: + +```yaml +# multi-provider-values.yaml +image: + tag: "v1.4.11" + +bifrost: + providers: + openai: + keys: + - name: "openai-primary" + value: "env.OPENAI_API_KEY" + weight: 2 + models: ["*"] + anthropic: + keys: + - name: "anthropic-primary" + value: "env.ANTHROPIC_API_KEY" + weight: 1 + models: ["*"] + groq: + keys: + - name: "groq-primary" + value: "env.GROQ_API_KEY" + weight: 1 + models: ["*"] + + 
providerSecrets: + openai-key: + existingSecret: "provider-keys" + key: "openai" + envVar: "OPENAI_API_KEY" + anthropic-key: + existingSecret: "provider-keys" + key: "anthropic" + envVar: "ANTHROPIC_API_KEY" + groq-key: + existingSecret: "provider-keys" + key: "groq" + envVar: "GROQ_API_KEY" + + plugins: + logging: + enabled: true + governance: + enabled: true +``` + +```bash +# Create a single secret with all provider keys +kubectl create secret generic provider-keys \ + --from-literal=openai='sk-your-openai-key' \ + --from-literal=anthropic='sk-ant-your-anthropic-key' \ + --from-literal=groq='gsk_your-groq-key' + +helm install bifrost bifrost/bifrost -f multi-provider-values.yaml +``` diff --git a/docs/deployment-guides/helm/storage.mdx b/docs/deployment-guides/helm/storage.mdx new file mode 100644 index 0000000000..244ece3fb2 --- /dev/null +++ b/docs/deployment-guides/helm/storage.mdx @@ -0,0 +1,550 @@ +--- +title: "Storage" +description: "Configure Bifrost storage backends in Helm — SQLite, PostgreSQL (embedded and external), per-store overrides, and S3/GCS object storage for logs" +icon: "database" +--- + +Bifrost persists two types of data — **config** (providers, virtual keys, governance rules) and **logs** (request/response records). Each has its own store, both defaulting to the top-level `storage.mode`. + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `storage.mode` | Default backend for both stores (`sqlite` or `postgres`) | `sqlite` | +| `storage.configStore.type` | Override backend for the config store | `""` (inherits `storage.mode`) | +| `storage.logsStore.type` | Override backend for the logs store | `""` (inherits `storage.mode`) | + + +When any store uses SQLite the chart deploys a **StatefulSet** with a PVC. With PostgreSQL only (no SQLite) it deploys a **Deployment**. Mixing backends (e.g. config=postgres, logs=sqlite) still requires a StatefulSet. 
+ + +--- + + + + + +### SQLite (Default) + +Simplest setup — no external database required. Bifrost runs as a StatefulSet with a persistent volume for the SQLite files. + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `storage.persistence.enabled` | Create a PVC for SQLite data | `true` | +| `storage.persistence.size` | PVC size | `10Gi` | +| `storage.persistence.accessMode` | PVC access mode | `ReadWriteOnce` | +| `storage.persistence.storageClass` | Storage class (leave empty for cluster default) | `""` | +| `storage.persistence.existingClaim` | Reuse an existing PVC | `""` | + +```yaml +# sqlite-values.yaml +image: + tag: "v1.4.11" + +storage: + mode: sqlite + persistence: + enabled: true + size: 20Gi + # storageClass: "gp3" # uncomment to pin storage class + +bifrost: + encryptionKey: "your-32-byte-encryption-key-here" +``` + +```bash +helm install bifrost bifrost/bifrost -f sqlite-values.yaml +``` + +**Reuse an existing PVC** (e.g. after a StatefulSet migration): + +```yaml +storage: + persistence: + existingClaim: "bifrost-data" +``` + + +Upgrading from SQLite to PostgreSQL requires a data migration — the two stores are not compatible. Plan accordingly before switching `storage.mode` on a running deployment. + + +#### StatefulSet Migration (chart v2.0.0+) + +Prior to v2.0.0, SQLite used a Deployment + manual PVC. v2.0.0 moved SQLite to a StatefulSet. If upgrading from an older chart: + +```bash +# 1. Scale down the old deployment +kubectl scale deployment bifrost --replicas=0 + +# 2. Note the existing PVC name +kubectl get pvc + +# 3. Upgrade the chart, pointing at the existing claim +helm upgrade bifrost bifrost/bifrost \ + --reuse-values \ + --set storage.persistence.existingClaim=<existing-pvc-name> \ + --set image.tag=v1.4.11 +``` + + + + + +### Embedded PostgreSQL + +The chart can deploy a PostgreSQL instance alongside Bifrost. Good for simple production setups where you don't have an existing database. 
+ +| Parameter | Description | Default | +|-----------|-------------|---------| +| `storage.mode` | Set to `postgres` | `sqlite` | +| `postgresql.enabled` | Deploy PostgreSQL as a sub-deployment | `false` | +| `postgresql.auth.username` | Database user | `bifrost` | +| `postgresql.auth.password` | Database password | `bifrost_password` | +| `postgresql.auth.database` | Database name | `bifrost` | +| `postgresql.primary.persistence.size` | PVC size for PostgreSQL data | `8Gi` | + + +Ensure the database is created with **UTF8 encoding**. The embedded PostgreSQL deployment handles this automatically. See [PostgreSQL UTF8 Requirement](/quickstart/gateway/setting-up#postgresql-utf8-requirement) for manual setups. + + +```bash +kubectl create secret generic postgres-credentials \ + --from-literal=password='your-secure-postgres-password' +``` + +```yaml +# embedded-postgres-values.yaml +image: + tag: "v1.4.11" + +storage: + mode: postgres + +postgresql: + enabled: true + auth: + username: bifrost + password: "your-secure-postgres-password" # use existingSecret in production + database: bifrost + primary: + persistence: + enabled: true + size: 50Gi + resources: + requests: + cpu: 500m + memory: 1Gi + limits: + cpu: 2000m + memory: 4Gi + +bifrost: + encryptionKey: "your-32-byte-encryption-key-here" +``` + +```bash +helm install bifrost bifrost/bifrost -f embedded-postgres-values.yaml +``` + +**Verify the connection from Bifrost:** + +```bash +kubectl exec -it deployment/bifrost -- nc -zv bifrost-postgresql 5432 +``` + + + + + +### External PostgreSQL + +Point Bifrost at an existing PostgreSQL instance — RDS, Cloud SQL, Azure Database, or self-managed. 
+ +| Parameter | Description | Default | +|-----------|-------------|---------| +| `postgresql.enabled` | Must be `false` | `false` | +| `postgresql.external.enabled` | Enable external connection | `false` | +| `postgresql.external.host` | Hostname or IP | `""` | +| `postgresql.external.port` | Port | `5432` | +| `postgresql.external.user` | Username | `bifrost` | +| `postgresql.external.database` | Database name | `bifrost` | +| `postgresql.external.sslMode` | SSL mode (`disable`, `require`, `verify-ca`, `verify-full`) | `disable` | +| `postgresql.external.existingSecret` | Secret name for the password | `""` | +| `postgresql.external.passwordKey` | Key within the secret | `"password"` | + +```bash +kubectl create secret generic external-postgres-credentials \ + --from-literal=password='your-external-postgres-password' +``` + +```yaml +# external-postgres-values.yaml +image: + tag: "v1.4.11" + +storage: + mode: postgres + +postgresql: + enabled: false + external: + enabled: true + host: "your-rds-endpoint.us-east-1.rds.amazonaws.com" + port: 5432 + user: bifrost + database: bifrost + sslMode: require + existingSecret: "external-postgres-credentials" + passwordKey: "password" + +bifrost: + encryptionKey: "your-32-byte-encryption-key-here" +``` + +```bash +helm install bifrost bifrost/bifrost -f external-postgres-values.yaml +``` + +**Test connectivity before installing:** + +```bash +kubectl run pg-test --image=postgres:16-alpine --rm -it --restart=Never -- \ + psql "host=your-rds-endpoint.us-east-1.rds.amazonaws.com dbname=bifrost user=bifrost sslmode=require" \ + -c "SELECT version();" +``` + + + + + +### Mixed Backend + +Run the config store on PostgreSQL (fast lookups, shared across replicas) while keeping logs on SQLite (simpler, cheaper for append-heavy workloads). 
+ +```yaml +# mixed-values.yaml +image: + tag: "v1.4.11" + +storage: + mode: sqlite # default fallback + configStore: + type: postgres # override: config uses postgres + logsStore: + type: sqlite # explicit: logs use sqlite + persistence: + enabled: true + size: 20Gi # for the SQLite logs store + +postgresql: + external: + enabled: true + host: "your-postgres-host.example.com" + port: 5432 + user: bifrost + database: bifrost + sslMode: require + existingSecret: "postgres-credentials" + passwordKey: "password" + +bifrost: + encryptionKey: "your-32-byte-encryption-key-here" +``` + +```bash +kubectl create secret generic postgres-credentials \ + --from-literal=password='your-postgres-password' + +helm install bifrost bifrost/bifrost -f mixed-values.yaml +``` + + +In mixed mode, Bifrost deploys a StatefulSet (because SQLite is in use) with both a PostgreSQL connection and a local PVC for the SQLite log store. + + +**PostgreSQL connection pool tuning** (high log volume): + +```yaml +storage: + configStore: + type: postgres + maxIdleConns: 5 + maxOpenConns: 50 + logsStore: + type: postgres + maxIdleConns: 10 + maxOpenConns: 100 +``` + + + + + +--- + +## Object Storage for Logs + +Offload large request/response payloads from the database to S3 or GCS. The DB retains only lightweight index records; payloads are fetched on demand. 
+ + + + +```bash +kubectl create secret generic s3-credentials \ + --from-literal=access-key-id='AKIAIOSFODNN7EXAMPLE' \ + --from-literal=secret-access-key='wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY' +``` + +```yaml +storage: + logsStore: + objectStorage: + enabled: true + type: s3 + bucket: "bifrost-logs" + prefix: "bifrost" + compress: true # gzip compression + + # S3 configuration + region: us-east-1 + accessKeyId: "env.S3_ACCESS_KEY_ID" + secretAccessKey: "env.S3_SECRET_ACCESS_KEY" + # endpoint: "" # Custom endpoint for MinIO / Cloudflare R2 + # forcePathStyle: false # Set true for MinIO + +bifrost: + # inject S3 credentials as env vars + providerSecrets: + s3-access-key: + existingSecret: "s3-credentials" + key: "access-key-id" + envVar: "S3_ACCESS_KEY_ID" + s3-secret-key: + existingSecret: "s3-credentials" + key: "secret-access-key" + envVar: "S3_SECRET_ACCESS_KEY" +``` + +**Using IAM role (IRSA / instance profile) instead of static keys:** + +```yaml +storage: + logsStore: + objectStorage: + enabled: true + type: s3 + bucket: "bifrost-logs" + region: us-east-1 + # No accessKeyId / secretAccessKey — uses SDK default chain + roleArn: "arn:aws:iam::123456789012:role/BifrostS3Role" +``` + + + + +```bash +kubectl create secret generic gcs-credentials \ + --from-literal=service-account-json="$(cat service-account-key.json)" +``` + +```yaml +storage: + logsStore: + objectStorage: + enabled: true + type: gcs + bucket: "bifrost-logs" + prefix: "bifrost" + compress: true + + # GCS configuration + projectId: "my-gcp-project" + credentialsJson: "env.GCS_CREDENTIALS_JSON" # omit for Workload Identity + +bifrost: + providerSecrets: + gcs-creds: + existingSecret: "gcs-credentials" + key: "service-account-json" + envVar: "GCS_CREDENTIALS_JSON" +``` + + + + +```yaml +storage: + logsStore: + objectStorage: + enabled: true + type: s3 + bucket: "bifrost-logs" + prefix: "bifrost" + compress: false + + region: us-east-1 # can be any value for MinIO + endpoint: 
"http://minio.minio-ns.svc.cluster.local:9000" + accessKeyId: "env.MINIO_ACCESS_KEY" + secretAccessKey: "env.MINIO_SECRET_KEY" + forcePathStyle: true # required for MinIO +``` + + + + +```bash +helm upgrade bifrost bifrost/bifrost \ + --reuse-values \ + -f object-storage-values.yaml +``` + +--- + +## Vector Store + +A vector store is required for [semantic caching](/deployment-guides/helm/plugins). Choose from Weaviate, Redis, or Qdrant (embedded or external), or Pinecone (external only). + + + + +```yaml +vectorStore: + enabled: true + type: weaviate + weaviate: + enabled: true # deploy embedded Weaviate + replicas: 1 + persistence: + enabled: true + size: 20Gi + resources: + requests: + cpu: 500m + memory: 1Gi + limits: + cpu: 2000m + memory: 4Gi +``` + +**External Weaviate:** + +```yaml +vectorStore: + enabled: true + type: weaviate + weaviate: + enabled: false + external: + enabled: true + scheme: https + host: "weaviate.example.com" + apiKey: "env.WEAVIATE_API_KEY" + grpcHost: "weaviate-grpc.example.com" + grpcSecured: true + existingSecret: "weaviate-credentials" + apiKeyKey: "api-key" +``` + + + + +```yaml +vectorStore: + enabled: true + type: redis + redis: + enabled: true # deploy embedded Redis + auth: + enabled: true + password: "redis_password" + master: + persistence: + size: 8Gi +``` + +**External Redis / AWS MemoryDB:** + +```bash +kubectl create secret generic redis-credentials \ + --from-literal=password='your-redis-password' +``` + +```yaml +vectorStore: + enabled: true + type: redis + redis: + enabled: false + external: + enabled: true + host: "your-redis.cache.amazonaws.com" + port: 6379 + useTls: true + clusterMode: true # required for AWS MemoryDB + existingSecret: "redis-credentials" + passwordKey: "password" +``` + + + + +```yaml +vectorStore: + enabled: true + type: qdrant + qdrant: + enabled: true # deploy embedded Qdrant + persistence: + size: 10Gi +``` + +**External Qdrant:** + +```bash +kubectl create secret generic qdrant-credentials \ 
+ --from-literal=api-key='your-qdrant-api-key' +``` + +```yaml +vectorStore: + enabled: true + type: qdrant + qdrant: + enabled: false + external: + enabled: true + host: "qdrant.example.com" + port: 6334 + useTls: true + existingSecret: "qdrant-credentials" + apiKeyKey: "api-key" +``` + + + + +Pinecone is external-only. + +```bash +kubectl create secret generic pinecone-credentials \ + --from-literal=api-key='your-pinecone-api-key' +``` + +```yaml +vectorStore: + enabled: true + type: pinecone + pinecone: + external: + enabled: true + indexHost: "your-index.svc.us-east1-gcp.pinecone.io" + existingSecret: "pinecone-credentials" + apiKeyKey: "api-key" +``` + + + + +```bash +helm install bifrost bifrost/bifrost \ + --set image.tag=v1.4.11 \ + -f storage-values.yaml +``` diff --git a/docs/deployment-guides/helm/troubleshooting.mdx b/docs/deployment-guides/helm/troubleshooting.mdx new file mode 100644 index 0000000000..1a46d0219d --- /dev/null +++ b/docs/deployment-guides/helm/troubleshooting.mdx @@ -0,0 +1,401 @@ +--- +title: "Troubleshooting" +description: "Diagnose and fix common issues with Bifrost Helm deployments — pods, database, ingress, secrets, PVCs, and performance" +icon: "wrench" +--- + +This page covers the most common problems encountered when deploying Bifrost with Helm, along with diagnostic commands and fixes. 
+ +--- + +## Pod Not Starting + +### Quick diagnostics + +```bash +# Show pod status +kubectl get pods -l app.kubernetes.io/name=bifrost + +# Show pod events (most useful first step) +kubectl describe pod -l app.kubernetes.io/name=bifrost + +# Show pod logs (use --previous if the pod has already crashed) +kubectl logs -l app.kubernetes.io/name=bifrost +kubectl logs -l app.kubernetes.io/name=bifrost --previous +``` + +### Image pull errors (`ErrImagePull` / `ImagePullBackOff`) + +```bash +# Check which image is being pulled +kubectl describe pod -l app.kubernetes.io/name=bifrost | grep "Image:" + +# Verify imagePullSecrets are attached +kubectl get pod -l app.kubernetes.io/name=bifrost -o jsonpath='{.items[0].spec.imagePullSecrets}' + +# Test secret manually (replace <pull-secret-name> with the name listed in imagePullSecrets) +kubectl get secret <pull-secret-name> -o jsonpath='{.data.\.dockerconfigjson}' | base64 -d | jq . +``` + +Common causes: +- `image.tag` not set — the chart requires it; the pod will not start without it +- Pull secret missing or expired (ECR tokens expire after 12 hours) +- Incorrect `image.repository` for enterprise registry + +```bash +# Fix: set the correct tag +helm upgrade bifrost bifrost/bifrost --reuse-values --set image.tag=v1.4.11 +``` + +### PVC not binding (`Pending`) + +```bash +# Check PVC status +kubectl get pvc -l app.kubernetes.io/instance=bifrost + +# Show binding events +kubectl describe pvc -l app.kubernetes.io/instance=bifrost +``` + +Common causes: +- No Persistent Volume provisioner in the cluster +- `storageClass` set to a class that doesn't exist +- `ReadWriteOnce` access mode with multiple replicas (SQLite PVCs are single-node) + +```bash +# List available storage classes +kubectl get storageclass + +# Fix: pin to a valid storage class +helm upgrade bifrost bifrost/bifrost \ + --reuse-values \ + --set storage.persistence.storageClass=standard +``` + +### ConfigMap / Secret errors + +```bash +# View the generated ConfigMap (contains rendered config.json) +kubectl get configmap bifrost-config -o yaml + +# 
View secrets the pod depends on +kubectl get secret -l app.kubernetes.io/instance=bifrost + +# Decode a specific secret value +kubectl get secret bifrost-encryption -o jsonpath='{.data.key}' | base64 -d +``` + +### CrashLoopBackOff + +```bash +# Get last log lines before the crash +kubectl logs -l app.kubernetes.io/name=bifrost --previous --tail=50 + +# Common causes shown in logs: +# "encryption key is required" → bifrost.encryptionKey or encryptionKeySecret not set +# "failed to connect to database" → see Database section below +# "image.tag is required" → set image.tag in values +``` + +--- + +## Database Connection Issues + +### Embedded PostgreSQL + +```bash +# Check if the PostgreSQL pod is running +kubectl get pods -l app.kubernetes.io/name=bifrost-postgresql + +# Connect directly to inspect the database +kubectl exec -it deployment/bifrost-postgresql -- psql -U bifrost -d bifrost + +# Test connectivity from the Bifrost pod +kubectl exec -it deployment/bifrost -- nc -zv bifrost-postgresql 5432 + +# Check PostgreSQL logs +kubectl logs deployment/bifrost-postgresql --tail=50 +``` + +### External PostgreSQL + +```bash +# Test connectivity from within the cluster +kubectl run pg-test --image=postgres:16-alpine --rm -it --restart=Never -- \ + psql "host=your-db-host dbname=bifrost user=bifrost sslmode=require" + +# Verify the secret value is correct +kubectl get secret postgres-credentials -o jsonpath='{.data.password}' | base64 -d + +# Check that the external host/port is reachable +kubectl exec -it deployment/bifrost -- nc -zv your-db-host 5432 +``` + +Common causes: +- `sslMode: disable` when the database requires SSL — set `sslMode: require` +- Password in secret doesn't match the database user +- Network policy blocking pod → database traffic +- Database not UTF8 encoded (see [PostgreSQL UTF8 Requirement](/quickstart/gateway/setting-up#postgresql-utf8-requirement)) + +```bash +# Fix: update the secret and restart +kubectl create secret generic 
postgres-credentials \ + --from-literal=password='correct-password' \ + --dry-run=client -o yaml | kubectl apply -f - + +kubectl rollout restart deployment/bifrost +``` + +--- + +## Ingress Not Working + +```bash +# Check ingress resource status +kubectl describe ingress bifrost + +# Check if the ingress controller is running +kubectl get pods -n ingress-nginx -l app.kubernetes.io/name=ingress-nginx + +# View ingress controller logs for routing errors +kubectl logs -n ingress-nginx -l app.kubernetes.io/name=ingress-nginx --tail=50 + +# Verify DNS resolves to the correct load balancer IP +nslookup bifrost.yourdomain.com +kubectl get ingress bifrost -o jsonpath='{.status.loadBalancer.ingress[0].ip}' + +# Test without TLS first +curl -v http://bifrost.yourdomain.com/health +``` + +Common causes: +- `ingress.className` not set or set to a class not installed in the cluster +- TLS certificate not issued yet (cert-manager can take up to 60 seconds) +- Service port mismatch — Bifrost listens on `8080` by default + +```bash +# Check cert-manager certificate status +kubectl get certificate -l app.kubernetes.io/instance=bifrost +kubectl describe certificate bifrost-tls +``` + +--- + +## Secret and Credential Issues + +### Provider API key not resolving + +If Bifrost logs show `env.OPENAI_API_KEY: not set` or similar: + +```bash +# Check the env var is present in the running pod +kubectl exec -it deployment/bifrost -- env | grep OPENAI + +# Verify the providerSecrets secret exists with the right key +kubectl get secret provider-api-keys -o yaml + +# Check the providerSecrets configuration rendered correctly +kubectl get configmap bifrost-config -o yaml | grep -A5 providers +``` + +### Encryption key issues + +```bash +# Verify the secret exists and contains the right key name +kubectl get secret bifrost-encryption -o yaml + +# Check the exact key name matches encryptionKeySecret.key in values +# Default key name is "encryption-key" — if you used "key", set: +# 
bifrost.encryptionKeySecret.key: "key" +``` + +--- + +## High Memory Usage + +```bash +# Check current resource usage +kubectl top pods -l app.kubernetes.io/name=bifrost + +# Check if OOM kills are happening +kubectl describe pod -l app.kubernetes.io/name=bifrost | grep -A3 "OOMKilled\|Limits" + +# View resource requests/limits on running pods +kubectl get pod -l app.kubernetes.io/name=bifrost \ + -o jsonpath='{range .items[*]}{.metadata.name}{"\t"}{.spec.containers[0].resources}{"\n"}{end}' +``` + +**Increase resource limits:** + +```bash +helm upgrade bifrost bifrost/bifrost \ + --reuse-values \ + --set resources.limits.memory=4Gi \ + --set resources.requests.memory=1Gi +``` + +**Tune Go runtime** (see [Docker Tuning](/deployment-guides/docker-tuning)): + +```yaml +env: + - name: GOGC + value: "200" # run GC less often + - name: GOMEMLIMIT + value: "3500MiB" # soft memory limit for the Go runtime; keep it slightly below the container limit +``` + +--- + +## High CPU Usage / Latency + +```bash +# Check CPU usage +kubectl top pods -l app.kubernetes.io/name=bifrost + +# Check if HPA is scaling correctly +kubectl get hpa bifrost +kubectl describe hpa bifrost +``` + +Common causes: +- `initialPoolSize` too small — goroutines queuing up; increase to `500`–`1000` +- `dropExcessRequests: false` with a small pool — queue depth growing unboundedly + +```bash +helm upgrade bifrost bifrost/bifrost \ + --reuse-values \ + --set bifrost.client.initialPoolSize=1000 \ + --set bifrost.client.dropExcessRequests=true +``` + +--- + +## Autoscaling Issues + +### HPA not scaling + +```bash +# Check HPA status and current metrics +kubectl describe hpa bifrost + +# Verify metrics server is installed +kubectl top nodes +kubectl top pods + +# Common fix: metrics server not installed +# Install with: +kubectl apply -f https://github.com/kubernetes-sigs/metrics-server/releases/latest/download/components.yaml +``` + +### Pods scaling down too aggressively (drops active SSE streams) + +The default 
`scaleDown.stabilizationWindowSeconds: 300` and `preStop` sleep of 15 seconds should prevent this. If streams are still being cut: + +```yaml +terminationGracePeriodSeconds: 120 # increase if streams run longer than 90s (120s grace period minus the 30s preStop sleep below) + +autoscaling: + behavior: + scaleDown: + stabilizationWindowSeconds: 600 # wait 10 min before scaling down + policies: + - type: Pods + value: 1 + periodSeconds: 300 # remove at most 1 pod per 5 min + +lifecycle: + preStop: + exec: + command: ["sh", "-c", "sleep 30"] # give load balancer more time to drain +``` + +```bash +helm upgrade bifrost bifrost/bifrost --reuse-values -f graceful-shutdown-values.yaml +``` + +--- + +## SQLite / PVC Issues + +### StatefulSet migration (upgrading from chart < v2.0.0) + +Older chart versions used a Deployment + manual PVC. v2.0.0 moved SQLite to a StatefulSet. If upgrading: + +```bash +# 1. Scale down the old deployment +kubectl scale deployment bifrost --replicas=0 + +# 2. Note the existing PVC name +kubectl get pvc + +# 3. Upgrade, pointing at the existing claim (replace <pvc-name> with the name from step 2) +helm upgrade bifrost bifrost/bifrost \ + --reuse-values \ + --set storage.persistence.existingClaim=<pvc-name> \ + --set image.tag=v1.4.11 +``` + +### Data lost after upgrade + +```bash +# Check if PVCs still exist (they persist after helm uninstall) +kubectl get pvc -l app.kubernetes.io/instance=bifrost + +# Re-attach by setting existingClaim to the PVC name found above +helm upgrade bifrost bifrost/bifrost \ + --reuse-values \ + --set storage.persistence.existingClaim=<pvc-name> +``` + +--- + +## Cluster Mode Issues + +### Peers not discovering each other + +```bash +# Check gossip port is reachable between pods +kubectl exec -it bifrost-0 -- nc -zv bifrost-1.bifrost-headless 7946 + +# View gossip-related log lines +kubectl logs -l app.kubernetes.io/name=bifrost --tail=100 | grep -i gossip + +# Check the headless service exists +kubectl get svc bifrost-headless +``` + +For Kubernetes-based discovery, verify the service account has pod list permissions: + +```bash +kubectl auth can-i list pods 
--as=system:serviceaccount:default:bifrost +``` + +--- + +## Useful Diagnostic Commands + +```bash +# Full state dump for a support ticket +kubectl get all -l app.kubernetes.io/instance=bifrost +kubectl describe pod -l app.kubernetes.io/name=bifrost > pod-describe.txt +kubectl logs -l app.kubernetes.io/name=bifrost --tail=200 > pod-logs.txt + +# View the full rendered config.json +kubectl get configmap bifrost-config -o jsonpath='{.data.config\.json}' | jq . + +# Check current Helm values (shows all overrides) +helm get values bifrost + +# Check Helm release status +helm status bifrost + +# View Helm release history +helm history bifrost +``` + +--- + +## Still Stuck? + +- [GitHub Issues](https://github.com/maximhq/bifrost/issues) — search existing issues or open a new one +- [Enterprise Support](mailto:support@getmaxim.ai) — for enterprise customers with SLA diff --git a/docs/deployment-guides/helm/values.mdx b/docs/deployment-guides/helm/values.mdx new file mode 100644 index 0000000000..3161b206fb --- /dev/null +++ b/docs/deployment-guides/helm/values.mdx @@ -0,0 +1,718 @@ +--- +title: "Values Reference" +description: "Complete reference for Bifrost Helm chart values — key parameters, how to supply them, and links to example files" +icon: "sliders" +--- + +This page covers every top-level parameter group in the Bifrost Helm chart's `values.yaml`, how to supply values via `--set` vs `-f`, and where to find ready-made example files. + + +The full values schema is available at [https://getbifrost.ai/schema](https://getbifrost.ai/schema). All `values.yaml` fields map directly to `config.json` fields generated by the chart. 
+ + +## Supplying Values + +### One-liner with `--set` + +Good for a single field or quick experiments: + +```bash +helm install bifrost bifrost/bifrost \ + --set image.tag=v1.4.11 \ + --set replicaCount=3 \ + --set bifrost.client.initialPoolSize=500 +``` + +### Values file with `-f` + +Recommended for anything beyond a couple of fields: + +```bash +# Create your values file +cat > my-values.yaml <<'EOF' +image: + tag: "v1.4.11" + +replicaCount: 2 + +bifrost: + encryptionKey: "your-32-byte-encryption-key-here" + client: + initialPoolSize: 500 + enableLogging: true +EOF + +# Install +helm install bifrost bifrost/bifrost -f my-values.yaml + +# Upgrade later +helm upgrade bifrost bifrost/bifrost -f my-values.yaml + +# Upgrade and reuse all previously set values, overriding only one field +helm upgrade bifrost bifrost/bifrost \ + --reuse-values \ + --set replicaCount=5 +``` + +### Multiple values files + +Later files override earlier ones — useful for a base + environment-specific overlay: + +```bash +helm install bifrost bifrost/bifrost \ + -f base-values.yaml \ + -f production-overrides.yaml +``` + +--- + +## Key Parameters Reference + +### Image + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `image.repository` | Container image repository | `docker.io/maximhq/bifrost` | +| `image.tag` | **Required.** Image version (e.g. 
`v1.4.11`) | `""` | +| `image.pullPolicy` | Image pull policy | `IfNotPresent` | +| `imagePullSecrets` | List of pull secret names for private registries | `[]` | + +```bash +# Always specify the tag — the chart will not start without it +helm install bifrost bifrost/bifrost --set image.tag=v1.4.11 +``` + +### Replicas & Autoscaling + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `replicaCount` | Static replica count (ignored when HPA is enabled) | `1` | +| `autoscaling.enabled` | Enable Horizontal Pod Autoscaler | `false` | +| `autoscaling.minReplicas` | Minimum replicas | `1` | +| `autoscaling.maxReplicas` | Maximum replicas | `10` | +| `autoscaling.targetCPUUtilizationPercentage` | CPU target for scaling | `80` | +| `autoscaling.targetMemoryUtilizationPercentage` | Memory target for scaling | `80` | +| `autoscaling.behavior.scaleDown.stabilizationWindowSeconds` | Cooldown before scale-down (important for SSE streams) | `300` | +| `autoscaling.behavior.scaleDown.policies[0].value` | Max pods removed per period | `1` | + +### Resources + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `resources.requests.cpu` | CPU request | `500m` | +| `resources.requests.memory` | Memory request | `512Mi` | +| `resources.limits.cpu` | CPU limit | `2000m` | +| `resources.limits.memory` | Memory limit | `2Gi` | + +### Service + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `service.type` | `ClusterIP`, `LoadBalancer`, or `NodePort` | `ClusterIP` | +| `service.port` | Service port | `8080` | + +### Ingress + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `ingress.enabled` | Enable ingress | `false` | +| `ingress.className` | Ingress class (e.g. 
`nginx`, `traefik`) | `""` | +| `ingress.annotations` | Ingress annotations | `{}` | +| `ingress.hosts` | Host rules | see values.yaml | +| `ingress.tls` | TLS configuration | `[]` | + +```yaml +ingress: + enabled: true + className: nginx + annotations: + cert-manager.io/cluster-issuer: letsencrypt-prod + nginx.ingress.kubernetes.io/proxy-body-size: "100m" + hosts: + - host: bifrost.yourdomain.com + paths: + - path: / + pathType: Prefix + tls: + - secretName: bifrost-tls + hosts: + - bifrost.yourdomain.com +``` + +### Probes + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `livenessProbe.initialDelaySeconds` | Seconds before first liveness check | `30` | +| `livenessProbe.periodSeconds` | Liveness check interval | `30` | +| `readinessProbe.initialDelaySeconds` | Seconds before first readiness check | `10` | +| `readinessProbe.periodSeconds` | Readiness check interval | `10` | + +Both probes hit `GET /health`. + +### Graceful Shutdown + +Bifrost supports long-lived SSE streaming connections. The default `preStop` hook and termination grace period let in-flight streams finish before the pod is killed: + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `terminationGracePeriodSeconds` | Total grace period | `60` | +| `lifecycle.preStop.exec.command` | Sleep before SIGTERM so load balancer drains | `["sh", "-c", "sleep 15"]` | + +Increase `terminationGracePeriodSeconds` if your typical stream responses take longer than 45 seconds. + +### Service Account + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `serviceAccount.create` | Create a dedicated service account | `true` | +| `serviceAccount.annotations` | Annotations (e.g. 
for IRSA, Workload Identity) | `{}` | +| `serviceAccount.name` | Override the generated name | `""` | + +### Pod Scheduling + +```yaml +# Spread replicas across nodes +affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + app.kubernetes.io/name: bifrost + topologyKey: kubernetes.io/hostname + +# Pin to specific node pool +nodeSelector: + node-type: ai-workload + +# Tolerate GPU taints +tolerations: + - key: "gpu" + operator: "Equal" + value: "true" + effect: "NoSchedule" +``` + +### Extra Environment Variables + +Three ways to inject env vars: + +```yaml +# Inline key/value pairs +env: + - name: HTTP_PROXY + value: "http://proxy.corp.example.com:3128" + +# Map syntax (appended after env) +extraEnv: + NO_PROXY: "169.254.169.254,10.0.0.0/8" + +# Bulk-load from existing Secrets or ConfigMaps +envFrom: + - secretRef: + name: my-corp-secrets + - configMapRef: + name: my-app-config +``` + +### Init Containers + +```yaml +initContainers: + - name: wait-for-db + image: busybox:1.35 + command: ["sh", "-c", "until nc -z postgres-svc 5432; do sleep 2; done"] +``` + +--- + +## Values Examples + +The chart ships ready-made example files under [`helm-charts/bifrost/values-examples/`](https://github.com/maximhq/bifrost/tree/main/helm-charts/bifrost/values-examples): + +| File | Use case | +|------|----------| +| `sqlite-only.yaml` | Minimal local/dev setup | +| `postgres-only.yaml` | Single-store Postgres | +| `production-ha.yaml` | HA: 3 replicas, Postgres, Weaviate, HPA, Ingress | +| `providers-and-virtual-keys.yaml` | All 23 providers + 7 virtual key patterns | +| `secrets-from-k8s.yaml` | All sensitive values from Kubernetes Secrets | +| `external-postgres.yaml` | Point at an existing Postgres instance | +| `postgres-redis.yaml` | Postgres + Redis vector store | +| `postgres-weaviate.yaml` | Postgres + Weaviate vector store | +| `postgres-qdrant.yaml` | Postgres + Qdrant vector store | +| 
`semantic-cache-secret-example.yaml` | Semantic cache with secret injection | +| `mixed-backend.yaml` | Config store = postgres, logs store = sqlite | + +Install from an example file directly: + +```bash +helm install bifrost bifrost/bifrost \ + -f https://raw.githubusercontent.com/maximhq/bifrost/main/helm-charts/bifrost/values-examples/production-ha.yaml \ + --set image.tag=v1.4.11 +``` + +--- + +## Helm Operations + +### View current values + +```bash +helm get values bifrost +``` + +### Diff before upgrading (requires helm-diff plugin) + +```bash +helm diff upgrade bifrost bifrost/bifrost -f my-values.yaml +``` + +### Rollback + +```bash +helm history bifrost +helm rollback bifrost # to previous revision +helm rollback bifrost 2 # to revision 2 +``` + +### Uninstall + +```bash +helm uninstall bifrost + +# Also remove PVCs (deletes all data) +kubectl delete pvc -l app.kubernetes.io/instance=bifrost +``` + +--- + +## All Key Parameters + +A quick-reference table of the most commonly used top-level parameters: + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `image.tag` | **Required.** Bifrost image version (e.g., `v1.4.11`) | `""` | +| `replicaCount` | Number of replicas | `1` | +| `storage.mode` | Storage backend (`sqlite` or `postgres`) | `sqlite` | +| `storage.persistence.size` | PVC size for SQLite | `10Gi` | +| `postgresql.enabled` | Deploy embedded PostgreSQL | `false` | +| `vectorStore.enabled` | Enable vector store | `false` | +| `vectorStore.type` | Vector store type (`weaviate`, `redis`, `qdrant`, `pinecone`) | `none` | +| `bifrost.encryptionKey` | Encryption key (use `encryptionKeySecret` in production) | `""` | +| `ingress.enabled` | Enable ingress | `false` | +| `autoscaling.enabled` | Enable HPA | `false` | + +### Secret Reference Parameters + +Use existing Kubernetes Secrets instead of plain-text values. 
Every sensitive field in the chart has a corresponding `existingSecret` / `secretRef` alternative: + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `bifrost.encryptionKeySecret.name` | Secret name for encryption key | `""` | +| `bifrost.encryptionKeySecret.key` | Key within the secret | `"encryption-key"` | +| `postgresql.external.existingSecret` | Secret name for PostgreSQL password | `""` | +| `postgresql.external.passwordKey` | Key within the secret | `"password"` | +| `vectorStore.redis.external.existingSecret` | Secret name for Redis password | `""` | +| `vectorStore.redis.external.passwordKey` | Key within the secret | `"password"` | +| `vectorStore.weaviate.external.existingSecret` | Secret name for Weaviate API key | `""` | +| `vectorStore.weaviate.external.apiKeyKey` | Key within the secret | `"api-key"` | +| `vectorStore.qdrant.external.existingSecret` | Secret name for Qdrant API key | `""` | +| `vectorStore.qdrant.external.apiKeyKey` | Key within the secret | `"api-key"` | +| `bifrost.plugins.maxim.secretRef.name` | Secret name for Maxim API key | `""` | +| `bifrost.plugins.maxim.secretRef.key` | Key within the secret | `"api-key"` | +| `bifrost.providerSecrets.<name>.existingSecret` | Secret name for provider API key | `""` | +| `bifrost.providerSecrets.<name>.key` | Key within the secret | `"api-key"` | +| `bifrost.providerSecrets.<name>.envVar` | Environment variable name to inject | `""` | + +--- + +## Advanced Configuration + +### Comprehensive Example + +A production-ready values file combining the most common settings: + +```yaml +# my-values.yaml +image: + tag: "v1.4.11" + +replicaCount: 3 + +storage: + mode: postgres + +postgresql: + enabled: true + auth: + password: "secure-password" # use existingSecret in production + +autoscaling: + enabled: true + minReplicas: 3 + maxReplicas: 10 + +ingress: + enabled: true + className: nginx + hosts: + - host: bifrost.example.com + paths: + - path: / + pathType: Prefix + +bifrost: + 
encryptionKeySecret: + name: "bifrost-encryption" + key: "key" + providers: + openai: + keys: + - name: "primary" + value: "env.OPENAI_API_KEY" + weight: 1 + providerSecrets: + openai: + existingSecret: "provider-api-keys" + key: "openai-api-key" + envVar: "OPENAI_API_KEY" +``` + +```bash +helm install bifrost bifrost/bifrost -f my-values.yaml +``` + +### Node Affinity & Scheduling + +Deploy to specific nodes and spread replicas across hosts: + +```yaml +nodeSelector: + node-type: ai-workload + +affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + app.kubernetes.io/name: bifrost + topologyKey: kubernetes.io/hostname + +tolerations: + - key: "gpu" + operator: "Equal" + value: "true" + effect: "NoSchedule" +``` + +### Deployment & Pod Annotations + +Useful for tooling like [Keel](https://keel.sh) for automatic image updates or Datadog APM injection: + +```yaml +deploymentAnnotations: + keel.sh/policy: force + keel.sh/trigger: poll + +podAnnotations: + ad.datadoghq.com/bifrost.logs: '[{"source":"bifrost","service":"bifrost"}]' +``` + +--- + +## Common Patterns + +Ready-made values files for the most common deployment scenarios. Each pattern builds on the [quickstart](/deployment-guides/helm). + + + + +Simple setup for local testing. SQLite, single replica, no autoscaling. + +```bash +helm install bifrost bifrost/bifrost \ + --set image.tag=v1.4.11 \ + --set 'bifrost.providers.openai.keys[0].name=dev-key' \ + --set 'bifrost.providers.openai.keys[0].value=sk-your-key' \ + --set 'bifrost.providers.openai.keys[0].weight=1' +``` + +```bash +# Access +kubectl port-forward svc/bifrost 8080:8080 +``` + + + + +Multiple LLM providers with weighted load balancing. + +```bash +kubectl create secret generic provider-keys \ + --from-literal=openai-api-key='sk-...' \ + --from-literal=anthropic-api-key='sk-ant-...' 
\ + --from-literal=gemini-api-key='your-gemini-key' +``` + +```yaml +# multi-provider.yaml +image: + tag: "v1.4.11" + +bifrost: + encryptionKey: "your-encryption-key" + + client: + enableLogging: true + allowDirectKeys: false + + providers: + openai: + keys: + - name: "openai-primary" + value: "env.OPENAI_API_KEY" + weight: 2 # 50% of traffic + anthropic: + keys: + - name: "anthropic-primary" + value: "env.ANTHROPIC_API_KEY" + weight: 1 # 25% + gemini: + keys: + - name: "gemini-primary" + value: "env.GEMINI_API_KEY" + weight: 1 # 25% + + providerSecrets: + openai: + existingSecret: "provider-keys" + key: "openai-api-key" + envVar: "OPENAI_API_KEY" + anthropic: + existingSecret: "provider-keys" + key: "anthropic-api-key" + envVar: "ANTHROPIC_API_KEY" + gemini: + existingSecret: "provider-keys" + key: "gemini-api-key" + envVar: "GEMINI_API_KEY" + + plugins: + telemetry: + enabled: true + logging: + enabled: true +``` + +```bash +helm install bifrost bifrost/bifrost -f multi-provider.yaml +``` + + + + +Use an existing PostgreSQL instance — RDS, Cloud SQL, Azure Database, or self-managed. + +```bash +kubectl create secret generic postgres-credentials \ + --from-literal=password='your-external-postgres-password' +``` + +```yaml +# external-db.yaml +image: + tag: "v1.4.11" + +storage: + mode: postgres + +postgresql: + enabled: false + external: + enabled: true + host: "your-rds-endpoint.us-east-1.rds.amazonaws.com" + port: 5432 + user: "bifrost" + database: "bifrost" + sslMode: "require" + existingSecret: "postgres-credentials" + passwordKey: "password" + +bifrost: + encryptionKey: "your-encryption-key" + + providers: + openai: + keys: + - name: "openai-primary" + value: "sk-..." + weight: 1 +``` + +```bash +helm install bifrost bifrost/bifrost -f external-db.yaml +``` + + + + +Semantic response caching for high-volume AI inference. 
+ +```bash +kubectl create secret generic bifrost-encryption \ + --from-literal=key='your-32-byte-encryption-key' + +kubectl create secret generic provider-keys \ + --from-literal=openai-api-key='sk-your-key' +``` + +```yaml +# ai-workload.yaml +image: + tag: "v1.4.11" + +storage: + mode: postgres + +postgresql: + enabled: true + auth: + password: "secure-password" + primary: + persistence: + size: 50Gi + +vectorStore: + enabled: true + type: weaviate + weaviate: + enabled: true + persistence: + size: 50Gi + +bifrost: + encryptionKeySecret: + name: "bifrost-encryption" + key: "key" + + providers: + openai: + keys: + - name: "openai-primary" + value: "env.OPENAI_API_KEY" + weight: 1 + + providerSecrets: + openai: + existingSecret: "provider-keys" + key: "openai-api-key" + envVar: "OPENAI_API_KEY" + + plugins: + semanticCache: + enabled: true + config: + provider: "openai" + keys: + - value: "env.OPENAI_API_KEY" + weight: 1 + embedding_model: "text-embedding-3-small" + dimension: 1536 + threshold: 0.85 + ttl: "1h" + cache_by_model: true + cache_by_provider: true +``` + +```bash +helm install bifrost bifrost/bifrost -f ai-workload.yaml +``` + + + + +Zero credentials in values files — all sensitive data in Kubernetes Secrets. + +```bash +kubectl create secret generic postgres-credentials \ + --from-literal=password='your-postgres-password' + +kubectl create secret generic bifrost-encryption \ + --from-literal=key='your-encryption-key' + +kubectl create secret generic provider-keys \ + --from-literal=openai-api-key='sk-...' \ + --from-literal=anthropic-api-key='sk-ant-...' 
+ +kubectl create secret generic qdrant-credentials \ + --from-literal=api-key='your-qdrant-api-key' +``` + +```yaml +# secrets-only.yaml +image: + tag: "v1.4.11" + +storage: + mode: postgres + +postgresql: + enabled: false + external: + enabled: true + host: "postgres.example.com" + port: 5432 + user: "bifrost" + database: "bifrost" + sslMode: "require" + existingSecret: "postgres-credentials" + passwordKey: "password" + +vectorStore: + enabled: true + type: qdrant + qdrant: + enabled: false + external: + enabled: true + host: "qdrant.example.com" + port: 6334 + existingSecret: "qdrant-credentials" + apiKeyKey: "api-key" + +bifrost: + encryptionKeySecret: + name: "bifrost-encryption" + key: "key" + + providers: + openai: + keys: + - name: "openai-primary" + value: "env.OPENAI_API_KEY" + weight: 1 + anthropic: + keys: + - name: "anthropic-primary" + value: "env.ANTHROPIC_API_KEY" + weight: 1 + + providerSecrets: + openai: + existingSecret: "provider-keys" + key: "openai-api-key" + envVar: "OPENAI_API_KEY" + anthropic: + existingSecret: "provider-keys" + key: "anthropic-api-key" + envVar: "ANTHROPIC_API_KEY" +``` + +```bash +helm install bifrost bifrost/bifrost -f secrets-only.yaml +``` + + + diff --git a/docs/docs.json b/docs/docs.json index 155b91a096..1eecc3dc0e 100644 --- a/docs/docs.json +++ b/docs/docs.json @@ -363,10 +363,30 @@ "pages": [ "deployment-guides/k8s", "deployment-guides/ecs", - "deployment-guides/helm", "deployment-guides/fly" ] }, + { + "group": "Config as Code", + "icon": "code", + "pages": [ + { + "group": "Helm", + "icon": "helicopter-symbol", + "pages": [ + "deployment-guides/helm", + "deployment-guides/helm/values", + "deployment-guides/helm/client", + "deployment-guides/helm/providers", + "deployment-guides/helm/storage", + "deployment-guides/helm/plugins", + "deployment-guides/helm/governance", + "deployment-guides/helm/cluster", + "deployment-guides/helm/troubleshooting" + ] + } + ] + }, { "group": "Enterprise Deployment", "icon": 
"building",