diff --git a/.github/workflows/dependabot-alerts.yml b/.github/workflows/dependabot-alerts.yml
index f92d41280c..0fc9c66801 100644
--- a/.github/workflows/dependabot-alerts.yml
+++ b/.github/workflows/dependabot-alerts.yml
@@ -12,10 +12,12 @@ jobs:
   create-issues:
     runs-on: ubuntu-latest
     steps:
-      - name: Harden the runner (Audit all outbound calls)
+      - name: Harden Runner
         uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
         with:
-          egress-policy: audit
+          egress-policy: block
+          allowed-endpoints: >
+            api.github.com:443
 
       - name: Create issues from Dependabot alerts
         env:
diff --git a/.github/workflows/dependency-review.yml b/.github/workflows/dependency-review.yml
index 60d8715ebc..7955983088 100644
--- a/.github/workflows/dependency-review.yml
+++ b/.github/workflows/dependency-review.yml
@@ -16,10 +16,15 @@ jobs:
   dependency-review:
     runs-on: ubuntu-latest
     steps:
-      - name: Harden the runner (Audit all outbound calls)
+      - name: Harden Runner
         uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
         with:
-          egress-policy: audit
+          egress-policy: block
+          allowed-endpoints: >
+            api.deps.dev:443
+            api.github.com:443
+            api.securityscorecards.dev:443
+            github.com:443
 
       - name: 'Checkout Repository'
         uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4.3.1
diff --git a/.github/workflows/docs-validation.yml b/.github/workflows/docs-validation.yml
index 772fd50f0a..d10dacfcbb 100644
--- a/.github/workflows/docs-validation.yml
+++ b/.github/workflows/docs-validation.yml
@@ -17,10 +17,18 @@ jobs:
     name: Check Broken Links
     runs-on: ubuntu-latest
     steps:
-      - name: Harden the runner (Audit all outbound calls)
+      - name: Harden Runner
         uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
         with:
-          egress-policy: audit
+          egress-policy: block
+          allowed-endpoints: >
+            api.github.com:443
+            github.com:443
+            nodejs.org:443
+            ph.mintlify.com:443
+            registry.npmjs.org:443
+            release-assets.githubusercontent.com:443
+            storage.googleapis.com:443
 
       - name: Checkout repository
         uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
diff --git a/.github/workflows/e2e-tests.yml b/.github/workflows/e2e-tests.yml
index ec3ffbddca..e48994c743 100644
--- a/.github/workflows/e2e-tests.yml
+++ b/.github/workflows/e2e-tests.yml
@@ -32,7 +32,7 @@ jobs:
       - name: Set up Go
         uses: actions/setup-go@4b73464bb391d4059bd26b0524d20df3927bd417 # v6.3.0
         with:
-          go-version: "1.26.2"
+          go-version: "1.26.1"
 
       - name: Set up Node.js
         uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4.4.0
diff --git a/.github/workflows/helm-release.yml b/.github/workflows/helm-release.yml
index 69128ae643..aaebd1ab59 100644
--- a/.github/workflows/helm-release.yml
+++ b/.github/workflows/helm-release.yml
@@ -10,16 +10,26 @@ on:
   workflow_dispatch:
 
 permissions:
-  contents: write
+  contents: write
 
 jobs:
   release:
     runs-on: ubuntu-latest
     steps:
-      - name: Harden the runner (Audit all outbound calls)
+      - name: Harden Runner
         uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
         with:
-          egress-policy: audit
+          egress-policy: block
+          allowed-endpoints: >
+            api.github.com:443
+            get.helm.sh:443
+            github.com:443
+            maximhq.github.io:443
+            proxy.golang.org:443
+            release-assets.githubusercontent.com:443
+            storage.googleapis.com:443
+            sum.golang.org:443
+            uploads.github.com:443
 
       - name: Checkout
         uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
diff --git a/.github/workflows/openapi-bundle.yml b/.github/workflows/openapi-bundle.yml
index 7cd8c232e8..44fe44b779 100644
--- a/.github/workflows/openapi-bundle.yml
+++ b/.github/workflows/openapi-bundle.yml
@@ -20,10 +20,14 @@ jobs:
     name: Bundle OpenAPI Spec
     runs-on: ubuntu-latest
     steps:
-      - name: Harden the runner (Audit all outbound calls)
+      - name: Harden Runner
         uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
         with:
-          egress-policy: audit
+          egress-policy: block
+          allowed-endpoints: >
+            files.pythonhosted.org:443
+            github.com:443
+            pypi.org:443
 
       - name: Checkout repository
         uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
diff --git a/.github/workflows/pr-tests.yml b/.github/workflows/pr-tests.yml
index 3fbdaa4203..adb43b95f8 100644
--- a/.github/workflows/pr-tests.yml
+++ b/.github/workflows/pr-tests.yml
@@ -77,7 +77,7 @@ jobs:
       - name: Set up Go
         uses: actions/setup-go@4b73464bb391d4059bd26b0524d20df3927bd417 # v6.3.0
         with:
-          go-version: "1.26.2"
+          go-version: "1.26.1"
 
       - name: Set up Node.js
         uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4.4.0
diff --git a/.github/workflows/release-cli.yml b/.github/workflows/release-cli.yml
index f143fbbcfe..b843195885 100644
--- a/.github/workflows/release-cli.yml
+++ b/.github/workflows/release-cli.yml
@@ -4,7 +4,7 @@ on:
   push:
     branches:
       - main
-
+
 # Prevent concurrent runs
 concurrency:
   group: release-cli
@@ -20,10 +20,12 @@ jobs:
       version: ${{ steps.get-version.outputs.version }}
       tag_exists: ${{ steps.check-tag.outputs.exists }}
     steps:
-      - name: Harden the runner (Audit all outbound calls)
+      - name: Harden Runner
         uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
         with:
-          egress-policy: audit
+          egress-policy: block
+          allowed-endpoints: >
+            github.com:443
 
       - name: Checkout repository
         uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
@@ -65,7 +67,7 @@ jobs:
       - name: Set up Go
         uses: actions/setup-go@4b73464bb391d4059bd26b0524d20df3927bd417 # v6.3.0
         with:
-          go-version: "1.26.2"
+          go-version: "1.26.1"
 
       - name: Run CLI tests
         working-directory: cli
@@ -95,7 +97,7 @@ jobs:
       - name: Set up Go
         uses: actions/setup-go@4b73464bb391d4059bd26b0524d20df3927bd417 # v6.3.0
         with:
-          go-version: "1.26.2"
+          go-version: "1.26.1"
 
       - name: Configure Git
         run: |
diff --git a/.github/workflows/release-pipeline.yml b/.github/workflows/release-pipeline.yml
index 90bb9c86f3..805ee6ca9b 100644
--- a/.github/workflows/release-pipeline.yml
+++ b/.github/workflows/release-pipeline.yml
@@ -20,10 +20,11 @@ jobs:
     outputs:
       should-skip: ${{ steps.check.outputs.should-skip }}
     steps:
-      - name: Harden the runner (Audit all outbound calls)
+      - name: Harden Runner
         uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
         with:
-          egress-policy: audit
+          egress-policy: block
+          allowed-endpoints: >+
 
       - name: Check if pipeline should be skipped
         id: check
@@ -54,10 +55,21 @@ jobs:
       framework-version: ${{ steps.detect.outputs.framework-version }}
       transport-version: ${{ steps.detect.outputs.transport-version }}
     steps:
-      - name: Harden the runner (Audit all outbound calls)
+      - name: Harden Runner
         uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
         with:
-          egress-policy: audit
+          egress-policy: block
+          allowed-endpoints: >
+            _http._tcp.azure.archive.ubuntu.com:443
+            _https._tcp.esm.ubuntu.com:443
+            _https._tcp.motd.ubuntu.com:443
+            _https._tcp.packages.microsoft.com:443
+            azure.archive.ubuntu.com:80
+            dl.google.com:443
+            esm.ubuntu.com:443
+            github.com:443
+            packages.microsoft.com:443
+            registry.hub.docker.com:443
 
       - name: Checkout repository
         uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
@@ -74,568 +86,9 @@ jobs:
         id: detect
         run: ./.github/workflows/scripts/detect-all-changes.sh "auto"
 
-  # Run all tests in parallel before any releases
-  test-core:
-    needs: [check-skip, detect-changes]
-    if: needs.check-skip.outputs.should-skip != 'true' && needs.detect-changes.outputs.core-needs-release == 'true'
-    runs-on: ubuntu-latest
-    permissions:
-      contents: read
-    steps:
-      - name: Harden the runner (Audit all outbound calls)
-        uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
-        with:
-          egress-policy: audit
-
-      - name: Checkout repository
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
-        with:
-          fetch-depth: 0
-          fetch-tags: true
-
-      - name: Set up Go
-        uses: actions/setup-go@4b73464bb391d4059bd26b0524d20df3927bd417 # v6.3.0
-        with:
-          go-version: "1.26.2"
-
-      - name: Set up Node.js
-        uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4.4.0
-        with:
-          node-version: "25"
-
-      - name: Run core tests
-        env:
-          MAXIM_API_KEY: ${{ secrets.MAXIM_API_KEY }}
-          MAXIM_LOGGER_ID: ${{ secrets.MAXIM_LOG_REPO_ID }}
-          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
-          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
-          AWS_SESSION_TOKEN: ${{ secrets.AWS_SESSION_TOKEN }}
-          AWS_ARN: ${{ secrets.AWS_ARN }}
-          BEDROCK_API_KEY: ${{ secrets.BEDROCK_API_KEY }}
-          AZURE_ENDPOINT: ${{ secrets.AZURE_ENDPOINT }}
-          AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }}
-          AZURE_API_VERSION: ${{ secrets.AZURE_API_VERSION }}
-          AZURE_CLIENT_ID: ${{ secrets.AZURE_CLIENT_ID }}
-          AZURE_CLIENT_SECRET: ${{ secrets.AZURE_CLIENT_SECRET }}
-          AZURE_TENANT_ID: ${{ secrets.AZURE_TENANT_ID }}
-          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
-          GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
-          MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }}
-          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
-          GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
-          OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
-          PARASAIL_API_KEY: ${{ secrets.PARASAIL_API_KEY }}
-          ELEVENLABS_API_KEY: ${{ secrets.ELEVENLABS_API_KEY }}
-          PERPLEXITY_API_KEY: ${{ secrets.PERPLEXITY_API_KEY }}
-          SGL_API_KEY: ${{ secrets.SGL_API_KEY }}
-          CEREBRAS_API_KEY: ${{ secrets.CEREBRAS_API_KEY }}
-          COHERE_API_KEY: ${{ secrets.COHERE_API_KEY }}
-          FIREWORKS_API_KEY: ${{ secrets.FIREWORKS_API_KEY }}
-          VERTEX_CREDENTIALS: ${{ secrets.VERTEX_CREDENTIALS }}
-          VERTEX_PROJECT_ID: ${{ secrets.VERTEX_PROJECT_ID }}
-          CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
-          HUGGING_FACE_API_KEY: ${{ secrets.HUGGING_FACE_API_KEY }}
-          AWS_S3_BUCKET: ${{ secrets.AWS_S3_BUCKET }}
-          AWS_BEDROCK_ROLE_ARN: ${{ secrets.AWS_BEDROCK_ROLE_ARN }}
-          BIFROST_ENCRYPTION_KEY: ${{ secrets.BIFROST_ENCRYPTION_KEY }}
-        run: ./.github/workflows/scripts/test-core.sh
-
-  # Approval gate for flaky test-core failures
-  # If test-core fails (often due to flaky provider API calls), this job waits for manual approval
-  # to continue the release pipeline without requiring a full re-run
-  approve-flaky-test-core:
-    needs: [check-skip, detect-changes, test-core]
-    if: |
-      always() &&
-      needs.check-skip.outputs.should-skip != 'true' &&
-      needs.detect-changes.outputs.core-needs-release == 'true' &&
-      needs.test-core.result == 'failure'
-    runs-on: ubuntu-latest
-    environment:
-      name: flaky-test-override
-      url: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
-    outputs:
-      approved: ${{ steps.approve.outputs.approved }}
-    steps:
-      - name: Harden the runner (Audit all outbound calls)
-        uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
-        with:
-          egress-policy: audit
-
-      - name: Display failed test info
-        run: |
-          echo "::warning::test-core failed. Review the logs to determine if this is a flaky test."
-          echo "If this is a known flaky test (e.g., provider API timeout), approve to continue."
-          echo "If this is a real failure, reject and fix the issue."
-      - name: Mark as approved
-        id: approve
-        run: echo "approved=true" >> $GITHUB_OUTPUT
-
-  test-framework:
-    needs: [check-skip, detect-changes]
-    if: needs.check-skip.outputs.should-skip != 'true' && needs.detect-changes.outputs.framework-needs-release == 'true'
-    runs-on: ubuntu-latest
-    permissions:
-      contents: read
-    steps:
-      - name: Harden the runner (Audit all outbound calls)
-        uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
-        with:
-          egress-policy: audit
-
-      - name: Checkout repository
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
-        with:
-          fetch-depth: 0
-          fetch-tags: true
-
-      - name: Set up Go
-        uses: actions/setup-go@4b73464bb391d4059bd26b0524d20df3927bd417 # v6.3.0
-        with:
-          go-version: "1.26.2"
-
-      - name: Set up Docker Compose
-        run: |
-          docker --version
-          if ! docker compose version >/dev/null 2>&1; then
-            echo "Installing Docker Compose..."
-            sudo curl -L "https://github.com/docker/compose/releases/latest/download/docker-compose-$(uname -s)-$(uname -m)" -o /usr/local/bin/docker-compose
-            sudo chmod +x /usr/local/bin/docker-compose
-            docker-compose --version
-          else
-            echo "Docker Compose plugin is available"
-            docker compose version
-          fi
-
-      - name: Run framework tests
-        env:
-          MAXIM_API_KEY: ${{ secrets.MAXIM_API_KEY }}
-          MAXIM_LOGGER_ID: ${{ secrets.MAXIM_LOG_REPO_ID }}
-          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
-          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
-          AWS_SESSION_TOKEN: ${{ secrets.AWS_SESSION_TOKEN }}
-          AWS_ARN: ${{ secrets.AWS_ARN }}
-          BEDROCK_API_KEY: ${{ secrets.BEDROCK_API_KEY }}
-          AZURE_ENDPOINT: ${{ secrets.AZURE_ENDPOINT }}
-          AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }}
-          AZURE_API_VERSION: ${{ secrets.AZURE_API_VERSION }}
-          AZURE_CLIENT_ID: ${{ secrets.AZURE_CLIENT_ID }}
-          AZURE_CLIENT_SECRET: ${{ secrets.AZURE_CLIENT_SECRET }}
-          AZURE_TENANT_ID: ${{ secrets.AZURE_TENANT_ID }}
-          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
-          GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
-          MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }}
-          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
-          GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
-          OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
-          PARASAIL_API_KEY: ${{ secrets.PARASAIL_API_KEY }}
-          ELEVENLABS_API_KEY: ${{ secrets.ELEVENLABS_API_KEY }}
-          PERPLEXITY_API_KEY: ${{ secrets.PERPLEXITY_API_KEY }}
-          SGL_API_KEY: ${{ secrets.SGL_API_KEY }}
-          CEREBRAS_API_KEY: ${{ secrets.CEREBRAS_API_KEY }}
-          COHERE_API_KEY: ${{ secrets.COHERE_API_KEY }}
-          FIREWORKS_API_KEY: ${{ secrets.FIREWORKS_API_KEY }}
-          VERTEX_CREDENTIALS: ${{ secrets.VERTEX_CREDENTIALS }}
-          VERTEX_PROJECT_ID: ${{ secrets.VERTEX_PROJECT_ID }}
-          CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
-          HUGGING_FACE_API_KEY: ${{ secrets.HUGGING_FACE_API_KEY }}
-          BIFROST_ENCRYPTION_KEY: ${{ secrets.BIFROST_ENCRYPTION_KEY }}
-        run: ./.github/workflows/scripts/test-framework.sh
-
-  test-plugins:
-    needs: [check-skip, detect-changes]
-    if: needs.check-skip.outputs.should-skip != 'true' && needs.detect-changes.outputs.plugins-need-release == 'true'
-    runs-on: ubuntu-latest
-    permissions:
-      contents: read
-    steps:
-      - name: Harden the runner (Audit all outbound calls)
-        uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
-        with:
-          egress-policy: audit
-
-      - name: Checkout repository
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
-        with:
-          fetch-depth: 0
-          fetch-tags: true
-
-      - name: Install jq
-        run: |
-          sudo apt-get update
-          sudo apt-get install -y jq
-
-      - name: Set up Go
-        uses: actions/setup-go@4b73464bb391d4059bd26b0524d20df3927bd417 # v6.3.0
-        with:
-          go-version: "1.26.2"
-
-      - name: Set up Docker Compose
-        run: |
-          docker --version
-          if ! docker compose version >/dev/null 2>&1; then
-            echo "Installing Docker Compose..."
-            sudo curl -L "https://github.com/docker/compose/releases/latest/download/docker-compose-$(uname -s)-$(uname -m)" -o /usr/local/bin/docker-compose
-            sudo chmod +x /usr/local/bin/docker-compose
-            docker-compose --version
-          else
-            echo "Docker Compose plugin is available"
-            docker compose version
-          fi
-
-      - name: Run plugin tests
-        env:
-          MAXIM_API_KEY: ${{ secrets.MAXIM_API_KEY }}
-          MAXIM_LOGGER_ID: ${{ secrets.MAXIM_LOG_REPO_ID }}
-          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
-          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
-          AWS_SESSION_TOKEN: ${{ secrets.AWS_SESSION_TOKEN }}
-          AWS_ARN: ${{ secrets.AWS_ARN }}
-          BEDROCK_API_KEY: ${{ secrets.BEDROCK_API_KEY }}
-          AZURE_ENDPOINT: ${{ secrets.AZURE_ENDPOINT }}
-          AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }}
-          AZURE_API_VERSION: ${{ secrets.AZURE_API_VERSION }}
-          AZURE_CLIENT_ID: ${{ secrets.AZURE_CLIENT_ID }}
-          AZURE_CLIENT_SECRET: ${{ secrets.AZURE_CLIENT_SECRET }}
-          AZURE_TENANT_ID: ${{ secrets.AZURE_TENANT_ID }}
-          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
-          GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
-          MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }}
-          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
-          GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
-          OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
-          PARASAIL_API_KEY: ${{ secrets.PARASAIL_API_KEY }}
-          ELEVENLABS_API_KEY: ${{ secrets.ELEVENLABS_API_KEY }}
-          PERPLEXITY_API_KEY: ${{ secrets.PERPLEXITY_API_KEY }}
-          SGL_API_KEY: ${{ secrets.SGL_API_KEY }}
-          CEREBRAS_API_KEY: ${{ secrets.CEREBRAS_API_KEY }}
-          COHERE_API_KEY: ${{ secrets.COHERE_API_KEY }}
-          FIREWORKS_API_KEY: ${{ secrets.FIREWORKS_API_KEY }}
-          VERTEX_CREDENTIALS: ${{ secrets.VERTEX_CREDENTIALS }}
-          VERTEX_PROJECT_ID: ${{ secrets.VERTEX_PROJECT_ID }}
-          CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
-          HUGGING_FACE_API_KEY: ${{ secrets.HUGGING_FACE_API_KEY }}
-          BIFROST_ENCRYPTION_KEY: ${{ secrets.BIFROST_ENCRYPTION_KEY }}
-        run: ./.github/workflows/scripts/test-all-plugins.sh
-
-  test-bifrost-http:
-    needs: [check-skip, detect-changes]
-    if: needs.check-skip.outputs.should-skip != 'true' && needs.detect-changes.outputs.bifrost-http-needs-release == 'true'
-    runs-on: ubuntu-latest
-    permissions:
-      contents: read
-    steps:
-      - name: Harden the runner (Audit all outbound calls)
-        uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
-        with:
-          egress-policy: audit
-
-      - name: Checkout repository
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
-        with:
-          fetch-depth: 0
-          fetch-tags: true
-
-      - name: Set up Go
-        uses: actions/setup-go@4b73464bb391d4059bd26b0524d20df3927bd417 # v6.3.0
-        with:
-          go-version: "1.26.2"
-
-      - name: Set up Node.js
-        uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4.4.0
-        with:
-          node-version: "25"
-
-      - name: Set up Docker Compose
-        run: |
-          docker --version
-          if ! docker compose version >/dev/null 2>&1; then
-            echo "Installing Docker Compose..."
-            sudo curl -L "https://github.com/docker/compose/releases/latest/download/docker-compose-$(uname -s)-$(uname -m)" -o /usr/local/bin/docker-compose
-            sudo chmod +x /usr/local/bin/docker-compose
-            docker-compose --version
-          else
-            echo "Docker Compose plugin is available"
-            docker compose version
-          fi
-
-      - name: Run bifrost-http tests
-        env:
-          MAXIM_API_KEY: ${{ secrets.MAXIM_API_KEY }}
-          MAXIM_LOGGER_ID: ${{ secrets.MAXIM_LOG_REPO_ID }}
-          CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
-          BIFROST_ENCRYPTION_KEY: ${{ secrets.BIFROST_ENCRYPTION_KEY }}
-        run: ./.github/workflows/scripts/test-bifrost-http.sh
-
-  # Migration tests - validates database migrations from previous versions
-  test-migrations:
-    needs: [check-skip, detect-changes]
-    if: needs.check-skip.outputs.should-skip != 'true' && needs.detect-changes.outputs.bifrost-http-needs-release == 'true'
-    runs-on: ubuntu-latest
-    permissions:
-      contents: read
-    steps:
-      - name: Harden the runner (Audit all outbound calls)
-        uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
-        with:
-          egress-policy: audit
-
-      - name: Checkout repository
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
-        with:
-          fetch-depth: 0
-          fetch-tags: true
-
-      - name: Set up Go
-        uses: actions/setup-go@4b73464bb391d4059bd26b0524d20df3927bd417 # v6.3.0
-        with:
-          go-version: "1.26.2"
-
-      - name: Set up Node.js
-        uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4.4.0
-        with:
-          node-version: "25"
-
-      - name: Set up Docker Compose
-        run: |
-          docker --version
-          if ! docker compose version >/dev/null 2>&1; then
-            echo "Installing Docker Compose..."
-            sudo curl -L "https://github.com/docker/compose/releases/latest/download/docker-compose-$(uname -s)-$(uname -m)" -o /usr/local/bin/docker-compose
-            sudo chmod +x /usr/local/bin/docker-compose
-            docker-compose --version
-          else
-            echo "Docker Compose plugin is available"
-            docker compose version
-          fi
-
-      - name: Run migration tests
-        run: |
-          chmod +x ./.github/workflows/scripts/run-migration-tests.sh
-          ./.github/workflows/scripts/run-migration-tests.sh postgres
-
-  # E2E UI tests - validates UI with Playwright
-  test-e2e-ui:
-    needs: [check-skip, detect-changes]
-    if: needs.check-skip.outputs.should-skip != 'true' && needs.detect-changes.outputs.bifrost-http-needs-release == 'true'
-    runs-on: ubuntu-latest
-    permissions:
-      contents: read
-    steps:
-      - name: Harden the runner (Audit all outbound calls)
-        uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
-        with:
-          egress-policy: audit
-
-      - name: Checkout repository
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
-        with:
-          fetch-depth: 0
-          fetch-tags: true
-
-      - name: Set up Go
-        uses: actions/setup-go@4b73464bb391d4059bd26b0524d20df3927bd417 # v6.3.0
-        with:
-          go-version: "1.26.2"
-
-      - name: Set up Node.js
-        uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4.4.0
-        with:
-          node-version: "25"
-
-      - name: Set up Docker Compose
-        run: |
-          docker --version
-          if ! docker compose version >/dev/null 2>&1; then
-            echo "Installing Docker Compose..."
-            sudo curl -L "https://github.com/docker/compose/releases/latest/download/docker-compose-$(uname -s)-$(uname -m)" -o /usr/local/bin/docker-compose
-            sudo chmod +x /usr/local/bin/docker-compose
-            docker-compose --version
-          else
-            echo "Docker Compose plugin is available"
-            docker compose version
-          fi
-
-      - name: Run E2E UI tests
-        env:
-          MCP_SSE_HEADERS: ${{ secrets.MCP_SSE_HEADERS }}
-        run: ./.github/workflows/scripts/test-e2e-ui.sh
-
-      - name: Upload Playwright artifacts
-        if: ${{ !cancelled() }}
-        uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
-        with:
-          name: playwright-report
-          path: |
-            tests/e2e/test-results/
-            tests/e2e/playwright-report/
-          retention-days: 30
-
-  # Docker image test - amd64
-  test-docker-image-amd64:
-    needs: [check-skip, detect-changes]
-    if: needs.check-skip.outputs.should-skip != 'true' && needs.detect-changes.outputs.docker-needs-release == 'true'
-    runs-on: ubuntu-latest
-    permissions:
-      contents: read
-    steps:
-      - name: Harden the runner (Audit all outbound calls)
-        uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
-        with:
-          egress-policy: audit
-
-      - name: Checkout repository
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
-        with:
-          fetch-depth: 0
-          fetch-tags: true
-
-      - name: Set up Go
-        uses: actions/setup-go@4b73464bb391d4059bd26b0524d20df3927bd417 # v6.3.0
-        with:
-          go-version: "1.26.2"
-
-      - name: Set up Node.js
-        uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4.4.0
-        with:
-          node-version: "25"
-
-      - name: Install Newman
-        run: npm install -g newman newman-reporter-html
-
-      - name: Setup Docker Buildx
-        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3.12.0
-
-      - name: Test Docker image (amd64)
-        env:
-          CI: "1"
-          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
-          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
-          GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
-          VERTEX_PROJECT_ID: ${{ secrets.VERTEX_PROJECT_ID }}
-          VERTEX_CREDENTIALS: ${{ secrets.VERTEX_CREDENTIALS }}
-          GOOGLE_LOCATION: ${{ secrets.GOOGLE_LOCATION }}
-          MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }}
-          COHERE_API_KEY: ${{ secrets.COHERE_API_KEY }}
-          GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
-          PERPLEXITY_API_KEY: ${{ secrets.PERPLEXITY_API_KEY }}
-          CEREBRAS_API_KEY: ${{ secrets.CEREBRAS_API_KEY }}
-          OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
-          PARASAIL_API_KEY: ${{ secrets.PARASAIL_API_KEY }}
-          ELEVENLABS_API_KEY: ${{ secrets.ELEVENLABS_API_KEY }}
-          FIREWORKS_API_KEY: ${{ secrets.FIREWORKS_API_KEY }}
-          HUGGING_FACE_API_KEY: ${{ secrets.HUGGING_FACE_API_KEY }}
-          XAI_API_KEY: ${{ secrets.XAI_API_KEY }}
-          REPLICATE_API_KEY: ${{ secrets.REPLICATE_API_KEY }}
-          AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }}
-          AZURE_ENDPOINT: ${{ secrets.AZURE_ENDPOINT }}
-          AZURE_API_VERSION: ${{ secrets.AZURE_API_VERSION }}
-          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
-          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
-          AWS_REGION: ${{ secrets.AWS_REGION }}
-          AWS_ARN: ${{ secrets.AWS_ARN }}
-        run: |
-          chmod +x ./.github/workflows/scripts/test-docker-image.sh
-          ./.github/workflows/scripts/test-docker-image.sh linux/amd64
-
-      - name: Upload Newman reports
-        if: ${{ !cancelled() }}
-        uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
-        with:
-          name: newman-reports-amd64
-          path: tests/e2e/api/newman-reports/
-          retention-days: 30
-
-  # Docker image test - arm64
-  test-docker-image-arm64:
-    needs: [check-skip, detect-changes]
-    if: needs.check-skip.outputs.should-skip != 'true' && needs.detect-changes.outputs.docker-needs-release == 'true'
-    runs-on: ubuntu-24.04-arm
-    permissions:
-      contents: read
-    steps:
-      - name: Harden the runner (Audit all outbound calls)
-        uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
-        with:
-          egress-policy: audit
-
-      - name: Checkout repository
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
-        with:
-          fetch-depth: 0
-          fetch-tags: true
-
-      - name: Set up Go
-        uses: actions/setup-go@4b73464bb391d4059bd26b0524d20df3927bd417 # v6.3.0
-        with:
-          go-version: "1.26.2"
-
-      - name: Set up Node.js
-        uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4.4.0
-        with:
-          node-version: "25"
-
-      - name: Install Newman
-        run: npm install -g newman newman-reporter-html
-
-      - name: Setup Docker Buildx
-        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3.12.0
-
-      - name: Test Docker image (arm64)
-        env:
-          CI: "1"
-          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
-          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
-          GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
-          VERTEX_PROJECT_ID: ${{ secrets.VERTEX_PROJECT_ID }}
-          VERTEX_CREDENTIALS: ${{ secrets.VERTEX_CREDENTIALS }}
-          GOOGLE_LOCATION: ${{ secrets.GOOGLE_LOCATION }}
-          MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }}
-          COHERE_API_KEY: ${{ secrets.COHERE_API_KEY }}
-          GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
-          PERPLEXITY_API_KEY: ${{ secrets.PERPLEXITY_API_KEY }}
-          CEREBRAS_API_KEY: ${{ secrets.CEREBRAS_API_KEY }}
-          OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
-          PARASAIL_API_KEY: ${{ secrets.PARASAIL_API_KEY }}
-          ELEVENLABS_API_KEY: ${{ secrets.ELEVENLABS_API_KEY }}
-          FIREWORKS_API_KEY: ${{ secrets.FIREWORKS_API_KEY }}
-          HUGGING_FACE_API_KEY: ${{ secrets.HUGGING_FACE_API_KEY }}
-          XAI_API_KEY: ${{ secrets.XAI_API_KEY }}
-          REPLICATE_API_KEY: ${{ secrets.REPLICATE_API_KEY }}
-          AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }}
-          AZURE_ENDPOINT: ${{ secrets.AZURE_ENDPOINT }}
-          AZURE_API_VERSION: ${{ secrets.AZURE_API_VERSION }}
-          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
-          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
-          AWS_REGION: ${{ secrets.AWS_REGION }}
-          AWS_ARN: ${{ secrets.AWS_ARN }}
-        run: |
-          chmod +x ./.github/workflows/scripts/test-docker-image.sh
-          ./.github/workflows/scripts/test-docker-image.sh linux/arm64
-
-      - name: Upload Newman reports
-        if: ${{ !cancelled() }}
-        uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
-        with:
-          name: newman-reports-arm64
-          path: tests/e2e/api/newman-reports/
-          retention-days: 30
-
   core-release:
-    needs:
-      [
-        check-skip,
-        detect-changes,
-        test-core,
-        approve-flaky-test-core,
-        test-framework,
-        test-plugins,
-        test-bifrost-http,
-        test-migrations,
-        test-docker-image-amd64,
-        test-docker-image-arm64,
-      ]
-    if: "always() && needs.check-skip.outputs.should-skip != 'true' && needs.detect-changes.outputs.core-needs-release == 'true' && (needs.test-core.result == 'success' || (needs.test-core.result == 'failure' && needs.approve-flaky-test-core.result == 'success')) && (needs.test-framework.result == 'success' || needs.test-framework.result == 'skipped') && (needs.test-plugins.result == 'success' || needs.test-plugins.result == 'skipped') && (needs.test-bifrost-http.result == 'success' || needs.test-bifrost-http.result == 'skipped') && (needs.test-migrations.result == 'success' || needs.test-migrations.result == 'skipped') && (needs.test-docker-image-amd64.result == 'success' || needs.test-docker-image-amd64.result == 'skipped') && (needs.test-docker-image-arm64.result == 'success' || needs.test-docker-image-arm64.result == 'skipped')"
+    needs: [check-skip, detect-changes]
+    if: "needs.check-skip.outputs.should-skip != 'true' && needs.detect-changes.outputs.core-needs-release == 'true'"
     runs-on: ubuntu-latest
     permissions:
       contents: write
@@ -643,10 +96,15 @@ jobs:
       success: ${{ steps.release.outputs.success }}
       version: ${{ needs.detect-changes.outputs.core-version }}
     steps:
-      - name: Harden the runner (Audit all outbound calls)
+      - name: Harden Runner
         uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
         with:
-          egress-policy: audit
+          egress-policy: block
+          allowed-endpoints: >
+            api.github.com:443
+            github.com:443
+            nodejs.org:443
+            release-assets.githubusercontent.com:443
 
       - name: Checkout repository
         uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
@@ -658,7 +116,7 @@ jobs:
       - name: Set up Go
         uses: actions/setup-go@4b73464bb391d4059bd26b0524d20df3927bd417 # v6.3.0
         with:
-          go-version: "1.26.2"
+          go-version: "1.26.1"
 
       - name: Set up Node.js
         uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4.4.0
@@ -712,21 +170,8 @@ jobs:
         run: ./.github/workflows/scripts/release-core.sh "${{ needs.detect-changes.outputs.core-version }}"
 
   framework-release:
-    needs:
-      [
-        check-skip,
-        detect-changes,
-        test-core,
-        approve-flaky-test-core,
-        test-framework,
-        test-plugins,
-        test-bifrost-http,
-        test-migrations,
-        test-docker-image-amd64,
-        test-docker-image-arm64,
-        core-release,
-      ]
-    if: "always() && needs.check-skip.outputs.should-skip != 'true' && needs.detect-changes.outputs.framework-needs-release == 'true' && (needs.test-core.result == 'success' || needs.test-core.result == 'skipped' || (needs.test-core.result == 'failure' && needs.approve-flaky-test-core.result == 'success')) && needs.test-framework.result == 'success' && (needs.test-plugins.result == 'success' || needs.test-plugins.result == 'skipped') && (needs.test-bifrost-http.result == 'success' || needs.test-bifrost-http.result == 'skipped') && (needs.test-migrations.result == 'success' || needs.test-migrations.result == 'skipped') && (needs.test-docker-image-amd64.result == 'success' || needs.test-docker-image-amd64.result == 'skipped') && (needs.test-docker-image-arm64.result == 'success' || needs.test-docker-image-arm64.result == 'skipped') && (needs.detect-changes.outputs.core-needs-release == 'false' || needs.core-release.result == 'success' || needs.core-release.result == 'skipped')"
+    needs: [check-skip, detect-changes, core-release]
+    if: "always() && needs.check-skip.outputs.should-skip != 'true' && needs.detect-changes.outputs.framework-needs-release == 'true' && (needs.detect-changes.outputs.core-needs-release == 'false' || needs.core-release.result == 'success' || needs.core-release.result == 'skipped')"
     runs-on: ubuntu-latest
     permissions:
       contents: write
@@ -734,10 +179,17 @@ jobs:
       success: ${{ steps.release.outputs.success }}
       version: ${{ needs.detect-changes.outputs.framework-version }}
     steps:
-      - name: Harden the runner (Audit all outbound calls)
+      - name: Harden Runner
         uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
         with:
-          egress-policy: audit
+          egress-policy: block
+          allowed-endpoints: >
+            api.github.com:443
+            github.com:443
+            proxy.golang.org:443
+            release-assets.githubusercontent.com:443
+            storage.googleapis.com:443
+            sum.golang.org:443
 
       - name: Checkout repository
         uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
@@ -749,7 +201,7 @@ jobs:
       - name: Set up Go
         uses: actions/setup-go@4b73464bb391d4059bd26b0524d20df3927bd417 # v6.3.0
         with:
-          go-version: "1.26.2"
+          go-version: "1.26.1"
 
       - name: Configure Git
         run: |
@@ -811,32 +263,33 @@ jobs:
         run: ./.github/workflows/scripts/release-framework.sh "${{ needs.detect-changes.outputs.framework-version }}"
 
   plugins-release:
-    needs:
-      [
-        check-skip,
-        detect-changes,
-        test-core,
-        approve-flaky-test-core,
-        test-framework,
-        test-plugins,
-        test-bifrost-http,
-        test-migrations,
-        test-docker-image-amd64,
-        test-docker-image-arm64,
-        core-release,
-        framework-release,
-      ]
-    if: "always() && needs.check-skip.outputs.should-skip != 'true' && needs.detect-changes.outputs.plugins-need-release == 'true' && (needs.test-core.result == 'success' || needs.test-core.result == 'skipped' || (needs.test-core.result == 'failure' && needs.approve-flaky-test-core.result == 'success')) && (needs.test-framework.result == 'success' || needs.test-framework.result == 'skipped') && needs.test-plugins.result == 'success' && (needs.test-bifrost-http.result == 'success' || needs.test-bifrost-http.result == 'skipped') && (needs.test-migrations.result == 'success' || needs.test-migrations.result == 'skipped') && (needs.test-docker-image-amd64.result == 'success' || needs.test-docker-image-amd64.result == 'skipped') && (needs.test-docker-image-arm64.result == 'success' || needs.test-docker-image-arm64.result == 'skipped') && (needs.detect-changes.outputs.core-needs-release == 'false' || needs.core-release.result == 'success' || needs.core-release.result == 'skipped') && (needs.detect-changes.outputs.framework-needs-release == 'false' || needs.framework-release.result == 'success' || needs.framework-release.result == 'skipped')"
+    needs: [check-skip, detect-changes, core-release, framework-release]
+    if: "always() && needs.check-skip.outputs.should-skip != 'true' && needs.detect-changes.outputs.plugins-need-release == 'true' && (needs.detect-changes.outputs.core-needs-release == 'false' || needs.core-release.result == 'success' || needs.core-release.result == 'skipped') && (needs.detect-changes.outputs.framework-needs-release == 'false' || needs.framework-release.result == 'success' || needs.framework-release.result == 'skipped')"
     runs-on: ubuntu-latest
     permissions:
       contents: write
     outputs:
       success: ${{ steps.release.outputs.success }}
     steps:
-      - name: Harden the runner (Audit all outbound calls)
+      - name: Harden Runner
         uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
         with:
-          egress-policy: audit
+          egress-policy: block
+          allowed-endpoints: >
+            _http._tcp.azure.archive.ubuntu.com:443
+            _https._tcp.esm.ubuntu.com:443
+            _https._tcp.motd.ubuntu.com:443
+            _https._tcp.packages.microsoft.com:443
+            api.github.com:443
+            azure.archive.ubuntu.com:80
+            esm.ubuntu.com:443
+            github.com:443
+            nodejs.org:443
+            packages.microsoft.com:443
+            proxy.golang.org:443
+            release-assets.githubusercontent.com:443
+            storage.googleapis.com:443
+            sum.golang.org:443
 
       - name: Checkout repository
         uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
@@ -853,7 +306,7 @@ jobs:
       - name: Set up Go
         uses: actions/setup-go@4b73464bb391d4059bd26b0524d20df3927bd417 # v6.3.0
         with:
-          go-version: "1.26.2"
+          go-version: "1.26.1"
 
       - name: Set up Node.js
         uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4.4.0
@@ -921,33 +374,29 @@
   # Prep: update dependencies, validate build, commit/push
   bifrost-http-prep:
-    needs:
-      [
-        check-skip,
-        detect-changes,
-        test-core,
-        approve-flaky-test-core,
-        test-framework,
-        test-plugins,
-        test-bifrost-http,
-        test-migrations,
-        test-docker-image-amd64,
-        test-docker-image-arm64,
-        core-release,
-        framework-release,
-        plugins-release,
-      ]
-    if: "always() && needs.check-skip.outputs.should-skip != 'true' && needs.detect-changes.outputs.bifrost-http-needs-release == 'true' && (needs.test-core.result == 'success' || needs.test-core.result == 'skipped' || (needs.test-core.result == 'failure' && needs.approve-flaky-test-core.result == 'success')) && (needs.test-framework.result == 'success' || needs.test-framework.result == 'skipped') && (needs.test-plugins.result == 'success' || needs.test-plugins.result == 'skipped') && needs.test-bifrost-http.result == 'success' && needs.test-migrations.result == 'success' && (needs.test-docker-image-amd64.result == 'success' || needs.test-docker-image-amd64.result == 'skipped') && (needs.test-docker-image-arm64.result == 'success' || needs.test-docker-image-arm64.result == 'skipped') && (needs.detect-changes.outputs.core-needs-release == 'false' || needs.core-release.result == 'success' || needs.core-release.result == 'skipped') && (needs.detect-changes.outputs.framework-needs-release == 'false' || needs.framework-release.result == 'success' || needs.framework-release.result == 'skipped') && (needs.detect-changes.outputs.plugins-need-release == 'false' || needs.plugins-release.result == 'success' || needs.plugins-release.result == 'skipped')"
+    needs: [check-skip, detect-changes, core-release, framework-release, plugins-release]
+    if: "always() && needs.check-skip.outputs.should-skip != 'true' && needs.detect-changes.outputs.bifrost-http-needs-release == 'true' && (needs.detect-changes.outputs.core-needs-release == 'false' || needs.core-release.result == 'success' || needs.core-release.result == 'skipped') && (needs.detect-changes.outputs.framework-needs-release == 'false' || needs.framework-release.result == 'success' || needs.framework-release.result == 'skipped') && (needs.detect-changes.outputs.plugins-need-release == 'false' || needs.plugins-release.result == 'success' || needs.plugins-release.result == 'skipped')"
     runs-on: ubuntu-latest
     permissions:
       contents: write
     outputs:
       success: ${{ steps.prep.outputs.success }}
     steps:
-      - name: Harden the runner (Audit all outbound calls)
+      - name: Harden Runner
         uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
         with:
-          egress-policy: audit
+          egress-policy: block
+          allowed-endpoints: >
+            api.github.com:443
+            fonts.googleapis.com:443
+            fonts.gstatic.com:443
+            github.com:443
+            nodejs.org:443
+            proxy.golang.org:443
+            registry.npmjs.org:443
+            release-assets.githubusercontent.com:443
+            storage.googleapis.com:443
+            sum.golang.org:443
 
       - name: Checkout repository
         uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
@@ -959,7 +408,7 @@ jobs:
       - name: Set up Go
         uses: actions/setup-go@4b73464bb391d4059bd26b0524d20df3927bd417 # v6.3.0
         with:
-          go-version: "1.26.2"
+          go-version: "1.26.1"
 
       - name: Set up Node.js
         uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4.4.0
@@ -985,10 +434,30 @@ jobs:
     permissions:
       contents: read
     steps:
-      - name: Harden the runner (Audit all outbound calls)
+      - name: Harden Runner
         uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
         with:
-          egress-policy: audit
+          egress-policy: block
+          allowed-endpoints: >
+            7defe2860d5ee49a1e667e1eeea34b25.r2.cloudflarestorage.com:443
+            _http._tcp.azure.archive.ubuntu.com:443
+            _https._tcp.esm.ubuntu.com:443
+            _https._tcp.motd.ubuntu.com:443
+            _https._tcp.packages.microsoft.com:443
+            api.github.com:443
+            azure.archive.ubuntu.com:80
+            esm.ubuntu.com:443
+            files.pythonhosted.org:443
+            fonts.googleapis.com:443
+            fonts.gstatic.com:443
+            github.com:443
+            nodejs.org:443
+            packages.microsoft.com:443
+            proxy.golang.org:443
+            pypi.org:443
+            registry.npmjs.org:443
+            release-assets.githubusercontent.com:443
+            storage.googleapis.com:443
 
       - name: Checkout repository
         uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
@@ -1003,7 +472,7 @@ jobs:
       - name: Set up Go
         uses: actions/setup-go@4b73464bb391d4059bd26b0524d20df3927bd417 # v6.3.0
         with:
-          go-version: "1.26.2"
+          go-version: "1.26.1"
 
       - name: Set up Node.js
         uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4.4.0
@@ -1041,10 +510,23 @@ jobs:
     permissions:
       contents: read
     steps:
-      - name: Harden the runner (Audit all outbound calls)
+      - name: Harden Runner
         uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
         with:
-          egress-policy: audit
+          egress-policy: block
+          allowed-endpoints: >
+            7defe2860d5ee49a1e667e1eeea34b25.r2.cloudflarestorage.com:443
+            api.github.com:443
+            files.pythonhosted.org:443
+            fonts.googleapis.com:443
+            fonts.gstatic.com:443
+            github.com:443
+            nodejs.org:443
+            proxy.golang.org:443
+            pypi.org:443
+            registry.npmjs.org:443
+            release-assets.githubusercontent.com:443
+            storage.googleapis.com:443
 
      - name: Checkout repository
         uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
@@ -1059,7 +541,7 @@ jobs:
       - name: Set up Go
         uses: actions/setup-go@4b73464bb391d4059bd26b0524d20df3927bd417 # v6.3.0
         with:
-          go-version: "1.26.2"
+          go-version: "1.26.1"
 
       - name: Set up Node.js
         uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4.4.0
@@ -1097,10 +579,16 @@ jobs:
       success: ${{ steps.release.outputs.success }}
       version: ${{ needs.detect-changes.outputs.transport-version }}
     steps:
-      - name: Harden the runner (Audit all outbound calls)
+      - name: Harden Runner
         uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
         with:
-          egress-policy: audit
+          egress-policy: block
+          allowed-endpoints: >
+            7defe2860d5ee49a1e667e1eeea34b25.r2.cloudflarestorage.com:443
+            api.github.com:443
+            files.pythonhosted.org:443
+            github.com:443
+            pypi.org:443
 
       - name: Checkout repository
         uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
@@ -1129,24 +617,8 @@
   # Docker build amd64
   docker-build-amd64:
-    needs:
-      [
-        check-skip,
-        detect-changes,
-        test-core,
-        approve-flaky-test-core,
-        test-framework,
-        test-plugins,
-        test-bifrost-http,
-        test-migrations,
-        test-docker-image-amd64,
-        test-docker-image-arm64,
-        core-release,
-        framework-release,
-        plugins-release,
-        bifrost-http-release,
-      ]
-    if: "always() && needs.check-skip.outputs.should-skip != 'true' && needs.detect-changes.outputs.docker-needs-release == 'true' && (needs.test-core.result == 'success' || needs.test-core.result == 'skipped' || (needs.test-core.result == 'failure' && needs.approve-flaky-test-core.result == 'success')) && (needs.test-framework.result == 'success' || needs.test-framework.result == 'skipped') && (needs.test-plugins.result == 'success' || needs.test-plugins.result == 'skipped') && (needs.test-bifrost-http.result == 'success' || needs.test-bifrost-http.result == 'skipped') && (needs.test-migrations.result == 'success' || needs.test-migrations.result == 'skipped') && (needs.test-docker-image-amd64.result == 'success' || needs.test-docker-image-amd64.result == 'skipped') && (needs.test-docker-image-arm64.result == 'success' || needs.test-docker-image-arm64.result == 'skipped') && (needs.detect-changes.outputs.core-needs-release == 'false' || needs.core-release.result == 'success' || needs.core-release.result == 'skipped') && (needs.detect-changes.outputs.framework-needs-release == 'false' || needs.framework-release.result == 'success' || needs.framework-release.result == 'skipped') && (needs.detect-changes.outputs.plugins-need-release == 'false' || needs.plugins-release.result == 'success' || needs.plugins-release.result == 'skipped') && (needs.detect-changes.outputs.bifrost-http-needs-release == 'false' || needs.bifrost-http-release.result == 'success' || needs.bifrost-http-release.result == 'skipped')"
+    needs: [check-skip, detect-changes, core-release, framework-release, plugins-release, bifrost-http-release]
+    if: "always() && needs.check-skip.outputs.should-skip != 'true' && needs.detect-changes.outputs.docker-needs-release == 'true' && (needs.detect-changes.outputs.core-needs-release == 'false' || needs.core-release.result == 'success' || needs.core-release.result == 'skipped') && (needs.detect-changes.outputs.framework-needs-release == 'false' || needs.framework-release.result == 'success' || needs.framework-release.result == 'skipped') && (needs.detect-changes.outputs.plugins-need-release == 'false' || needs.plugins-release.result == 'success' || needs.plugins-release.result == 'skipped') && (needs.detect-changes.outputs.bifrost-http-needs-release == 'false' || needs.bifrost-http-release.result == 'success' || needs.bifrost-http-release.result == 'skipped')"
     runs-on: ubuntu-latest
     permissions:
       contents: write
@@ -1155,10 +627,21 @@ jobs:
       ACCOUNT: maximhq
       IMAGE_NAME: bifrost
     steps:
-      - name: Harden the runner (Audit all outbound calls)
+      - name: Harden Runner
         uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
         with:
-          egress-policy: audit
+          egress-policy: block
+          allowed-endpoints: >
+            auth.docker.io:443
+            dl-cdn.alpinelinux.org:443
+            fonts.googleapis.com:443
+            fonts.gstatic.com:443
+            github.com:443
+            production.cloudflare.docker.com:443
+            proxy.golang.org:443
+            registry-1.docker.io:443
+            registry.npmjs.org:443
+            storage.googleapis.com:443
 
       - name: Checkout repository
         uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
@@ -1205,24 +688,8 @@
   # Docker build arm64
   docker-build-arm64:
-    needs:
-      [
-        check-skip,
-        detect-changes,
-        test-core,
-        approve-flaky-test-core,
-        test-framework,
-        test-plugins,
-        test-bifrost-http,
-        test-migrations,
-        test-docker-image-amd64,
-        test-docker-image-arm64,
-        core-release,
-        framework-release,
-        plugins-release,
-        bifrost-http-release,
-      ]
-    if: "always() && needs.check-skip.outputs.should-skip != 'true' && needs.detect-changes.outputs.docker-needs-release == 'true' && (needs.test-core.result == 'success' || needs.test-core.result == 'skipped' || (needs.test-core.result == 'failure' && needs.approve-flaky-test-core.result == 'success')) && (needs.test-framework.result == 'success' || needs.test-framework.result == 'skipped') && (needs.test-plugins.result == 'success' || needs.test-plugins.result == 'skipped') && (needs.test-bifrost-http.result == 'success' || needs.test-bifrost-http.result == 'skipped') && (needs.test-migrations.result == 'success' || needs.test-migrations.result == 'skipped') && (needs.test-docker-image-amd64.result == 'success' || needs.test-docker-image-amd64.result == 'skipped') && (needs.test-docker-image-arm64.result == 'success' || needs.test-docker-image-arm64.result == 'skipped') && (needs.detect-changes.outputs.core-needs-release == 'false' || needs.core-release.result == 'success' || needs.core-release.result == 'skipped') && (needs.detect-changes.outputs.framework-needs-release == 'false' || needs.framework-release.result == 'success' || needs.framework-release.result == 'skipped') && (needs.detect-changes.outputs.plugins-need-release == 'false' || needs.plugins-release.result == 'success' || needs.plugins-release.result == 'skipped') && (needs.detect-changes.outputs.bifrost-http-needs-release == 'false' || needs.bifrost-http-release.result == 'success' || needs.bifrost-http-release.result == 'skipped')"
+    needs: [check-skip, detect-changes, core-release, framework-release, plugins-release, bifrost-http-release]
+    if: "always() && needs.check-skip.outputs.should-skip != 'true' && needs.detect-changes.outputs.docker-needs-release == 'true' && (needs.detect-changes.outputs.core-needs-release == 'false' || needs.core-release.result == 'success' || needs.core-release.result == 'skipped') && (needs.detect-changes.outputs.framework-needs-release == 'false' || needs.framework-release.result == 'success' || needs.framework-release.result == 'skipped') && (needs.detect-changes.outputs.plugins-need-release == 'false' || needs.plugins-release.result == 'success' || needs.plugins-release.result == 'skipped') && (needs.detect-changes.outputs.bifrost-http-needs-release == 'false' || needs.bifrost-http-release.result == 'success' || needs.bifrost-http-release.result == 'skipped')"
     runs-on: ubuntu-24.04-arm
     permissions:
       contents: write
@@ -1231,10 +698,21 @@ jobs:
       ACCOUNT: maximhq
       IMAGE_NAME: bifrost
     steps:
-      - name: Harden the runner (Audit all outbound calls)
+      - name: Harden Runner
         uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
         with:
-          egress-policy: audit
+          egress-policy: block
+          allowed-endpoints: >
+            auth.docker.io:443
+            dl-cdn.alpinelinux.org:443
+            fonts.googleapis.com:443
+            fonts.gstatic.com:443
+            github.com:443
+            production.cloudflare.docker.com:443
+            proxy.golang.org:443
+            registry-1.docker.io:443
+            registry.npmjs.org:443
+            storage.googleapis.com:443
 
       - name: Checkout repository
         uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
@@ -1289,10 +767,15 @@ jobs:
       ACCOUNT: maximhq
       IMAGE_NAME: bifrost
     steps:
-      - name: Harden the runner (Audit all outbound calls)
+      - name: Harden Runner
         uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
         with:
-          egress-policy: audit
+          egress-policy: block
+          allowed-endpoints: >
+            auth.docker.io:443
+            github.com:443
+            production.cloudflare.docker.com:443
+            registry-1.docker.io:443
 
       - name: Checkout repository
         uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
@@ -1309,32 +792,18 @@
   # Push Mintlify changelog
   push-mintlify-changelog:
-    needs:
-      [
-        check-skip,
-        detect-changes,
-        test-core,
-        approve-flaky-test-core,
-        test-framework,
-        test-plugins,
-        test-bifrost-http,
-        test-migrations,
-        test-docker-image-amd64,
-        test-docker-image-arm64,
-        core-release,
-        framework-release,
-        plugins-release,
-        bifrost-http-release,
-      ]
-    if: "always() && needs.check-skip.outputs.should-skip != 'true' && (needs.test-core.result == 'success' || needs.test-core.result == 'skipped' || (needs.test-core.result == 'failure' && needs.approve-flaky-test-core.result == 'success')) && (needs.test-framework.result == 'success' || needs.test-framework.result == 'skipped') && (needs.test-plugins.result == 'success' || needs.test-plugins.result == 'skipped') && (needs.test-bifrost-http.result == 'success' || needs.test-bifrost-http.result == 'skipped') && (needs.test-migrations.result == 'success' || needs.test-migrations.result == 'skipped') && (needs.test-docker-image-amd64.result == 'success' || needs.test-docker-image-amd64.result == 'skipped') && (needs.test-docker-image-arm64.result == 'success' || needs.test-docker-image-arm64.result == 'skipped') && (needs.detect-changes.outputs.core-needs-release == 'false' || needs.core-release.result == 'success' || needs.core-release.result == 'skipped') && (needs.detect-changes.outputs.framework-needs-release == 'false' || needs.framework-release.result == 'success' || needs.framework-release.result == 'skipped') && (needs.detect-changes.outputs.plugins-need-release == 'false' || needs.plugins-release.result == 'success' || needs.plugins-release.result == 'skipped') && (needs.detect-changes.outputs.bifrost-http-needs-release == 'false' || needs.bifrost-http-release.result == 'success' || needs.bifrost-http-release.result == 'skipped')"
+    needs: [check-skip, detect-changes, core-release, framework-release, plugins-release, bifrost-http-release]
+    if: "always() && needs.check-skip.outputs.should-skip != 'true' && (needs.detect-changes.outputs.core-needs-release == 'false' || needs.core-release.result == 'success' || needs.core-release.result == 'skipped') && (needs.detect-changes.outputs.framework-needs-release == 'false' || needs.framework-release.result == 'success' || needs.framework-release.result == 'skipped') && (needs.detect-changes.outputs.plugins-need-release == 'false' || needs.plugins-release.result == 'success' || needs.plugins-release.result == 'skipped') && (needs.detect-changes.outputs.bifrost-http-needs-release == 'false' || needs.bifrost-http-release.result == 'success' || needs.bifrost-http-release.result == 'skipped')"
     runs-on: ubuntu-latest
     permissions:
       contents: write
    steps:
-      - name: Harden the runner (Audit all outbound calls)
+      - name: Harden Runner
         uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
         with:
-          egress-policy: audit
+          egress-policy: block
+          allowed-endpoints: >
+            github.com:443
 
       - name: Checkout repository
         uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
@@ -1353,13 +822,6 @@
       [
         check-skip,
         detect-changes,
-        test-core,
-        test-framework,
-        test-plugins,
-        test-bifrost-http,
-        test-migrations,
-        test-docker-image-amd64,
-        test-docker-image-arm64,
         core-release,
         framework-release,
         plugins-release,
@@ -1369,10 +831,20 @@
     if: "always() && needs.check-skip.outputs.should-skip != 'true'"
     runs-on: ubuntu-latest
     steps:
-      - name: Harden the runner (Audit all outbound calls)
+      - name: Harden Runner
         uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
         with:
-          egress-policy: audit
+          egress-policy: block
+          allowed-endpoints: >
+            _http._tcp.azure.archive.ubuntu.com:443
+            _https._tcp.esm.ubuntu.com:443
+            _https._tcp.motd.ubuntu.com:443
+            _https._tcp.packages.microsoft.com:443
+            azure.archive.ubuntu.com:80
+            discord.com:443
+            dl.google.com:443
+            esm.ubuntu.com:443
+            packages.microsoft.com:443
 
       - name: Install jq
         run: |
diff --git a/.github/workflows/scorecards.yml b/.github/workflows/scorecards.yml
index 33206cdb3e..684d901c22 100644
--- a/.github/workflows/scorecards.yml
+++ b/.github/workflows/scorecards.yml
@@ -35,10 +35,23 @@ jobs:
       checks: read
 
     steps:
-      - name: Harden the runner (Audit all outbound calls)
+      - name: Harden Runner
         uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0
         with:
-          egress-policy: audit
+          egress-policy: block
+          allowed-endpoints: >
+            api.deps.dev:443
+            api.github.com:443
+            api.osv.dev:443
+            api.scorecard.dev:443
+            auth.docker.io:443
+            fulcio.sigstore.dev:443
+            github.com:443
+            index.docker.io:443
+            oss-fuzz-build-logs.storage.googleapis.com:443
+            rekor.sigstore.dev:443
+            tuf-repo-cdn.sigstore.dev:443
+            www.bestpractices.dev:443
 
       - name: "Checkout code"
         uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
diff --git a/.github/workflows/scripts/release-bifrost-http-prep.sh b/.github/workflows/scripts/release-bifrost-http-prep.sh
index c5eff8fc52..5983453755 100755
--- a/.github/workflows/scripts/release-bifrost-http-prep.sh
+++ b/.github/workflows/scripts/release-bifrost-http-prep.sh
@@ -76,24 +76,32 @@ echo "🔧 Using plugin versions from version files for transport..."
 
 # Track which plugins are actually used by the transport
 cd transports
+
+# Normalize the local go.mod directive up front so prior-release artifacts
+# (e.g. `go 1.26.2` written by earlier `go get` runs) don't trip GOTOOLCHAIN=local.
+go mod edit -go=1.26.1 -toolchain=none
+
 for plugin_name in "${!PLUGIN_VERSIONS[@]}"; do
     plugin_version="${PLUGIN_VERSIONS[$plugin_name]}"
 
     # Check if transport depends on this plugin
     if grep -q "github.com/maximhq/bifrost/plugins/$plugin_name" go.mod; then
         echo " 📦 Using $plugin_name plugin $plugin_version"
-        go_get_with_backoff "github.com/maximhq/bifrost/plugins/$plugin_name@$plugin_version"
+        # Textual require bump — skips loading the currently-declared version's go.mod
+        go mod edit -require="github.com/maximhq/bifrost/plugins/$plugin_name@$plugin_version"
     fi
 done
 
 # Also ensure core and framework are up to date
 echo " 🔧 Updating core to $CORE_VERSION"
-go_get_with_backoff "github.com/maximhq/bifrost/core@$CORE_VERSION"
+go mod edit -require="github.com/maximhq/bifrost/core@$CORE_VERSION"
 echo " 📦 Updating framework to $FRAMEWORK_VERSION"
-go_get_with_backoff "github.com/maximhq/bifrost/framework@$FRAMEWORK_VERSION"
+go mod edit -require="github.com/maximhq/bifrost/framework@$FRAMEWORK_VERSION"
 
+# Re-normalize before tidy in case any edit reintroduced a toolchain line
+go mod edit -go=1.26.1 -toolchain=none
 go mod tidy
 cd ..
diff --git a/.github/workflows/scripts/run-migration-tests.sh b/.github/workflows/scripts/run-migration-tests.sh
index f59cd7ef25..eed0eba523 100755
--- a/.github/workflows/scripts/run-migration-tests.sh
+++ b/.github/workflows/scripts/run-migration-tests.sh
@@ -1305,6 +1305,22 @@ append_dynamic_columns_postgres() {
         echo "UPDATE mcp_tool_logs SET request_id = '' WHERE id = 'mcp-log-migration-001';" >> "$output_file"
         echo "UPDATE mcp_tool_logs SET request_id = '' WHERE id = 'mcp-log-migration-002';" >> "$output_file"
     fi
+
+    # -------------------------------------------------------------------------
+    # v1.4.22 columns - governance_model_pricing flex tier pricing
+    # -------------------------------------------------------------------------
+    if column_exists_postgres "governance_model_pricing" "input_cost_per_token_flex"; then
+        echo "UPDATE governance_model_pricing SET input_cost_per_token_flex = NULL WHERE id = 1;" >> "$output_file"
+        echo "UPDATE governance_model_pricing SET input_cost_per_token_flex = NULL WHERE id = 2;" >> "$output_file"
+    fi
+    if column_exists_postgres "governance_model_pricing" "output_cost_per_token_flex"; then
+        echo "UPDATE governance_model_pricing SET output_cost_per_token_flex = NULL WHERE id = 1;" >> "$output_file"
+        echo "UPDATE governance_model_pricing SET output_cost_per_token_flex = NULL WHERE id = 2;" >> "$output_file"
+    fi
+    if column_exists_postgres "governance_model_pricing" "cache_read_input_token_cost_flex"; then
+        echo "UPDATE governance_model_pricing SET cache_read_input_token_cost_flex = NULL WHERE id = 1;" >> "$output_file"
+        echo "UPDATE governance_model_pricing SET cache_read_input_token_cost_flex = NULL WHERE id = 2;" >> "$output_file"
+    fi
 }
 
 # Append dynamic column UPDATEs for columns that may not exist in older schemas (SQLite)
@@ -1848,6 +1864,22 @@ append_dynamic_columns_sqlite() {
         echo "UPDATE governance_model_pricing SET cache_read_input_token_cost_above_272k_tokens_priority = NULL WHERE id = 1;" >> "$output_file"
         echo "UPDATE governance_model_pricing SET cache_read_input_token_cost_above_272k_tokens_priority = NULL WHERE id = 2;" >> "$output_file"
     fi
+
+    # -------------------------------------------------------------------------
+    # v1.4.22 columns - governance_model_pricing flex tier pricing
+    # -------------------------------------------------------------------------
+    if column_exists_sqlite "$config_db" "governance_model_pricing" "input_cost_per_token_flex"; then
+        echo "UPDATE governance_model_pricing SET input_cost_per_token_flex = NULL WHERE id = 1;" >> "$output_file"
+        echo "UPDATE governance_model_pricing SET input_cost_per_token_flex = NULL WHERE id = 2;" >> "$output_file"
+    fi
+    if column_exists_sqlite "$config_db" "governance_model_pricing" "output_cost_per_token_flex"; then
+        echo "UPDATE governance_model_pricing SET output_cost_per_token_flex = NULL WHERE id = 1;" >> "$output_file"
+        echo "UPDATE governance_model_pricing SET output_cost_per_token_flex = NULL WHERE id = 2;" >> "$output_file"
+    fi
+    if column_exists_sqlite "$config_db" "governance_model_pricing" "cache_read_input_token_cost_flex"; then
+        echo "UPDATE governance_model_pricing SET cache_read_input_token_cost_flex = NULL WHERE id = 1;" >> "$output_file"
+        echo "UPDATE governance_model_pricing SET cache_read_input_token_cost_flex = NULL WHERE id = 2;" >> "$output_file"
+    fi
 fi
 
 # -------------------------------------------------------------------------
diff --git a/.github/workflows/scripts/setup-go-workspace.sh b/.github/workflows/scripts/setup-go-workspace.sh
index a5effd3c49..bc9a4d2854 100755
--- a/.github/workflows/scripts/setup-go-workspace.sh
+++ b/.github/workflows/scripts/setup-go-workspace.sh
@@ -1,7 +1,7 @@
 #!/usr/bin/env bash
 set -euo pipefail
-
+export GOTOOLCHAIN=auto
 
 # If go.work exists, skip
 if [ -f "go.work" ]; then
diff --git a/.github/workflows/scripts/validate-helm-config-fields.sh b/.github/workflows/scripts/validate-helm-config-fields.sh
index 3b08dfffe9..ed7a17857d 100755
--- a/.github/workflows/scripts/validate-helm-config-fields.sh
+++ b/.github/workflows/scripts/validate-helm-config-fields.sh
@@ -164,7 +164,10 @@ bifrost:
     enforceGovernanceHeader: true
     allowDirectKeys: true
     maxRequestBodySizeMb: 50
-    enableLitellmFallbacks: true
+    compat:
+      convertTextToChat: true
+      convertChatToResponses: true
+      shouldDropParams: true
     prometheusLabels:
       - "team"
       - "env"
@@ -200,7 +203,9 @@ assert_field_value 'client.log_retention_days' '.client.log_retention_days' '30'
 assert_field_value 'client.enforce_governance_header' '.client.enforce_governance_header' 'true'
 assert_field_value 'client.allow_direct_keys' '.client.allow_direct_keys' 'true'
 assert_field_value 'client.max_request_body_size_mb' '.client.max_request_body_size_mb' '50'
-assert_field_value 'client.enable_litellm_fallbacks' '.client.enable_litellm_fallbacks' 'true'
+assert_field_value 'client.compat.convert_text_to_chat' '.client.compat.convert_text_to_chat' 'true'
+assert_field_value 'client.compat.convert_chat_to_responses' '.client.compat.convert_chat_to_responses' 'true'
+assert_field_value 'client.compat.should_drop_params' '.client.compat.should_drop_params' 'true'
 assert_field 'client.prometheus_labels' '.client.prometheus_labels'
 assert_field 'client.header_filter_config.allowlist' '.client.header_filter_config.allowlist'
 assert_field 'client.header_filter_config.denylist' '.client.header_filter_config.denylist'
@@ -638,7 +643,7 @@ bifrost:
       config:
         service_name: "bifrost-test"
         collector_url: "otel-collector:4317"
-        trace_type: "otel"
+        trace_type: "genai_extension"
         protocol: "grpc"
         metrics_enabled: true
         metrics_endpoint: "otel-collector:4317"
@@ -711,7 +716,7 @@ assert_field_value 'plugins: semantic_cache vector_store_namespace' '.plugins.[4
 assert_field_value 'plugins: otel name' '.plugins.[5].name' '"otel"'
 assert_field_value 'plugins: otel service_name' '.plugins.[5].config.service_name' '"bifrost-test"'
 assert_field_value 'plugins: otel collector_url' '.plugins.[5].config.collector_url' '"otel-collector:4317"'
-assert_field_value 'plugins: otel trace_type' '.plugins.[5].config.trace_type' '"otel"'
+assert_field_value 'plugins: otel trace_type' '.plugins.[5].config.trace_type' '"genai_extension"'
 assert_field_value 'plugins: otel protocol' '.plugins.[5].config.protocol' '"grpc"'
 assert_field_value 'plugins: otel metrics_enabled' '.plugins.[5].config.metrics_enabled' 'true'
 assert_field_value 'plugins: otel metrics_endpoint' '.plugins.[5].config.metrics_endpoint' '"otel-collector:4317"'
@@ -865,35 +870,36 @@ assert_field_value 'cluster_config.discovery.k8s_label_selector' '.cluster_confi
 
 # Gap 7: Cluster region
 assert_field_value 'cluster_config.region' '.cluster_config.region' '"us-east-1"'
 
-# SAML - Okta
-cat > "$TMPDIR/values-saml-okta.yaml" << 'VALS'
+# SCIM - Okta
+cat > "$TMPDIR/values-scim-okta.yaml" << 'VALS'
 image:
   tag: v1.0.0
 bifrost:
-  saml:
+  scim:
     enabled: true
     provider: "okta"
     config:
       issuerUrl: "https://dev-123.okta.com/oauth2/default"
       clientId: "okta-client-id"
      clientSecret: "okta-client-secret"
+      apiToken: "okta-api-token"
       audience: "api://default"
userIdField: "sub" teamIdsField: "groups" rolesField: "roles" VALS -render_config "$TMPDIR/values-saml-okta.yaml" -assert_field_value 'saml_config.enabled' '.saml_config.enabled' 'true' -assert_field_value 'saml_config.provider' '.saml_config.provider' '"okta"' -assert_field 'saml_config.config' '.saml_config.config' +render_config "$TMPDIR/values-scim-okta.yaml" +assert_field_value 'scim_config.enabled' '.scim_config.enabled' 'true' +assert_field_value 'scim_config.provider' '.scim_config.provider' '"okta"' +assert_field 'scim_config.config' '.scim_config.config' -# SAML - Entra -cat > "$TMPDIR/values-saml-entra.yaml" << 'VALS' +# SCIM - Entra +cat > "$TMPDIR/values-scim-entra.yaml" << 'VALS' image: tag: v1.0.0 bifrost: - saml: + scim: enabled: true provider: "entra" config: @@ -907,9 +913,9 @@ bifrost: rolesField: "roles" VALS -render_config "$TMPDIR/values-saml-entra.yaml" -assert_field_value 'saml_config (entra) provider' '.saml_config.provider' '"entra"' -assert_field 'saml_config (entra) config' '.saml_config.config' +render_config "$TMPDIR/values-scim-entra.yaml" +assert_field_value 'scim_config (entra) provider' '.scim_config.provider' '"entra"' +assert_field 'scim_config (entra) config' '.scim_config.config' # Load Balancer cat > "$TMPDIR/values-lb.yaml" << 'VALS' diff --git a/.github/workflows/scripts/validate-helm-schema.sh b/.github/workflows/scripts/validate-helm-schema.sh index 80febb2e4c..98c36f3137 100755 --- a/.github/workflows/scripts/validate-helm-schema.sh +++ b/.github/workflows/scripts/validate-helm-schema.sh @@ -196,8 +196,8 @@ else echo "✅ VLLM key config required fields match: [$HELM_VLLM_REQUIRED]" fi -# Check concurrency_config required fields -CONFIG_CONCURRENCY_REQUIRED=$(jq -r '."$defs".concurrency_config.required // [] | sort | join(",")' "$CONFIG_SCHEMA" 2>/dev/null || echo "") +# Check concurrency_config required fields (config calls this def concurrency_and_buffer_size) +CONFIG_CONCURRENCY_REQUIRED=$(jq -r '."$defs".concurrency_and_buffer_size.required // [] | sort | join(",")' "$CONFIG_SCHEMA" 2>/dev/null || echo "") HELM_CONCURRENCY_REQUIRED=$(jq -r '."$defs".concurrencyConfig.required // [] | sort | join(",")' "$HELM_SCHEMA" 2>/dev/null || echo "") if [ "$CONFIG_CONCURRENCY_REQUIRED" != "$HELM_CONCURRENCY_REQUIRED" ]; then @@ -433,38 +433,15 @@ else echo "✅ MCP stdio config required fields match: [$CONFIG_MCP_STDIO_REQUIRED]" fi -# Check MCP websocket_config required fields -CONFIG_MCP_WS_REQUIRED=$(jq -r '."$defs".mcp_client_config.properties.websocket_config.required // [] | sort | join(",")' "$CONFIG_SCHEMA" 2>/dev/null || echo "") -HELM_MCP_WS_REQUIRED=$(jq -r '."$defs".mcpClientConfig.properties.websocketConfig.required // [] | sort | join(",")' "$HELM_SCHEMA" 2>/dev/null || echo "") - -if [ "$CONFIG_MCP_WS_REQUIRED" != "$HELM_MCP_WS_REQUIRED" ]; then - echo "❌ MCP websocket config required fields mismatch:" - echo " Config: [$CONFIG_MCP_WS_REQUIRED]" - echo " Helm: [$HELM_MCP_WS_REQUIRED]" - ERRORS=$((ERRORS + 1)) -else - echo "✅ MCP websocket config required fields match: [$CONFIG_MCP_WS_REQUIRED]" -fi - -# Check MCP http_config required fields -CONFIG_MCP_HTTP_REQUIRED=$(jq -r '."$defs".mcp_client_config.properties.http_config.required // [] | sort | join(",")' "$CONFIG_SCHEMA" 2>/dev/null || echo "") -HELM_MCP_HTTP_REQUIRED=$(jq -r '."$defs".mcpClientConfig.properties.httpConfig.required // [] | sort | join(",")' "$HELM_SCHEMA" 2>/dev/null || echo "") - -if [ "$CONFIG_MCP_HTTP_REQUIRED" != "$HELM_MCP_HTTP_REQUIRED" ]; then - echo "❌ MCP http 
config required fields mismatch:" - echo " Config: [$CONFIG_MCP_HTTP_REQUIRED]" - echo " Helm: [$HELM_MCP_HTTP_REQUIRED]" - ERRORS=$((ERRORS + 1)) -else - echo "✅ MCP http config required fields match: [$CONFIG_MCP_HTTP_REQUIRED]" -fi +# MCP websocket_config / http_config are Helm-only sub-structures; config.schema.json uses +# a flat connection_type + connection_string instead, so there is nothing to compare here. echo "" echo "🔍 Checking required fields in SAML/SCIM config..." # Check okta_config required fields CONFIG_OKTA_REQUIRED=$(jq -r '."$defs".okta_config.required // [] | sort | join(",")' "$CONFIG_SCHEMA" 2>/dev/null || echo "") -HELM_OKTA_REQUIRED=$(jq -r '.properties.bifrost.properties.saml.allOf[0].then.properties.config.required // [] | sort | join(",")' "$HELM_SCHEMA" 2>/dev/null || echo "") +HELM_OKTA_REQUIRED=$(jq -r '.properties.bifrost.properties.scim.allOf[0].then.properties.config.required // [] | sort | join(",")' "$HELM_SCHEMA" 2>/dev/null || echo "") if [ "$CONFIG_OKTA_REQUIRED" != "$HELM_OKTA_REQUIRED" ]; then echo "❌ Okta config required fields mismatch:" @@ -477,7 +454,7 @@ fi # Check entra_config required fields CONFIG_ENTRA_REQUIRED=$(jq -r '."$defs".entra_config.required // [] | sort | join(",")' "$CONFIG_SCHEMA" 2>/dev/null || echo "") -HELM_ENTRA_REQUIRED=$(jq -r '.properties.bifrost.properties.saml.allOf[1].then.properties.config.required // [] | sort | join(",")' "$HELM_SCHEMA" 2>/dev/null || echo "") +HELM_ENTRA_REQUIRED=$(jq -r '.properties.bifrost.properties.scim.allOf[1].then.properties.config.required // [] | sort | join(",")' "$HELM_SCHEMA" 2>/dev/null || echo "") if [ "$CONFIG_ENTRA_REQUIRED" != "$HELM_ENTRA_REQUIRED" ]; then echo "❌ Entra config required fields mismatch:" diff --git a/.github/workflows/scripts/validate-helm-templates.sh b/.github/workflows/scripts/validate-helm-templates.sh index 380ae8d109..ccab87838e 100755 --- a/.github/workflows/scripts/validate-helm-templates.sh +++ b/.github/workflows/scripts/validate-helm-templates.sh @@ -270,7 +270,7 @@ test_template "client: new properties (Gap 1+2)" \ test_template "otel: headers + tls_ca_cert + insecure (Gap 3)" \ --set bifrost.plugins.otel.enabled=true \ --set bifrost.plugins.otel.config.collector_url=otel:4317 \ - --set bifrost.plugins.otel.config.trace_type=otel \ + --set bifrost.plugins.otel.config.trace_type=genai_extension \ --set bifrost.plugins.otel.config.protocol=grpc \ --set 'bifrost.plugins.otel.config.headers.Authorization=Bearer token' \ --set bifrost.plugins.otel.config.tls_ca_cert=/certs/ca.pem \ @@ -317,7 +317,7 @@ test_template "combined: all new Gap 1-8 fields" \ --set bifrost.client.hideDeletedVirtualKeysInFilters=true \ --set bifrost.plugins.otel.enabled=true \ --set bifrost.plugins.otel.config.collector_url=otel:4317 \ - --set bifrost.plugins.otel.config.trace_type=otel \ + --set bifrost.plugins.otel.config.trace_type=genai_extension \ --set bifrost.plugins.otel.config.protocol=grpc \ --set bifrost.plugins.otel.config.insecure=true \ --set bifrost.plugins.governance.enabled=true \ diff --git a/.github/workflows/snyk.yml b/.github/workflows/snyk.yml index 5478ca835e..cfb3fe7b82 100644 --- a/.github/workflows/snyk.yml +++ b/.github/workflows/snyk.yml @@ -16,10 +16,29 @@ jobs: name: Snyk Open Source (deps) runs-on: ubuntu-latest steps: - - name: Harden the runner (Audit all outbound calls) + - name: Harden Runner uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0 with: - egress-policy: audit + egress-policy: block + allowed-endpoints: 
> + api.github.com:443 + api.snyk.io:443 + downloads.snyk.io:443 + files.pythonhosted.org:443 + fonts.googleapis.com:443 + fonts.gstatic.com:443 + github.com:443 + iojs.org:443 + nodejs.org:443 + packages.microsoft.com:443 + proxy.golang.org:443 + raw.githubusercontent.com:443 + registry.npmjs.org:443 + release-assets.githubusercontent.com:443 + releases.astral.sh:443 + static.snyk.io:443 + storage.googleapis.com:443 + sum.golang.org:443 - name: Checkout uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 @@ -42,7 +61,7 @@ jobs: - name: Setup Go uses: actions/setup-go@4b73464bb391d4059bd26b0524d20df3927bd417 # v6.3.0 with: - go-version: "1.26.2" + go-version: "1.26.1" - name: Setup Go workspace run: make setup-workspace @@ -70,10 +89,29 @@ jobs: name: Snyk Code (SAST) runs-on: ubuntu-latest steps: - - name: Harden the runner (Audit all outbound calls) + - name: Harden Runner uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0 with: - egress-policy: audit + egress-policy: block + allowed-endpoints: > + api.github.com:443 + api.snyk.io:443 + deeproxy.snyk.io:443 + downloads.snyk.io:443 + files.pythonhosted.org:443 + fonts.googleapis.com:443 + fonts.gstatic.com:443 + github.com:443 + iojs.org:443 + nodejs.org:443 + packages.microsoft.com:443 + proxy.golang.org:443 + raw.githubusercontent.com:443 + registry.npmjs.org:443 + release-assets.githubusercontent.com:443 + releases.astral.sh:443 + storage.googleapis.com:443 + sum.golang.org:443 - name: Checkout uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 @@ -96,7 +134,7 @@ jobs: - name: Setup Go uses: actions/setup-go@4b73464bb391d4059bd26b0524d20df3927bd417 # v6.3.0 with: - go-version: "1.26.2" + go-version: "1.26.1" - name: Setup Go workspace run: make setup-workspace diff --git a/README.md b/README.md index 9e843c58fc..f957c58ee1 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,6 @@ [![Go Report Card](https://goreportcard.com/badge/github.com/maximhq/bifrost/core)](https://goreportcard.com/report/github.com/maximhq/bifrost/core) [![Discord badge](https://dcbadge.limes.pink/api/server/https://discord.gg/exN5KAydbU?style=flat)](https://discord.gg/exN5KAydbU) -[![Known Vulnerabilities](https://snyk.io/test/github/maximhq/bifrost/badge.svg)](https://snyk.io/test/github/maximhq/bifrost) [![codecov](https://codecov.io/gh/maximhq/bifrost/branch/main/graph/badge.svg)](https://codecov.io/gh/maximhq/bifrost) ![Docker Pulls](https://img.shields.io/docker/pulls/maximhq/bifrost) [Run In Postman](https://app.getpostman.com/run-collection/31642484-2ba0e658-4dcd-49f4-845a-0c7ed745b916?action=collection%2Ffork&source=rip_markdown&collection-url=entityId%3D31642484-2ba0e658-4dcd-49f4-845a-0c7ed745b916%26entityType%3Dcollection%26workspaceId%3D63e853c8-9aec-477f-909c-7f02f543150e) diff --git a/cli/go.mod b/cli/go.mod index c887aac505..f7260a3b86 100644 --- a/cli/go.mod +++ b/cli/go.mod @@ -1,6 +1,6 @@ module github.com/maximhq/bifrost/cli -go 1.26.2 +go 1.26.1 require ( github.com/bytedance/sonic v1.15.0 diff --git a/core/bifrost.go b/core/bifrost.go index 7db6663758..70454a96e6 100644 --- a/core/bifrost.go +++ b/core/bifrost.go @@ -89,12 +89,41 @@ type Bifrost struct { // ProviderQueue wraps a provider's request channel with lifecycle management // to prevent "send on closed channel" panics during provider removal/update. // Producers must check the closing flag or select on the done channel before sending. 
+// +// Why pq.queue is NEVER closed: +// +// Closing a channel in Go causes any concurrent send to that channel to panic +// ("send on closed channel"). There is always a TOCTOU window between a +// producer's isClosing() check and its select { case pq.queue <- msg: ... }: +// the producer could pass isClosing() while the queue is open, get preempted, +// and resume only after the queue is closed. Go's selectgo evaluates select +// cases in a random order, so even having case <-pq.done: in the same select +// does not protect against this — if selectgo evaluates the send case first on +// a closed channel it panics immediately via goto sclose, before reaching done. +// +// To close pq.queue safely you would need a sender-side WaitGroup so that +// signalClosing could wait for every in-flight producer to finish. That adds +// non-trivial overhead on the hot request path. +// +// Instead, pq.done is the sole shutdown signal. Receiving from a closed channel +// is always safe (returns the zero value immediately), so: +// - Workers exit via case <-pq.done: — safe +// - Producers bail via case <-pq.done: — safe +// - drainQueueWithErrors handles any messages that slip through the TOCTOU window +// +// pq.queue is garbage collected automatically: +// - RemoveProvider calls requestQueues.Delete, dropping the map's reference. +// - UpdateProvider calls requestQueues.Store with a new queue, dropping the +// map's reference to oldPq. Shutdown does not Delete at all — the whole +// Bifrost instance is torn down. +// In all cases, once no producer goroutine holds a reference to the +// ProviderQueue, both the struct and pq.queue are eligible for GC. +// No explicit close is needed. type ProviderQueue struct { - queue chan *ChannelMessage // the actual request queue channel - done chan struct{} // closed to signal shutdown to producers + queue chan *ChannelMessage // the actual request queue channel — never closed, see above + done chan struct{} // closed by signalClosing() to signal shutdown; never written to otherwise closing uint32 // atomic: 0 = open, 1 = closing signalOnce sync.Once - closeOnce sync.Once } func isLargePayloadPassthrough(ctx *schemas.BifrostContext) bool { @@ -122,14 +151,6 @@ func (pq *ProviderQueue) signalClosing() { }) } -// closeQueue closes the provider queue. -// Protected by sync.Once to prevent double-close. -func (pq *ProviderQueue) closeQueue() { - pq.closeOnce.Do(func() { - close(pq.queue) - }) -} - // isClosing returns true if the provider queue is closing. // Uses atomic load for lock-free checking. func (pq *ProviderQueue) isClosing() bool { @@ -3109,57 +3130,36 @@ func (bifrost *Bifrost) RemoveProvider(providerKey schemas.ModelProvider) error } pq := pqValue.(*ProviderQueue) - // Step 2: Signal closing to producers (prevents new sends) - // This must happen before closing the queue to avoid "send on closed channel" panics + // Step 2: Signal closing. Blocks new producers (isClosing() returns true) and + // causes idle workers to drain remaining buffered requests with errors then exit. pq.signalClosing() bifrost.logger.Debug("signaled closing for provider %s", providerKey) - // Step 3: Now safe to close the queue (no new producers can send) - pq.closeQueue() - bifrost.logger.Debug("closed request queue for provider %s", providerKey) - - // Step 4: Wait for all workers to finish processing in-flight requests + // Step 3: Wait for all workers to finish in-flight requests and exit. 
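A distilled, self-contained sketch of that invariant (toy types, not the production ProviderQueue): shutdown closes only done, and the queue channel is never closed, so the producer's select is safe no matter which case selectgo evaluates first.

package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

type toyQueue struct {
	ch      chan int
	done    chan struct{}
	closing atomic.Bool
	once    sync.Once
}

func (q *toyQueue) signalClosing() {
	q.once.Do(func() {
		q.closing.Store(true)
		close(q.done) // the only channel ever closed
	})
}

func main() {
	q := &toyQueue{ch: make(chan int, 4), done: make(chan struct{})}
	q.signalClosing()

	// Both cases are ready here: a buffered send on an open channel and a
	// receive on a closed done channel. selectgo picks one at random, and
	// neither can panic, which is exactly the point of never closing q.ch.
	select {
	case q.ch <- 1:
		fmt.Println("enqueued into the still-open queue")
	case <-q.done:
		fmt.Println("bailed out via done")
	}
}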
waitGroup, exists := bifrost.waitGroups.Load(providerKey) if exists { waitGroup.(*sync.WaitGroup).Wait() bifrost.logger.Debug("all workers for provider %s have stopped", providerKey) } - // Step 5: Remove the provider from the request queues + // Step 3b: Final drain sweep — see drainQueueWithErrors for full explanation. + bifrost.drainQueueWithErrors(pq) + + // Step 4: Remove the provider from the request queues. bifrost.requestQueues.Delete(providerKey) - // Step 6: Remove the provider from the wait groups + // Step 5: Remove the provider from the wait groups. bifrost.waitGroups.Delete(providerKey) - // Step 7: Remove the provider from the providers slice - replacementAttempts := 0 - maxReplacementAttempts := 100 // Prevent infinite loops in high-contention scenarios - for { - replacementAttempts++ - if replacementAttempts > maxReplacementAttempts { - return fmt.Errorf("failed to replace provider %s in providers slice after %d attempts", providerKey, maxReplacementAttempts) - } - oldPtr := bifrost.providers.Load() - var oldSlice []schemas.Provider - if oldPtr != nil { - oldSlice = *oldPtr - } - // Create new slice without the old provider of this key - // Use exact capacity to avoid allocations - if len(oldSlice) == 0 { - return fmt.Errorf("provider %s not found in providers slice", providerKey) - } - newSlice := make([]schemas.Provider, 0, len(oldSlice)-1) - for _, existingProvider := range oldSlice { - if existingProvider.GetProviderKey() != providerKey { - newSlice = append(newSlice, existingProvider) - } - } - if bifrost.providers.CompareAndSwap(oldPtr, &newSlice) { - bifrost.logger.Debug("successfully removed provider instance for %s in providers slice", providerKey) - break - } - // Retrying as swapping did not work (likely due to concurrent modification) + // Step 6: Remove the provider from the providers slice. + if err := bifrost.removeProviderFromSlice(providerKey); err != nil { + bifrost.logger.Error( + "provider %s was removed from queues but could not be removed from the providers slice — "+ + "bifrost.providers is now inconsistent. "+ + "To recover: retry RemoveProvider(%s), or restart Bifrost if that fails.", + providerKey, providerKey, + ) + return err } bifrost.logger.Info("successfully removed provider %s", providerKey) @@ -3181,6 +3181,15 @@ func (bifrost *Bifrost) RemoveProvider(providerKey schemas.ModelProvider) error // Note: This operation will temporarily pause request processing for the specified provider // while the transition occurs. In-flight requests will complete before workers are stopped. // Buffered requests in the old queue will be transferred to the new queue to prevent loss. +// +// Concurrency safety — no-worker window: +// UpdateProvider holds a per-provider write lock (providerMutex.Lock) for its entire +// duration. All producer paths (tryRequest, tryStreamRequest) acquire the corresponding +// read lock inside getProviderQueue before they can look up or enqueue into any queue. +// This means no producer can observe or enqueue into newPq until UpdateProvider returns +// and releases the write lock — at which point new workers are already running and +// consuming newPq. There is therefore no window where newPq is visible to producers +// but has zero workers. 
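A minimal sketch of that lock discipline, with illustrative names (registry, enqueue, update are not the production identifiers): the updater holds the write lock across both the queue swap and worker startup, so a queue with zero consumers is never observable from the producer path.

package main

import (
	"fmt"
	"sync"
)

type registry struct {
	mu    sync.RWMutex
	queue chan int
}

// enqueue mirrors the producer path: the read lock covers the lookup,
// so it cannot interleave with an in-progress update.
func (r *registry) enqueue(v int) {
	r.mu.RLock()
	q := r.queue
	r.mu.RUnlock()
	q <- v
}

// update mirrors UpdateProvider: the swap and worker startup happen entirely
// under the write lock, so the new queue is never visible without a worker.
func (r *registry) update(wg *sync.WaitGroup) {
	r.mu.Lock()
	defer r.mu.Unlock()
	newQ := make(chan int, 8)
	r.queue = newQ
	wg.Add(1)
	go func() {
		defer wg.Done()
		fmt.Println("processed", <-newQ) // toy worker: handle one item and exit
	}()
}

func main() {
	r := &registry{queue: make(chan int, 8)}
	var wg sync.WaitGroup
	r.update(&wg)
	r.enqueue(42)
	wg.Wait()
}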
func (bifrost *Bifrost) UpdateProvider(providerKey schemas.ModelProvider) error { bifrost.logger.Info(fmt.Sprintf("Updating provider configuration for provider %s", providerKey)) // Get the updated configuration from the account @@ -3213,23 +3222,23 @@ func (bifrost *Bifrost) UpdateProvider(providerKey schemas.ModelProvider) error queue: make(chan *ChannelMessage, providerConfig.ConcurrencyAndBufferSize.BufferSize), done: make(chan struct{}), signalOnce: sync.Once{}, - closeOnce: sync.Once{}, } - // Step 2: Atomically replace the queue FIRST (new producers immediately get the new queue) - // This minimizes the window where requests fail during the update + // Step 2: Atomically replace the queue so new producers immediately use newPq. bifrost.requestQueues.Store(providerKey, newPq) bifrost.logger.Debug("stored new queue for provider %s, new producers will use it", providerKey) - // Step 3: Signal old queue is closing to producers that already have a reference - // Only in-flight producers with the old reference will see this - oldPq.signalClosing() - bifrost.logger.Debug("signaled closing for old queue of provider %s", providerKey) - - // Step 4: Transfer any buffered requests from old queue to new queue - // This prevents request loss during the transition + // Step 3: Transfer buffered requests from the old queue to the new queue BEFORE + // signalling workers to stop. This ensures buffered requests are processed by the + // new workers rather than being drained with errors. + // Old workers are still running and may consume some items concurrently — that is + // fine, they process them normally. + // If newPq is full during transfer, all remaining buffered requests are cancelled + // immediately rather than blocking — this avoids the deadlock where transfer goroutines + // wait for space that only opens once new workers start (which can't happen until + // the transfer completes). transferredCount := 0 - var transferWaitGroup sync.WaitGroup + cancelledCount := 0 for { select { case msg := <-oldPq.queue: @@ -3237,37 +3246,33 @@ func (bifrost *Bifrost) UpdateProvider(providerKey schemas.ModelProvider) error case newPq.queue <- msg: transferredCount++ default: - // New queue is full, handle this request in a goroutine - // This is unlikely with proper buffer sizing but provides safety - transferWaitGroup.Add(1) - go func(m *ChannelMessage) { - defer transferWaitGroup.Done() + // newPq is full — cancel this message and all remaining in oldPq. 
+ cancelMsg := func(r *ChannelMessage) { + prov, mod, _ := r.BifrostRequest.GetRequestFields() select { - case newPq.queue <- m: - // Message successfully transferred - case <-time.After(5 * time.Second): - bifrost.logger.Warn("Failed to transfer buffered request to new queue within timeout") - // Send error response to avoid hanging the client - provider, model, _ := m.BifrostRequest.GetRequestFields() - select { - case m.Err <- schemas.BifrostError{ - IsBifrostError: false, - Error: &schemas.ErrorField{ - Message: "request failed during provider concurrency update", - }, - ExtraFields: schemas.BifrostErrorExtraFields{ - RequestType: m.RequestType, - Provider: provider, - ModelRequested: model, - }, - }: - case <-time.After(1 * time.Second): - // If we can't send the error either, just log and continue - bifrost.logger.Warn("Failed to send error response during transfer timeout") - } + case r.Err <- schemas.BifrostError{ + IsBifrostError: false, + Error: &schemas.ErrorField{Message: "request failed during provider concurrency update: queue full"}, + ExtraFields: schemas.BifrostErrorExtraFields{ + RequestType: r.RequestType, + Provider: prov, + ModelRequested: mod, + }, + }: + case <-r.Context.Done(): } - }(msg) - goto transferComplete + } + cancelMsg(msg) + cancelledCount++ + for { + select { + case r := <-oldPq.queue: + cancelMsg(r) + cancelledCount++ + default: + goto transferComplete + } + } } default: // No more buffered messages @@ -3276,33 +3281,59 @@ func (bifrost *Bifrost) UpdateProvider(providerKey schemas.ModelProvider) error } transferComplete: - // Wait for all transfer goroutines to complete - transferWaitGroup.Wait() if transferredCount > 0 { bifrost.logger.Info("transferred %d buffered requests to new queue for provider %s", transferredCount, providerKey) } + if cancelledCount > 0 { + bifrost.logger.Warn("cancelled %d buffered requests during transfer for provider %s: new queue was full", cancelledCount, providerKey) + } - // Step 5: Close the old queue to signal workers to stop - oldPq.closeQueue() - bifrost.logger.Debug("closed old request queue for provider %s", providerKey) + // Step 4: Signal the old queue is closing. Producers that still hold a reference to + // oldPq will detect this via isClosing() and transparently re-route to newPq. + // This happens after the transfer so the new queue is already populated before + // stale producers attempt their re-route. + oldPq.signalClosing() + bifrost.logger.Debug("signaled closing for old queue of provider %s", providerKey) - // Step 6: Wait for all existing workers to finish processing in-flight requests + // Step 5: Wait for all existing workers to finish processing in-flight requests. + // Workers exit via oldPq.done (signalled above). waitGroup, exists := bifrost.waitGroups.Load(providerKey) if exists { waitGroup.(*sync.WaitGroup).Wait() bifrost.logger.Debug("all workers for provider %s have stopped", providerKey) } - // Step 7: Create new wait group for the updated workers + // Step 5b: Final drain sweep — see drainQueueWithErrors for full explanation. + bifrost.drainQueueWithErrors(oldPq) + + // Step 6: Create new wait group for the updated workers. bifrost.waitGroups.Store(providerKey, &sync.WaitGroup{}) - // Step 8: Create provider instance + // Step 7: Create provider instance. 
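The transfer loop above (Step 3) never blocks: every send into newPq is attempted with a default branch, and a full new queue fails the remainder fast instead of waiting on workers that cannot start until the transfer returns. A distilled sketch with toy payloads:

package main

import "fmt"

// transfer moves buffered items src -> dst without ever blocking. If dst
// fills up, the current item and everything still buffered in src are
// counted as cancelled, mirroring the cancel-everything-behind-it path.
func transfer(src, dst chan int) (moved, cancelled int) {
	for {
		select {
		case v := <-src:
			select {
			case dst <- v:
				moved++
			default:
				cancelled++ // dst full: fail this item immediately
				for {
					select {
					case <-src:
						cancelled++ // and everything behind it
					default:
						return
					}
				}
			}
		default:
			return // src drained
		}
	}
}

func main() {
	src, dst := make(chan int, 8), make(chan int, 2)
	for i := 0; i < 5; i++ {
		src <- i
	}
	moved, cancelled := transfer(src, dst)
	fmt.Printf("moved=%d cancelled=%d\n", moved, cancelled) // moved=2 cancelled=3
}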
provider, err := bifrost.createBaseProvider(providerKey, providerConfig) if err != nil { - return fmt.Errorf("failed to create provider instance for %s: %v", providerKey, err) - } - - // Step 8.5: Atomically replace the provider in the providers slice + // Roll back: signal closing, remove from map, then drain. + // Order matters: Delete before drainQueueWithErrors so that producers + // re-routing via requestQueues.Load find nothing and return "provider + // shutting down" immediately, narrowing the TOCTOU window before the sweep. + newPq.signalClosing() + bifrost.requestQueues.Delete(providerKey) + bifrost.waitGroups.Delete(providerKey) + bifrost.drainQueueWithErrors(newPq) + if sliceErr := bifrost.removeProviderFromSlice(providerKey); sliceErr != nil { + bifrost.logger.Error( + "UpdateProvider rollback for %s is incomplete — provider was removed from queues "+ + "but could not be removed from the providers slice: %v. "+ + "bifrost.providers is now inconsistent. "+ + "To recover: call RemoveProvider(%s) then AddProvider to re-register it, "+ + "or restart Bifrost if that fails.", + providerKey, sliceErr, providerKey, + ) + } + return fmt.Errorf("provider update for %s failed during initialization; provider has been removed — re-add or retry UpdateProvider to restore it: %v", providerKey, err) + } + + // Step 8: Atomically replace the provider in the providers slice. // This must happen before starting new workers to prevent stale reads bifrost.logger.Debug("atomically replacing provider instance in providers slice for %s", providerKey) @@ -3312,7 +3343,21 @@ transferComplete: for { replacementAttempts++ if replacementAttempts > maxReplacementAttempts { - return fmt.Errorf("failed to replace provider %s in providers slice after %d attempts", providerKey, maxReplacementAttempts) + newPq.signalClosing() + bifrost.requestQueues.Delete(providerKey) + bifrost.waitGroups.Delete(providerKey) + bifrost.drainQueueWithErrors(newPq) + if sliceErr := bifrost.removeProviderFromSlice(providerKey); sliceErr != nil { + bifrost.logger.Error( + "UpdateProvider rollback for %s is incomplete — provider was removed from queues "+ + "but could not be removed from the providers slice: %v. "+ + "bifrost.providers is now inconsistent. "+ + "To recover: call RemoveProvider(%s) then AddProvider to re-register it, "+ + "or restart Bifrost if that fails.", + providerKey, sliceErr, providerKey, + ) + } + return fmt.Errorf("failed to replace provider %s in providers slice after %d attempts; provider has been removed — re-add or retry UpdateProvider to restore it", providerKey, maxReplacementAttempts) } oldPtr := bifrost.providers.Load() @@ -3348,7 +3393,7 @@ transferComplete: // Retrying as swapping did not work (likely due to concurrent modification) } - // Step 9: Start new workers with updated concurrency + // Step 9: Start new workers with updated concurrency. bifrost.logger.Debug("starting %d new workers for provider %s with buffer size %d", providerConfig.ConcurrencyAndBufferSize.Concurrency, providerKey, @@ -3384,6 +3429,33 @@ func (bifrost *Bifrost) getProviderMutex(providerKey schemas.ModelProvider) *syn return mutexValue.(*sync.RWMutex) } +// removeProviderFromSlice atomically removes the provider with the given key +// from bifrost.providers using a CAS retry loop. Callers hold the per-provider +// write mutex so no concurrent goroutine can re-add this key — contention is +// only from other providers' CAS operations, so the loop converges in at most +// a few iterations under any concurrency level. 
+// Returns an error if the limit is hit (state will be inconsistent). +func (bifrost *Bifrost) removeProviderFromSlice(providerKey schemas.ModelProvider) error { + const maxAttempts = 100 + for range maxAttempts { + oldPtr := bifrost.providers.Load() + if oldPtr == nil { + return nil + } + oldSlice := *oldPtr + newSlice := make([]schemas.Provider, 0, len(oldSlice)) + for _, p := range oldSlice { + if p.GetProviderKey() != providerKey { + newSlice = append(newSlice, p) + } + } + if bifrost.providers.CompareAndSwap(oldPtr, &newSlice) { + return nil + } + } + return fmt.Errorf("failed to remove provider %s from providers slice after %d attempts", providerKey, maxAttempts) +} + // MCP PUBLIC API // RegisterMCPTool registers a typed tool handler with the MCP integration. @@ -3694,7 +3766,6 @@ func (bifrost *Bifrost) prepareProvider(providerKey schemas.ModelProvider, confi queue: make(chan *ChannelMessage, config.ConcurrencyAndBufferSize.BufferSize), done: make(chan struct{}), signalOnce: sync.Once{}, - closeOnce: sync.Once{}, } bifrost.requestQueues.Store(providerKey, pq) @@ -4382,17 +4453,31 @@ func (bifrost *Bifrost) tryRequest(ctx *schemas.BifrostContext, req *schemas.Bif msg := bifrost.getChannelMessage(*preReq) msg.Context = ctx - // Check if provider is closing before attempting to send (lock-free atomic check) - // This prevents "send on closed channel" panics during provider removal/update + // If the queue is closing, check whether the provider was updated (new queue + // available) or removed. On update, transparently re-route to the new queue + // so in-flight producers don't get spurious errors. On removal, error out. + // + // Use a direct sync.Map lookup instead of getProviderQueue to avoid the + // lazy-creation path: getProviderQueue can resurrect a provider that was + // just removed by RemoveProvider if the account config still exists. if pq.isClosing() { - bifrost.releaseChannelMessage(msg) - bifrostErr := newBifrostErrorFromMsg("provider is shutting down") - bifrostErr.ExtraFields = schemas.BifrostErrorExtraFields{ - RequestType: req.RequestType, - Provider: provider, - ModelRequested: model, + var reroutedPq *ProviderQueue + if val, ok := bifrost.requestQueues.Load(provider); ok { + if candidate := val.(*ProviderQueue); candidate != pq && !candidate.isClosing() { + reroutedPq = candidate + } } - return nil, bifrostErr + if reroutedPq == nil { + bifrost.releaseChannelMessage(msg) + bifrostErr := newBifrostErrorFromMsg("provider is shutting down") + bifrostErr.ExtraFields = schemas.BifrostErrorExtraFields{ + RequestType: req.RequestType, + Provider: provider, + ModelRequested: model, + } + return nil, bifrostErr + } + pq = reroutedPq } // Use select with done channel to detect shutdown during send @@ -4492,7 +4577,13 @@ func (bifrost *Bifrost) tryRequest(ctx *schemas.BifrostContext, req *schemas.Bif } return resp, nil case <-ctx.Done(): - bifrost.releaseChannelMessage(msg) + // Do NOT releaseChannelMessage here. The message is already enqueued and + // the worker still holds a reference to msg.Response and msg.Err. Returning + // those channels to the pool now would let the next request reuse them while + // the worker is still writing to them — stale data corruption. The worker + // never calls releaseChannelMessage itself, so this message leaks from the + // pool and is GC'd. That is intentional: a small pool leak on cancellation + // is far safer than corrupting another request's channels. 
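Why the leak is the safe choice: a deterministic toy of the corruption it prevents. The real code pools channels with sync.Pool; a one-slot free list stands in for it here so the reuse is guaranteed to reproduce.

package main

import (
	"fmt"
	"time"
)

func main() {
	// Stand-in for the channel pool: one recycled response channel.
	freeList := make(chan chan string, 1)

	chA := make(chan string, 1) // request A's pooled response channel

	// Request A's worker still holds chA and will respond later.
	go func() {
		time.Sleep(10 * time.Millisecond)
		chA <- "response for request A"
	}()

	// Request A's caller hits ctx.Done() and (wrongly) recycles the channel
	// while the worker still references it.
	freeList <- chA

	// Request B is handed the same channel and reads A's stale response.
	chB := <-freeList
	fmt.Println("request B got:", <-chB) // prints request A's response
}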
provider, model, _ := req.GetRequestFields() return nil, newBifrostCtxDoneError(ctx, provider, model, req.RequestType, "waiting for provider response") } @@ -4598,8 +4689,16 @@ func (bifrost *Bifrost) tryStreamRequest(ctx *schemas.BifrostContext, req *schem // shared processedResponse or processedError objects. streamResponse := providerUtils.BuildClientStreamChunk(ctx, processedResponse, processedError) - // Send the processed message to the output stream - outputStream <- streamResponse + // Guarded send: if the consumer abandons outputStream (client + // disconnect, ctx cancel), drain the upstream shortCircuit.Stream + // so its producer can exit cleanly instead of blocking on its send. + select { + case outputStream <- streamResponse: + case <-ctx.Done(): + for range shortCircuit.Stream { + } + return + } //TODO: Release the processed response immediately after use } @@ -4629,17 +4728,31 @@ func (bifrost *Bifrost) tryStreamRequest(ctx *schemas.BifrostContext, req *schem msg := bifrost.getChannelMessage(*preReq) msg.Context = ctx - // Check if provider is closing before attempting to send (lock-free atomic check) - // This prevents "send on closed channel" panics during provider removal/update + // If the queue is closing, check whether the provider was updated (new queue + // available) or removed. On update, transparently re-route to the new queue + // so in-flight producers don't get spurious errors. On removal, error out. + // + // Use a direct sync.Map lookup instead of getProviderQueue to avoid the + // lazy-creation path: getProviderQueue can resurrect a provider that was + // just removed by RemoveProvider if the account config still exists. if pq.isClosing() { - bifrost.releaseChannelMessage(msg) - bifrostErr := newBifrostErrorFromMsg("provider is shutting down") - bifrostErr.ExtraFields = schemas.BifrostErrorExtraFields{ - RequestType: req.RequestType, - Provider: provider, - ModelRequested: model, + var reroutedPq *ProviderQueue + if val, ok := bifrost.requestQueues.Load(provider); ok { + if candidate := val.(*ProviderQueue); candidate != pq && !candidate.isClosing() { + reroutedPq = candidate + } } - return nil, bifrostErr + if reroutedPq == nil { + bifrost.releaseChannelMessage(msg) + bifrostErr := newBifrostErrorFromMsg("provider is shutting down") + bifrostErr.ExtraFields = schemas.BifrostErrorExtraFields{ + RequestType: req.RequestType, + Provider: provider, + ModelRequested: model, + } + return nil, bifrostErr + } + pq = reroutedPq } // Use select with done channel to detect shutdown during send @@ -4721,6 +4834,11 @@ func (bifrost *Bifrost) tryStreamRequest(ctx *schemas.BifrostContext, req *schem return newBifrostMessageChan(recoveredResp), nil } return nil, &bifrostErrVal + case <-ctx.Done(): + // Do NOT releaseChannelMessage here — see the identical note in tryRequest. + // Worker still holds msg.ResponseStream/msg.Err; releasing now corrupts the + // next request that reuses those pooled channels. + return nil, newBifrostCtxDoneError(ctx, provider, model, req.RequestType, "while waiting for stream response") } } @@ -4842,7 +4960,7 @@ func executeRequestWithRetries[T any]( // the SSE stream instead of returning proper HTTP error status codes. 
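A distilled sketch of that guarded forward (toy chunk type, illustrative names): on consumer abandonment the forwarder drains the upstream channel to completion, so the upstream producer's unbuffered sends always find a receiver and the goroutine chain unwinds instead of leaking.

package main

import (
	"context"
	"fmt"
)

// forward relays chunks until upstream closes. If the consumer abandons
// out (ctx cancelled), it drains upstream so the producer can finish.
func forward(ctx context.Context, upstream <-chan string, out chan<- string) {
	for chunk := range upstream {
		select {
		case out <- chunk:
		case <-ctx.Done():
			for range upstream { // let the producer's remaining sends complete
			}
			return
		}
	}
}

func main() {
	ctx, cancel := context.WithCancel(context.Background())
	upstream := make(chan string) // unbuffered: the producer blocks per send
	out := make(chan string)      // the consumer reads one chunk, then leaves

	go func() {
		defer close(upstream)
		for i := 0; i < 3; i++ {
			upstream <- fmt.Sprintf("chunk-%d", i)
		}
	}()

	done := make(chan struct{})
	go func() { forward(ctx, upstream, out); close(done) }()

	fmt.Println("consumer got:", <-out) // take one chunk...
	cancel()                            // ...then disconnect
	<-done
	fmt.Println("forwarder exited; producer fully drained")
}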
if bifrostError == nil { if streamChan, ok := any(result).(chan *schemas.BifrostStreamChunk); ok { - checkedStream, drainDone, firstChunkErr := providerUtils.CheckFirstStreamChunkForError(streamChan) + checkedStream, drainDone, firstChunkErr := providerUtils.CheckFirstStreamChunkForError(ctx, streamChan) if firstChunkErr != nil { <-drainDone bifrostError = firstChunkErr @@ -4937,7 +5055,38 @@ func (bifrost *Bifrost) requestWorker(provider schemas.Provider, config *schemas } }() - for req := range pq.queue { + for { + var req *ChannelMessage + select { + case r := <-pq.queue: + req = r + case <-pq.done: + // Provider is shutting down. Drain any buffered requests and send + // back errors so callers are not left blocked on their response channel. + for { + select { + case r := <-pq.queue: + provKey, mod, _ := r.GetRequestFields() + select { + case r.Err <- schemas.BifrostError{ + IsBifrostError: false, + Error: &schemas.ErrorField{ + Message: "provider is shutting down", + }, + ExtraFields: schemas.BifrostErrorExtraFields{ + RequestType: r.RequestType, + Provider: provKey, + ModelRequested: mod, + }, + }: + case <-r.Context.Done(): + } + default: + return + } + } + } + _, model, _ := req.BifrostRequest.GetRequestFields() var result *schemas.BifrostResponse @@ -5075,12 +5224,16 @@ func (bifrost *Bifrost) requestWorker(provider schemas.Provider, config *schemas } return resp, nil } - // Store a finalizer callback to create aggregated post-hook spans at stream end - // This closure captures the pipeline reference and releases it after finalization + // Store a finalizer callback to create aggregated post-hook spans at stream end. + // Wrapped in sync.Once so the normal end-of-stream invocation and a deferred + // safety-net invocation (e.g. from a provider goroutine's panic path) cannot + // double-release the pipeline. + var finalizerOnce sync.Once postHookSpanFinalizer := func(ctx context.Context) { - pipeline.FinalizeStreamingPostHookSpans(ctx) - // Release the pipeline AFTER finalizing spans (not before streaming completes) - bifrost.releasePluginPipeline(pipeline) + finalizerOnce.Do(func() { + pipeline.FinalizeStreamingPostHookSpans(ctx) + bifrost.releasePluginPipeline(pipeline) + }) } req.Context.SetValue(schemas.BifrostContextKeyPostHookSpanFinalizer, postHookSpanFinalizer) } @@ -5206,6 +5359,16 @@ func (bifrost *Bifrost) handleProviderRequest(provider schemas.Provider, config } response.RerankResponse = rerankResponse case schemas.OCRRequest: + var customProviderConfig *schemas.CustomProviderConfig + if config != nil { + customProviderConfig = config.CustomProviderConfig + } + if bifrostError := providerUtils.CheckOperationAllowed(provider.GetProviderKey(), customProviderConfig, schemas.OCRRequest); bifrostError != nil { + if req.BifrostRequest.OCRRequest != nil { + bifrostError.ExtraFields.ModelRequested = req.BifrostRequest.OCRRequest.Model + } + return nil, bifrostError + } ocrResponse, bifrostError := provider.OCR(req.Context, key, req.BifrostRequest.OCRRequest) if bifrostError != nil { return nil, bifrostError @@ -5984,6 +6147,47 @@ func (bifrost *Bifrost) getChannelMessage(req schemas.BifrostRequest) *ChannelMe return msg } +// drainQueueWithErrors drains all buffered messages from pq and sends each a +// "provider is shutting down" error. It must be called after all workers for +// the queue have exited (i.e. 
after wg.Wait()) to cover the TOCTOU window: +// a producer that passed isClosing() just before signalClosing fired can still +// win the `case pq.queue <- msg` branch in tryRequest, landing a message in +// the queue after the last worker's drain loop already exited via `default:`. +// Without this sweep, those callers block forever on <-msg.Response / <-msg.Err. +// +// Residual TOCTOU window (known limitation): this sweep runs exactly once via +// a non-blocking `select { default: }`. A producer that deposits a message +// after the sweep's `default:` branch exits has no worker and no sweep to drain +// it — the caller will block until its own context is cancelled. Fully closing +// this window requires a sender-side reference count (so the last producer can +// signal "queue is fully idle"), which is intentionally not implemented because +// it would add per-send atomic overhead on the hot path. +func (bifrost *Bifrost) drainQueueWithErrors(pq *ProviderQueue) { + for { + select { + case r := <-pq.queue: + provKey, mod, _ := r.GetRequestFields() + select { + case r.Err <- schemas.BifrostError{ + IsBifrostError: false, + Error: &schemas.ErrorField{Message: "provider is shutting down"}, + ExtraFields: schemas.BifrostErrorExtraFields{ + RequestType: r.RequestType, + Provider: provKey, + ModelRequested: mod, + }, + }: + case <-r.Context.Done(): + // No time.After needed: r.Err is a buffered channel of size 1 freshly + // allocated per request, so the send always completes immediately unless + // the caller already cancelled. ctx.Done() is the only valid escape. + } + default: + return + } + } +} + // releaseChannelMessage returns a ChannelMessage and its channels to their respective pools. func (bifrost *Bifrost) releaseChannelMessage(msg *ChannelMessage) { // Put channels back in pools @@ -6491,15 +6695,12 @@ func (bifrost *Bifrost) Shutdown() { if bifrost.ctx.Err() == nil && bifrost.cancel != nil { bifrost.cancel() } - // ALWAYS close all provider queues to signal workers to stop, - // even if context was already cancelled. This prevents goroutine leaks. - // Use the ProviderQueue lifecycle: signal closing, then close the queue + // Signal all provider queues to close. Workers exit via pq.done; + // we never close pq.queue to avoid "send on closed channel" panics in + // producers that are concurrently in tryRequest. bifrost.requestQueues.Range(func(key, value interface{}) bool { pq := value.(*ProviderQueue) - // Signal closing to producers (uses sync.Once internally) pq.signalClosing() - // Close the queue to signal workers (uses sync.Once internally) - pq.closeQueue() return true }) @@ -6510,6 +6711,12 @@ func (bifrost *Bifrost) Shutdown() { return true }) + // Final drain sweep — same reasoning as RemoveProvider's Step 3b. + bifrost.requestQueues.Range(func(key, value interface{}) bool { + bifrost.drainQueueWithErrors(value.(*ProviderQueue)) + return true + }) + // Cleanup MCP manager if bifrost.MCPManager != nil { err := bifrost.MCPManager.Cleanup() diff --git a/core/bifrost_test.go b/core/bifrost_test.go index cb22f5e359..6944ed1d9d 100644 --- a/core/bifrost_test.go +++ b/core/bifrost_test.go @@ -3,8 +3,10 @@ package bifrost import ( "context" "fmt" + "runtime" "strings" "sync" + "sync/atomic" "testing" "time" @@ -1300,3 +1302,998 @@ func TestUpdateProvider_ProviderSliceIntegrity(t *testing.T) { } }) } + +// TestProviderQueue_SendOnClosedChannel_Race demonstrates the TOCTOU race that +// caused the "send on closed channel" production panic in the OLD code. 
+// +// The old code called close(pq.queue) during provider shutdown. The sequence: +// 1. Producer calls isClosing() → false (queue is still open) +// 2. Concurrently: shutdown calls signalClosing() then close(pq.queue) +// 3. Producer enters select { case pq.queue <- msg: ... case <-pq.done: ... } +// → PANIC: Go's selectgo iterates cases in a randomised pollorder. When the +// closed-channel send case is checked first, it immediately panics via +// goto sclose — before it can reach the done case. +// The case <-pq.done: guard only saves you when done happens to be checked +// first in that random ordering (≈50 % of the time with two cases). +// +// THE FIX: pq.queue is never closed. See the ProviderQueue struct comment for +// the full explanation. This test is kept as a proof-of-concept showing why +// closing pq.queue is unsafe; the fix is validated by TestProviderQueue_NoPanicWithoutCloseQueue. +// +// We run many iterations so that the panic is statistically certain to surface +// at least once, confirming the hypothesis. +func TestProviderQueue_SendOnClosedChannel_Race(t *testing.T) { + // With two select cases each iteration has a ~50 % chance of panicking. + // The probability of never panicking in 200 iterations is (0.5)^200 ≈ 0. + const iterations = 200 + panicCount := 0 + + for i := 0; i < iterations; i++ { + func() { + pq := &ProviderQueue{ + queue: make(chan *ChannelMessage, 10), + done: make(chan struct{}), + signalOnce: sync.Once{}, + } + + // Synchronization barriers to force the exact race interleaving. + passedIsClosingCheck := make(chan struct{}) + queueClosed := make(chan struct{}) + + var panicked bool + var wg sync.WaitGroup + wg.Add(1) + + // Producer — mirrors the hot path in tryRequest. + go func() { + defer wg.Done() + defer func() { + if r := recover(); r != nil && fmt.Sprint(r) == "send on closed channel" { + panicked = true + } + }() + + // Step 1: isClosing() passes — queue is open. + if pq.isClosing() { + return + } + + // Signal: past the isClosing() gate. + close(passedIsClosingCheck) + + // Wait for the queue to be closed. This represents the real work + // tryRequest does between the isClosing() check and the select + // (MCP setup, tracer lookup, plugin pipeline acquisition). + <-queueClosed + + // Step 2: enter the exact select guard used in production. + // pq.queue is closed AND pq.done is closed. + // When selectgo picks the send case first in its random pollorder + // it hits goto sclose and panics — the done case cannot save it. + msg := &ChannelMessage{} + select { + case pq.queue <- msg: // panics ~50 % of iterations + case <-pq.done: // selected the other ~50 % + } + }() + + // Closer — mirrors UpdateProvider / RemoveProvider. + go func() { + <-passedIsClosingCheck + pq.signalClosing() // closes done, sets closing = 1 + close(pq.queue) + close(queueClosed) // release the producer into the select + }() + + wg.Wait() + if panicked { + panicCount++ + } + }() + } + + if panicCount == 0 { + t.Fatalf("expected at least one 'send on closed channel' panic across %d iterations, got none", iterations) + } + t.Logf("confirmed: panic triggered in %d / %d iterations — hypothesis is correct", panicCount, iterations) +} + +// ============================================================================= +// ProviderQueue Unit Tests +// +// These tests exercise the ProviderQueue lifecycle in isolation — no full +// Bifrost instance required. They validate the core safety invariants that +// prevent the "send on closed channel" panic. 
+// ============================================================================= + +// newTestChannelMessage creates a minimal ChannelMessage suitable for drain tests. +// The Err channel is buffered (size 1) so the worker can send without blocking. +func newTestChannelMessage(ctx *schemas.BifrostContext) *ChannelMessage { + return &ChannelMessage{ + BifrostRequest: schemas.BifrostRequest{ + RequestType: schemas.ChatCompletionRequest, + ChatRequest: &schemas.BifrostChatRequest{ + Provider: schemas.OpenAI, + Model: "gpt-4", + }, + }, + Context: ctx, + Response: make(chan *schemas.BifrostResponse, 1), + Err: make(chan schemas.BifrostError, 1), + } +} + +// TestProviderQueue_IsClosingStateTransition verifies the atomic state flag: +// isClosing() must return false before signalClosing() and true after. +func TestProviderQueue_IsClosingStateTransition(t *testing.T) { + pq := &ProviderQueue{ + queue: make(chan *ChannelMessage, 10), + done: make(chan struct{}), + signalOnce: sync.Once{}, + } + + if pq.isClosing() { + t.Fatal("isClosing() must be false before signalClosing() is called") + } + + pq.signalClosing() + + if !pq.isClosing() { + t.Fatal("isClosing() must be true after signalClosing() is called") + } + + // done channel must also be closed + select { + case <-pq.done: + // correct: done is closed + default: + t.Fatal("pq.done must be closed after signalClosing()") + } + + // queue channel must remain OPEN — this is the core of the fix + // (sending should not panic even though done is closed) + panicked := false + func() { + defer func() { + if r := recover(); r != nil { + panicked = true + } + }() + select { + case pq.queue <- &ChannelMessage{}: + case <-pq.done: // done is closed so this is always ready — no panic + } + }() + if panicked { + t.Fatal("queue channel must stay open after signalClosing() — sending to it must not panic") + } +} + +// TestProviderQueue_SignalOnceIdempotent verifies that calling signalClosing() +// multiple times is safe. sync.Once ensures done is only closed once and the +// atomic store only happens once — no "close of closed channel" panic. +func TestProviderQueue_SignalOnceIdempotent(t *testing.T) { + pq := &ProviderQueue{ + queue: make(chan *ChannelMessage, 10), + done: make(chan struct{}), + signalOnce: sync.Once{}, + } + + defer func() { + if r := recover(); r != nil { + t.Fatalf("unexpected panic from multiple signalClosing() calls: %v", r) + } + }() + + pq.signalClosing() + pq.signalClosing() + pq.signalClosing() + + if !pq.isClosing() { + t.Fatal("isClosing() must be true after multiple signalClosing() calls") + } +} + +// TestProviderQueue_WorkerExitsViaDone verifies that a worker running the +// fixed select loop exits cleanly after signalClosing() without closeQueue(). +// Before the fix, workers used `for req := range pq.queue` which required +// the channel to be closed. After the fix, done is the exit signal. 
+func TestProviderQueue_WorkerExitsViaDone(t *testing.T) { + pq := &ProviderQueue{ + queue: make(chan *ChannelMessage, 10), + done: make(chan struct{}), + signalOnce: sync.Once{}, + } + + workerExited := make(chan struct{}) + + // Minimal worker loop — mirrors the exact select pattern in requestWorker + go func() { + defer close(workerExited) + for { + select { + case r, ok := <-pq.queue: + if !ok { + return + } + _ = r // process (no-op in this test) + case <-pq.done: + // Drain remaining buffered items (queue is empty here) + for { + select { + case <-pq.queue: + default: + return + } + } + } + } + }() + + // Worker is now blocked on the select. Signal shutdown WITHOUT closing queue. + pq.signalClosing() + + select { + case <-workerExited: + // correct: worker exited via done + case <-time.After(2 * time.Second): + t.Fatal("worker did not exit after signalClosing() — it may be stuck on range over unclosed channel") + } +} + +// TestProviderQueue_WorkerDrainSendsErrors verifies the drain behaviour when +// done fires while items are still buffered: every buffered ChannelMessage must +// receive a "provider is shutting down" error on its Err channel. No client +// should be left blocked waiting for a response that will never come. +// +// This test exercises the drain path directly — same code as requestWorker's +// case <-pq.done: branch — to avoid a non-deterministic select race between the +// normal processing path and the done path. +func TestProviderQueue_WorkerDrainSendsErrors(t *testing.T) { + const numBuffered = 5 + + pq := &ProviderQueue{ + queue: make(chan *ChannelMessage, numBuffered+2), + done: make(chan struct{}), + signalOnce: sync.Once{}, + } + + ctx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline) + + // Pre-fill queue — simulates requests buffered when done fires + msgs := make([]*ChannelMessage, numBuffered) + for i := 0; i < numBuffered; i++ { + msgs[i] = newTestChannelMessage(ctx) + pq.queue <- msgs[i] + } + + // Signal closing: done is now closed + pq.signalClosing() + + // Execute the drain path synchronously — exactly what requestWorker does in + // the case <-pq.done: branch. This is deterministic: we know done is closed + // and the queue has numBuffered items. 
+ <-pq.done // fires immediately since signalClosing was already called +drainLoop: + for { + select { + case r := <-pq.queue: + provKey, mod, _ := r.GetRequestFields() + r.Err <- schemas.BifrostError{ + IsBifrostError: false, + Error: &schemas.ErrorField{ + Message: "provider is shutting down", + }, + ExtraFields: schemas.BifrostErrorExtraFields{ + RequestType: r.RequestType, + Provider: provKey, + ModelRequested: mod, + }, + } + default: + break drainLoop + } + } + + // Verify every message received a shutdown error + for i, msg := range msgs { + select { + case bifrostErr := <-msg.Err: + if bifrostErr.Error == nil { + t.Errorf("message %d: received nil Error field", i) + continue + } + if bifrostErr.Error.Message != "provider is shutting down" { + t.Errorf("message %d: expected 'provider is shutting down', got %q", + i, bifrostErr.Error.Message) + } + if bifrostErr.ExtraFields.Provider != schemas.OpenAI { + t.Errorf("message %d: expected provider %s, got %s", + i, schemas.OpenAI, bifrostErr.ExtraFields.Provider) + } + if bifrostErr.ExtraFields.RequestType != schemas.ChatCompletionRequest { + t.Errorf("message %d: expected requestType %v, got %v", + i, schemas.ChatCompletionRequest, bifrostErr.ExtraFields.RequestType) + } + default: + t.Errorf("message %d: no error received — client would be left hanging indefinitely", i) + } + } +} + +// TestProviderQueue_NoPanicWithoutCloseQueue verifies that the fixed hot path +// — select { case pq.queue <- msg | case <-pq.done } — never panics when +// signalClosing() fires but the queue channel is NOT closed. +// +// This is the direct inverse of TestProviderQueue_SendOnClosedChannel_Race: +// that test proves the old code panics ~50% of the time; this test proves +// the fixed code panics 0% of the time. +func TestProviderQueue_NoPanicWithoutCloseQueue(t *testing.T) { + const iterations = 500 + + for i := 0; i < iterations; i++ { + func() { + pq := &ProviderQueue{ + queue: make(chan *ChannelMessage, 10), + done: make(chan struct{}), + signalOnce: sync.Once{}, + } + + passedIsClosingCheck := make(chan struct{}) + shutdownDone := make(chan struct{}) + + var panicked bool + var wg sync.WaitGroup + wg.Add(1) + + // Producer: mirrors the tryRequest hot path after the fix. + // Passes isClosing(), waits for signalClosing, then sends. + // The queue channel is NEVER closed — only done is closed. + go func() { + defer wg.Done() + defer func() { + if r := recover(); r != nil { + panicked = true + } + }() + + if pq.isClosing() { + return + } + close(passedIsClosingCheck) + <-shutdownDone + + msg := &ChannelMessage{} + select { + case pq.queue <- msg: // queue is open → safe to send + case <-pq.done: // done is closed → selected immediately + } + }() + + // Closer: signal shutdown but never close the queue channel + go func() { + <-passedIsClosingCheck + pq.signalClosing() // closes done; does NOT close queue + close(shutdownDone) + }() + + wg.Wait() + + if panicked { + t.Errorf("iteration %d: unexpected panic — queue must not be closed in the fixed path", i) + } + }() + + if t.Failed() { + return + } + } + + t.Logf("confirmed: zero panics in %d iterations with the fix applied", iterations) +} + +// ============================================================================= +// UpdateProvider Lifecycle Tests +// +// These tests verify the three key invariants of the UpdateProvider fix: +// 1. New queue is stored BEFORE signalClosing fires (stale producers re-route) +// 2. Transfer happens BEFORE signalClosing (items go to new workers, not errored) +// 3. 
Concurrent producers + UpdateProvider produce zero panics +// ============================================================================= + +// TestUpdateProvider_StaleProducerReroutes verifies that a "stale producer" — +// a goroutine that fetched oldPq before UpdateProvider atomically replaced it — +// can transparently re-route to newPq when it later detects isClosing(). +// +// The re-routing logic in tryRequest is: +// +// if pq.isClosing() { +// if newPq, err := bifrost.getProviderQueue(provider); err == nil && newPq != pq { +// pq = newPq // transparent re-route +// } +// } +// +// This test exercises that exact sequence without a full Bifrost instance. +func TestUpdateProvider_StaleProducerReroutes(t *testing.T) { + var requestQueues sync.Map + provider := schemas.OpenAI + + oldPq := &ProviderQueue{ + queue: make(chan *ChannelMessage, 10), + done: make(chan struct{}), + signalOnce: sync.Once{}, + } + newPq := &ProviderQueue{ + queue: make(chan *ChannelMessage, 10), + done: make(chan struct{}), + signalOnce: sync.Once{}, + } + + // Initial state: requestQueues holds oldPq + requestQueues.Store(provider, oldPq) + + // Stale producer: fetched its reference before UpdateProvider ran + stalePq := oldPq + + // Simulate UpdateProvider steps 2 + 4: + // Step 2: atomically replace — new producers now get newPq + requestQueues.Store(provider, newPq) + // Step 4: signal old closing — stale producers will detect this + oldPq.signalClosing() + + // --- Stale producer detects isClosing and attempts re-route --- + var reroutedPq *ProviderQueue + if stalePq.isClosing() { + if val, ok := requestQueues.Load(provider); ok { + candidate := val.(*ProviderQueue) + if candidate != stalePq { + reroutedPq = candidate + } + } + } + + if reroutedPq == nil { + t.Fatal("stale producer failed to re-route: re-route returned nil (check step ordering)") + } + if reroutedPq != newPq { + t.Fatal("stale producer re-routed to wrong queue: expected newPq") + } + if reroutedPq.isClosing() { + t.Fatal("re-routed queue is already closing — re-route is useless (newPq must be fresh)") + } + + // Verify: sending to re-routed queue succeeds without panic + panicked := false + func() { + defer func() { + if r := recover(); r != nil { + panicked = true + } + }() + msg := &ChannelMessage{} + select { + case reroutedPq.queue <- msg: + case <-reroutedPq.done: + t.Error("newPq.done fired — newPq should be open") + } + }() + if panicked { + t.Fatal("panic while sending to re-routed queue — queue must not be closed") + } +} + +// TestUpdateProvider_TransferOrdering verifies the ordering invariant: +// items are moved from oldPq to newPq BEFORE signalClosing(oldPq) is called. +// +// Observable consequence: during the entire transfer loop, oldPq.isClosing() +// must remain false. Only after transfer completes does signalClosing fire. 
+func TestUpdateProvider_TransferOrdering(t *testing.T) { + const numMessages = 8 + + oldPq := &ProviderQueue{ + queue: make(chan *ChannelMessage, numMessages+2), + done: make(chan struct{}), + signalOnce: sync.Once{}, + } + newPq := &ProviderQueue{ + queue: make(chan *ChannelMessage, numMessages+2), + done: make(chan struct{}), + signalOnce: sync.Once{}, + } + + // Pre-fill oldPq — simulates buffered requests at the moment UpdateProvider runs + for i := 0; i < numMessages; i++ { + oldPq.queue <- &ChannelMessage{} + } + + // Invariant check before transfer begins + if oldPq.isClosing() { + t.Fatal("invariant violated: oldPq already closing before transfer begins") + } + + // Perform transfer, mirroring UpdateProvider step 3. + // Record whether isClosing() ever fired during the loop. + closingDuringTransfer := false + transferred := 0 + for { + select { + case msg := <-oldPq.queue: + if oldPq.isClosing() { + closingDuringTransfer = true + } + newPq.queue <- msg + transferred++ + default: + goto transferComplete + } + } +transferComplete: + + if closingDuringTransfer { + t.Error("invariant violated: oldPq was already closing during transfer — " + + "signalClosing must fire AFTER the transfer loop completes") + } + + // NOW signal closing, mirroring UpdateProvider step 4 + oldPq.signalClosing() + + if !oldPq.isClosing() { + t.Error("expected isClosing() == true after signalClosing()") + } + + // All messages must have moved to newPq + if transferred != numMessages { + t.Errorf("expected %d messages transferred, got %d", numMessages, transferred) + } + if len(newPq.queue) != numMessages { + t.Errorf("expected %d messages in newPq after transfer, got %d", numMessages, len(newPq.queue)) + } + if len(oldPq.queue) != 0 { + t.Errorf("expected 0 messages remaining in oldPq after transfer, got %d", len(oldPq.queue)) + } +} + +// TestUpdateProvider_NoPanicConcurrentAccess verifies that concurrent producers +// sending to a queue that is being replaced (UpdateProvider-style) never cause +// a "send on closed channel" panic. +// +// This test directly models the production scenario that triggered the bug: +// many goroutines continuously send to a ProviderQueue while UpdateProvider +// atomically swaps the queue and signals the old one closing. With the fix +// (queue channel is never closed), the select in producers is always safe. 
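+//
+// The safety argument in miniature: because the queue channel stays open for
+// the life of the process, the send case below can never be a send on a
+// closed channel (this is the exact select the producers in this test run):
+//
+//	select {
+//	case pq.queue <- msg: // queue is never closed → cannot panic
+//	case <-pq.done:       // shutdown signalled → fail the request instead
+//	}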
+func TestUpdateProvider_NoPanicConcurrentAccess(t *testing.T) { + const ( + numProducers = 10 + numUpdates = 30 + producerRunTime = 300 * time.Millisecond + ) + + var requestQueues sync.Map + provider := schemas.OpenAI + + makePq := func() *ProviderQueue { + return &ProviderQueue{ + queue: make(chan *ChannelMessage, 200), + done: make(chan struct{}), + signalOnce: sync.Once{}, + } + } + + initialPq := makePq() + requestQueues.Store(provider, initialPq) + + var panicCount int64 + var transferDropCount int64 + + stop := make(chan struct{}) + var producerWg sync.WaitGroup + + // Drainer: continuously empties queues so producers never block on a full queue + drainStop := make(chan struct{}) + go func() { + for { + select { + case <-drainStop: + return + default: + if val, ok := requestQueues.Load(provider); ok { + pq := val.(*ProviderQueue) + select { + case <-pq.queue: + default: + } + } + runtime.Gosched() + } + } + }() + + // Producers: continuously simulate the tryRequest hot path + for i := 0; i < numProducers; i++ { + producerWg.Add(1) + go func() { + defer producerWg.Done() + for { + select { + case <-stop: + return + default: + } + + val, ok := requestQueues.Load(provider) + if !ok { + runtime.Gosched() + continue + } + pq := val.(*ProviderQueue) + + func() { + defer func() { + if r := recover(); r != nil { + atomic.AddInt64(&panicCount, 1) + } + }() + + // Re-route check (mirrors tryRequest) + if pq.isClosing() { + if newVal, ok2 := requestQueues.Load(provider); ok2 { + if candidate := newVal.(*ProviderQueue); candidate != pq { + pq = candidate + } + } + // If still closing (RemoveProvider path), just return + if pq.isClosing() { + return + } + } + + msg := &ChannelMessage{} + select { + case pq.queue <- msg: + case <-pq.done: + case <-stop: // unblock immediately when the test signals stop + } + }() + + runtime.Gosched() + } + }() + } + + // Updater: repeatedly performs UpdateProvider-style queue replacements + var updaterWg sync.WaitGroup + updaterWg.Add(1) + go func() { + defer updaterWg.Done() + for i := 0; i < numUpdates; i++ { + val, ok := requestQueues.Load(provider) + if !ok { + continue + } + oldPq := val.(*ProviderQueue) + newPq := makePq() + + // Mirror production UpdateProvider step order exactly: + // Step 2: expose newPq first so stale producers can re-route to it + // once they see oldPq is closing. + requestQueues.Store(provider, newPq) + + // Step 3: transfer buffered messages oldPq → newPq. + drain: + for { + select { + case msg := <-oldPq.queue: + select { + case newPq.queue <- msg: + default: + // newPq full during transfer — mirrors production cancel path. + atomic.AddInt64(&transferDropCount, 1) + } + default: + break drain + } + } + + // Step 4: signal closing — producers holding a stale oldPq ref now + // re-route to newPq (already in the map from step 2). 
+			oldPq.signalClosing()
+
+			time.Sleep(5 * time.Millisecond)
+		}
+	}()
+
+	time.Sleep(producerRunTime)
+	close(stop)
+	close(drainStop)
+	producerWg.Wait()
+	updaterWg.Wait()
+
+	if n := atomic.LoadInt64(&panicCount); n > 0 {
+		t.Errorf("detected %d panic(s) — fix did not eliminate the concurrent-access race", n)
+	} else {
+		t.Logf("confirmed: zero panics across %d producers + %d queue replacements over %v",
+			numProducers, numUpdates, producerRunTime)
+	}
+	if drops := atomic.LoadInt64(&transferDropCount); drops > 0 {
+		t.Logf("note: %d message(s) dropped during transfer because newPq's buffer was full at that moment; this does not affect panic correctness", drops)
+	}
+}
+
+// =============================================================================
+// RemoveProvider Lifecycle Tests
+//
+// These tests verify the behavioral contract of RemoveProvider:
+// 1. signalClosing() blocks new producers (isClosing() → true)
+// 2. Buffered items in the queue get "provider is shutting down" errors
+// 3. Workers exit cleanly and the WaitGroup reaches zero
+// =============================================================================
+
+// TestRemoveProvider_BlocksNewProducers verifies that after signalClosing(),
+// isClosing() returns true. Producers check this flag before sending and return
+// a "provider is shutting down" error rather than trying to enqueue.
+func TestRemoveProvider_BlocksNewProducers(t *testing.T) {
+	pq := &ProviderQueue{
+		queue:      make(chan *ChannelMessage, 10),
+		done:       make(chan struct{}),
+		signalOnce: sync.Once{},
+	}
+
+	// Sanity: before shutdown, producers can proceed
+	if pq.isClosing() {
+		t.Fatal("isClosing() must be false before RemoveProvider runs")
+	}
+
+	// RemoveProvider step 2: signal closing
+	pq.signalClosing()
+
+	// New producers must see isClosing() == true and abort
+	if !pq.isClosing() {
+		t.Fatal("isClosing() must be true after signalClosing() (RemoveProvider)")
+	}
+
+	// done must be closed so any producer blocked in the select unblocks immediately
+	select {
+	case <-pq.done:
+		// correct
+	default:
+		t.Fatal("pq.done must be closed after signalClosing() so blocking producers unblock")
+	}
+
+	// CRITICAL: queue channel must remain OPEN — closing it would cause panics in
+	// any producer that entered the select before seeing isClosing().
+	// With the fix, we NEVER close the queue channel.
+	panicked := false
+	func() {
+		defer func() {
+			if r := recover(); r != nil {
+				panicked = true
+			}
+		}()
+		// A select with done closed always takes the done case — safe, no panic
+		select {
+		case pq.queue <- &ChannelMessage{}:
+		case <-pq.done:
+		}
+	}()
+	if panicked {
+		t.Fatal("queue channel must stay open after signalClosing() — closing it causes panics")
+	}
+}
+
+// TestRemoveProvider_BufferedRequestsGetErrors verifies the drain contract:
+// items queued BEFORE signalClosing fires must each receive a
+// "provider is shutting down" error on their Err channel. No client should be
+// left hanging.
+//
+// This test exercises the drain logic directly — the same code path that
+// requestWorker executes in its case <-pq.done: branch — to avoid the
+// non-deterministic select race where the normal processing path can pick up
+// items before done fires.
+func TestRemoveProvider_BufferedRequestsGetErrors(t *testing.T) { + const numBuffered = 8 + + pq := &ProviderQueue{ + queue: make(chan *ChannelMessage, numBuffered+5), + done: make(chan struct{}), + signalOnce: sync.Once{}, + } + + ctx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline) + + // Buffer requests — simulates requests already queued when RemoveProvider runs + msgs := make([]*ChannelMessage, numBuffered) + for i := 0; i < numBuffered; i++ { + msgs[i] = newTestChannelMessage(ctx) + pq.queue <- msgs[i] + } + + // RemoveProvider step 2: signal closing + pq.signalClosing() + + // Execute the drain path — exactly what requestWorker does in case <-pq.done: + <-pq.done // fires immediately since signalClosing was already called +drainLoop: + for { + select { + case r := <-pq.queue: + provKey, mod, _ := r.GetRequestFields() + r.Err <- schemas.BifrostError{ + IsBifrostError: false, + Error: &schemas.ErrorField{ + Message: "provider is shutting down", + }, + ExtraFields: schemas.BifrostErrorExtraFields{ + RequestType: r.RequestType, + Provider: provKey, + ModelRequested: mod, + }, + } + default: + break drainLoop + } + } + + // Every buffered message must have received a shutdown error + for i, msg := range msgs { + select { + case bifrostErr := <-msg.Err: + if bifrostErr.Error == nil { + t.Errorf("message %d: got nil Error field in BifrostError", i) + continue + } + if bifrostErr.Error.Message != "provider is shutting down" { + t.Errorf("message %d: expected 'provider is shutting down', got %q", + i, bifrostErr.Error.Message) + } + if bifrostErr.ExtraFields.Provider != schemas.OpenAI { + t.Errorf("message %d: expected provider %s, got %s", + i, schemas.OpenAI, bifrostErr.ExtraFields.Provider) + } + if bifrostErr.ExtraFields.RequestType != schemas.ChatCompletionRequest { + t.Errorf("message %d: expected requestType %v, got %v", + i, schemas.ChatCompletionRequest, bifrostErr.ExtraFields.RequestType) + } + default: + t.Errorf("message %d: no error received — client would be left hanging indefinitely", i) + } + } +} + +// TestRemoveProvider_WorkerWaitGroupCompletes verifies that after signalClosing(), +// the worker goroutine decrements the WaitGroup and wg.Wait() returns promptly. +// This mirrors what RemoveProvider does: signal, then Wait() before cleanup. 
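+//
+// A sketch of the RemoveProvider sequence being mirrored (where the WaitGroup
+// lives is assumed; only the ordering matters here):
+//
+//	pq.signalClosing() // step 2: close done, flip isClosing
+//	wg.Wait()          // step 3: block until every worker has exited
+//	// step 4: cleanup is safe; no worker goroutine touches pq anymore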
+func TestRemoveProvider_WorkerWaitGroupCompletes(t *testing.T) { + pq := &ProviderQueue{ + queue: make(chan *ChannelMessage, 10), + done: make(chan struct{}), + signalOnce: sync.Once{}, + } + + var wg sync.WaitGroup + wg.Add(1) + + // Worker goroutine — mirrors requestWorker's WaitGroup contract + go func() { + defer wg.Done() + for { + select { + case r, ok := <-pq.queue: + if !ok { + return + } + _ = r + case <-pq.done: + // Drain remaining (empty in this test) + for { + select { + case <-pq.queue: + default: + return + } + } + } + } + }() + + // Tiny sleep to ensure worker is parked on select before we signal + time.Sleep(10 * time.Millisecond) + + // RemoveProvider step 2: signal closing + pq.signalClosing() + + // RemoveProvider step 3: wait for workers — must complete promptly + waitReturned := make(chan struct{}) + go func() { + wg.Wait() + close(waitReturned) + }() + + select { + case <-waitReturned: + // correct: WaitGroup reached zero after signalClosing() + case <-time.After(2 * time.Second): + t.Fatal("wg.Wait() did not return after signalClosing() — worker is stuck (would deadlock RemoveProvider)") + } +} + +// TestRemoveProvider_ConcurrentNewProducersDuringShutdown verifies that +// concurrent producers trying to enqueue after RemoveProvider calls +// signalClosing() all get safe "provider is shutting down" errors — none panic. +// This tests the TOCTOU window: producer passes isClosing() check, then done fires. +func TestRemoveProvider_ConcurrentNewProducersDuringShutdown(t *testing.T) { + const numProducers = 50 + + pq := &ProviderQueue{ + queue: make(chan *ChannelMessage, numProducers+10), + done: make(chan struct{}), + signalOnce: sync.Once{}, + } + + var panicCount int64 + var shutdownErrors int64 + var successfulSends int64 + + // Gate: all producers start together after isClosing() passes + passedGate := make(chan struct{}) + var gateOnce sync.Once + shutdownFired := make(chan struct{}) + + var producerWg sync.WaitGroup + + for i := 0; i < numProducers; i++ { + producerWg.Add(1) + go func() { + defer producerWg.Done() + defer func() { + if r := recover(); r != nil { + atomic.AddInt64(&panicCount, 1) + } + }() + + // Each producer checks isClosing() first (mirrors tryRequest) + if pq.isClosing() { + atomic.AddInt64(&shutdownErrors, 1) + return + } + + // Signal that at least one producer passed the isClosing() check + gateOnce.Do(func() { close(passedGate) }) + + // Wait for shutdown to be signaled (the TOCTOU window) + <-shutdownFired + + // Producers now enter the select — with the fix, done is closed but + // queue is NOT closed, so this select is always safe (no panic) + msg := &ChannelMessage{} + select { + case pq.queue <- msg: + atomic.AddInt64(&successfulSends, 1) + case <-pq.done: + atomic.AddInt64(&shutdownErrors, 1) + } + }() + } + + // Wait for at least one producer to pass the isClosing() gate + select { + case <-passedGate: + case <-time.After(2 * time.Second): + t.Fatal("no producer passed the isClosing() check within timeout") + } + + // Signal shutdown (RemoveProvider step 2) — this is the TOCTOU race + pq.signalClosing() + close(shutdownFired) + + producerWg.Wait() + + if n := atomic.LoadInt64(&panicCount); n > 0 { + t.Errorf("detected %d panic(s) — queue must not be closed during concurrent shutdown", n) + } + + t.Logf("result: %d successful sends, %d shutdown errors, %d panics across %d producers", + atomic.LoadInt64(&successfulSends), + atomic.LoadInt64(&shutdownErrors), + atomic.LoadInt64(&panicCount), + numProducers) +} diff --git a/core/go.mod 
b/core/go.mod index 924e204ea2..b85c403ec6 100644 --- a/core/go.mod +++ b/core/go.mod @@ -1,6 +1,6 @@ module github.com/maximhq/bifrost/core -go 1.26.2 +go 1.26.1 require ( cloud.google.com/go v0.123.0 diff --git a/core/internal/llmtests/account.go b/core/internal/llmtests/account.go index ac850830fd..0632f16b6b 100644 --- a/core/internal/llmtests/account.go +++ b/core/internal/llmtests/account.go @@ -88,7 +88,9 @@ type TestScenarios struct { Realtime bool // Realtime API (bidirectional audio/text) Compaction bool // Server-side compaction (context management) InterleavedThinking bool // Interleaved thinking between tool calls (beta) - FastMode bool // Fast mode for Opus 4.6 (beta: research preview) + FastMode bool // Fast mode for Opus 4.6 (beta: research preview) + EagerInputStreaming bool // Fine-grained tool input streaming (Anthropic fine-grained-tool-streaming-2025-05-14) + ServerToolsViaOpenAIEndpoint bool // Anthropic server-tool shapes in tools[] via /v1/chat/completions (web_search / web_fetch / code_execution) } // ComprehensiveTestConfig extends TestConfig with additional scenarios diff --git a/core/internal/llmtests/eager_input_streaming.go b/core/internal/llmtests/eager_input_streaming.go new file mode 100644 index 0000000000..0f074c46af --- /dev/null +++ b/core/internal/llmtests/eager_input_streaming.go @@ -0,0 +1,134 @@ +package llmtests + +import ( + "context" + "os" + "testing" + + bifrost "github.com/maximhq/bifrost/core" + "github.com/maximhq/bifrost/core/schemas" +) + +// RunEagerInputStreamingTest tests that setting eager_input_streaming: true on +// a custom tool succeeds end-to-end against the target Anthropic-family +// provider. Per Table 20 (verified against A overview + B-header), the +// fine-grained-tool-streaming-2025-05-14 beta is supported on Anthropic, +// Bedrock, Vertex, and Azure. +// +// The test verifies: +// 1. The request is accepted (no upstream 400 — which would indicate the +// fine-grained-tool-streaming-2025-05-14 beta header wasn't injected or +// is rejected by the target provider). +// 2. The stream produces a tool call with a valid JSON arguments payload. +// 3. The response is otherwise well-formed. +// +// This intentionally runs across all four providers (no single-provider gate +// unlike RunFastModeTest, which is Opus-4.6-only). +func RunEagerInputStreamingTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig) { + if !testConfig.Scenarios.EagerInputStreaming { + t.Logf("EagerInputStreaming not supported for provider %s", testConfig.Provider) + return + } + + t.Run("EagerInputStreaming", func(t *testing.T) { + if os.Getenv("SKIP_PARALLEL_TESTS") != "true" { + t.Parallel() + } + + chatTool := GetSampleChatTool(SampleToolTypeWeather) + // Opt the tool into fine-grained input streaming. The neutral flag + // on ChatTool is promoted through ToAnthropicChatRequest, which also + // triggers the fine-grained-tool-streaming-2025-05-14 beta header. + eager := true + chatTool.EagerInputStreaming = &eager + + chatMessages := []schemas.ChatMessage{ + CreateBasicChatMessage("What's the weather like in San Francisco? 
answer in celsius"), + } + + request := &schemas.BifrostChatRequest{ + Provider: testConfig.Provider, + Model: testConfig.ChatModel, + Input: chatMessages, + Params: &schemas.ChatParameters{ + MaxCompletionTokens: bifrost.Ptr(200), + Tools: []schemas.ChatTool{*chatTool}, + }, + Fallbacks: testConfig.Fallbacks, + } + + retryConfig := StreamingRetryConfig() + retryContext := TestRetryContext{ + ScenarioName: "EagerInputStreaming", + ExpectedBehavior: map[string]interface{}{ + "should_stream_content": true, + "should_have_tool_calls": true, + "tool_name": "get_weather", + }, + TestMetadata: map[string]interface{}{ + "provider": testConfig.Provider, + "model": testConfig.ChatModel, + "eager_input_streaming": true, + }, + } + + responseChannel, err := WithStreamRetry(t, retryConfig, retryContext, func() (chan *schemas.BifrostStreamChunk, *schemas.BifrostError) { + bfCtx := schemas.NewBifrostContext(ctx, schemas.NoDeadline) + return client.ChatCompletionStreamRequest(bfCtx, request) + }) + + RequireNoError(t, err, "Eager input streaming request failed") + if responseChannel == nil { + t.Fatal("Response channel should not be nil") + } + + accumulator := NewStreamingToolCallAccumulator() + var responseCount int + var sawAny bool + + t.Logf("🔧 Testing eager input streaming (fine-grained-tool-streaming-2025-05-14)...") + + for response := range responseChannel { + if response == nil || response.BifrostChatResponse == nil { + continue + } + responseCount++ + sawAny = true + + if response.BifrostChatResponse.Choices != nil { + for i, choice := range response.BifrostChatResponse.Choices { + if choice.ChatStreamResponseChoice != nil && choice.ChatStreamResponseChoice.Delta != nil { + delta := choice.ChatStreamResponseChoice.Delta + for _, tc := range delta.ToolCalls { + accumulator.AccumulateChatToolCall(i, tc) + } + } + } + } + } + + if !sawAny { + t.Fatal("Expected at least one streaming response chunk") + } + t.Logf("Received %d chunks", responseCount) + + // Validate the accumulated tool call is well-formed. If the + // fine-grained-tool-streaming beta header weren't sent (or the + // provider rejected it), the upstream would have returned a 400 + // before any tool_use blocks were emitted. + toolCalls := accumulator.GetFinalChatToolCalls() + if len(toolCalls) == 0 { + t.Error("Expected at least one tool call in stream") + } + for _, tc := range toolCalls { + if tc.Name == "" { + t.Error("Tool call missing function name") + } + if tc.Arguments == "" { + t.Error("Tool call missing arguments JSON") + } + } + + t.Logf("EagerInputStreaming passed: %d tool calls accumulated", len(toolCalls)) + }) +} diff --git a/core/internal/llmtests/provider_feature_support_test.go b/core/internal/llmtests/provider_feature_support_test.go index 6e4738c282..539f4049c0 100644 --- a/core/internal/llmtests/provider_feature_support_test.go +++ b/core/internal/llmtests/provider_feature_support_test.go @@ -654,6 +654,77 @@ func TestProviderBetaHeaderInjection(t *testing.T) { }, expectHeaders: []string{"computer-use-2025-01-24"}, }, + + // ── Fine-grained tool streaming header (eager_input_streaming) ── + // Per cited citations (A overview table + B-header): EagerInputStreaming + // is supported on Anthropic, Bedrock, Vertex, and Azure — all four + // should auto-inject fine-grained-tool-streaming-2025-05-14 when a + // tool has eager_input_streaming: true. 
+ { + name: "Anthropic/eager_input_streaming_header_added", + provider: schemas.Anthropic, + setupReq: func() *anthropic.AnthropicMessageRequest { + eager := true + return &anthropic.AnthropicMessageRequest{ + Tools: []anthropic.AnthropicTool{{Name: "t1", EagerInputStreaming: &eager}}, + } + }, + expectHeaders: []string{"fine-grained-tool-streaming-2025-05-14"}, + }, + { + name: "Bedrock/eager_input_streaming_header_added", + provider: schemas.Bedrock, + setupReq: func() *anthropic.AnthropicMessageRequest { + eager := true + return &anthropic.AnthropicMessageRequest{ + Tools: []anthropic.AnthropicTool{{Name: "t1", EagerInputStreaming: &eager}}, + } + }, + expectHeaders: []string{"fine-grained-tool-streaming-2025-05-14"}, + }, + { + name: "Vertex/eager_input_streaming_header_added", + provider: schemas.Vertex, + setupReq: func() *anthropic.AnthropicMessageRequest { + eager := true + return &anthropic.AnthropicMessageRequest{ + Tools: []anthropic.AnthropicTool{{Name: "t1", EagerInputStreaming: &eager}}, + } + }, + expectHeaders: []string{"fine-grained-tool-streaming-2025-05-14"}, + }, + { + name: "Azure/eager_input_streaming_header_added", + provider: schemas.Azure, + setupReq: func() *anthropic.AnthropicMessageRequest { + eager := true + return &anthropic.AnthropicMessageRequest{ + Tools: []anthropic.AnthropicTool{{Name: "t1", EagerInputStreaming: &eager}}, + } + }, + expectHeaders: []string{"fine-grained-tool-streaming-2025-05-14"}, + }, + { + name: "eager_input_streaming_header_skipped_when_flag_false", + provider: schemas.Anthropic, + setupReq: func() *anthropic.AnthropicMessageRequest { + eager := false + return &anthropic.AnthropicMessageRequest{ + Tools: []anthropic.AnthropicTool{{Name: "t1", EagerInputStreaming: &eager}}, + } + }, + unexpectHeaders: []string{"fine-grained-tool-streaming-2025-05-14"}, + }, + { + name: "eager_input_streaming_header_skipped_when_unset", + provider: schemas.Anthropic, + setupReq: func() *anthropic.AnthropicMessageRequest { + return &anthropic.AnthropicMessageRequest{ + Tools: []anthropic.AnthropicTool{{Name: "t1"}}, + } + }, + unexpectHeaders: []string{"fine-grained-tool-streaming-2025-05-14"}, + }, } for _, tt := range tests { diff --git a/core/internal/llmtests/server_tools_via_openai.go b/core/internal/llmtests/server_tools_via_openai.go new file mode 100644 index 0000000000..c5ee1d2000 --- /dev/null +++ b/core/internal/llmtests/server_tools_via_openai.go @@ -0,0 +1,152 @@ +package llmtests + +import ( + "context" + "os" + "strings" + "testing" + + bifrost "github.com/maximhq/bifrost/core" + "github.com/maximhq/bifrost/core/schemas" +) + +// RunServerToolsViaOpenAIEndpointTest reproduces the user-reported bug where +// sending an Anthropic-server-tool-shaped entry in tools[] via the OpenAI- +// compatible chat-completions endpoint was silently dropped (Claude responded +// with a prose "I can't check real-time data" fallback). The fix was a +// combination of: +// - ChatTool schema gaining Name + all server-tool variant fields. +// - ToAnthropicChatRequest learning to convert non-function tools (server +// tools) into AnthropicTool with the correct variant embed. +// +// This test sends the exact curl-reported shape via BifrostChatRequest + +// ChatCompletionRequest and asserts the request succeeds end-to-end against +// the provider. It covers three server tools that have single-turn triggers +// (web_search, web_fetch, code_execution) across all supporting providers per +// Table 20. 
Other variants (bash, memory, text_editor, tool_search, +// mcp_toolset, computer_use) require multi-turn tool loops or infra setup +// and are covered by the schema / unit-level round-trip tests instead. +func RunServerToolsViaOpenAIEndpointTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig) { + if !testConfig.Scenarios.ServerToolsViaOpenAIEndpoint { + t.Logf("ServerToolsViaOpenAIEndpoint not supported for provider %s", testConfig.Provider) + return + } + + cases := []struct { + name string + toolType schemas.ChatToolType + toolName string + prompt string + // extra lets the case set server-tool metadata (max_uses etc.). + extra func(*schemas.ChatTool) + // supported reports whether this tool is supported on the given + // provider per Table 20 (cited provider feature matrix). + supported func(schemas.ModelProvider) bool + }{ + { + name: "web_search", + toolType: "web_search_20260209", + toolName: "web_search", + prompt: "What is the weather in San Francisco today? Use the web_search tool.", + extra: func(t *schemas.ChatTool) { + five := 5 + t.MaxUses = &five + t.AllowedCallers = []string{"direct"} + }, + // web_search: Anthropic + Vertex + Azure per Table 20 (not Bedrock). + supported: func(p schemas.ModelProvider) bool { + return p == schemas.Anthropic || p == schemas.Vertex || p == schemas.Azure + }, + }, + { + name: "web_fetch", + toolType: "web_fetch_20260309", + toolName: "web_fetch", + prompt: "Fetch https://example.com and summarise the title.", + extra: func(t *schemas.ChatTool) { + three := 3 + t.MaxUses = &three + }, + // web_fetch: Anthropic + Azure only per Table 20. + supported: func(p schemas.ModelProvider) bool { + return p == schemas.Anthropic || p == schemas.Azure + }, + }, + { + name: "code_execution", + toolType: "code_execution_20250825", + toolName: "code_execution", + prompt: "Compute 2^64 minus 1 using the code_execution tool and return the result.", + // code_execution: Anthropic + Azure only per Table 20. + supported: func(p schemas.ModelProvider) bool { + return p == schemas.Anthropic || p == schemas.Azure + }, + }, + } + + t.Run("ServerToolsViaOpenAIEndpoint", func(t *testing.T) { + if os.Getenv("SKIP_PARALLEL_TESTS") != "true" { + t.Parallel() + } + + for _, tc := range cases { + tc := tc + t.Run(tc.name, func(t *testing.T) { + if !tc.supported(testConfig.Provider) { + t.Skipf("%s not supported on %s per Table 20", tc.name, testConfig.Provider) + } + if os.Getenv("SKIP_PARALLEL_TESTS") != "true" { + t.Parallel() + } + + tool := schemas.ChatTool{ + Type: tc.toolType, + Name: tc.toolName, + } + if tc.extra != nil { + tc.extra(&tool) + } + + req := &schemas.BifrostChatRequest{ + Provider: testConfig.Provider, + Model: testConfig.ChatModel, + Input: []schemas.ChatMessage{ + CreateBasicChatMessage(tc.prompt), + }, + Params: &schemas.ChatParameters{ + MaxCompletionTokens: bifrost.Ptr(500), + Tools: []schemas.ChatTool{tool}, + }, + Fallbacks: testConfig.Fallbacks, + } + + bfCtx := schemas.NewBifrostContext(ctx, schemas.NoDeadline) + resp, err := client.ChatCompletionRequest(bfCtx, req) + if err != nil { + t.Fatalf("%s tool request failed: %s", tc.name, GetErrorMessage(err)) + } + if resp == nil { + t.Fatal("expected non-nil response") + } + + // Regression signals: + // 1. Upstream accepted the request (no error). + // 2. Response is not the prose fallback Claude emits when + // the server-tool was silently stripped pre-fix + // ("I can't/cannot/don't have access to real-time ..."). 
+ // The schema + conversion unit tests prove the outbound + // request carries the tool; this live test proves the + // provider accepts the shape AND actually uses the tool + // rather than answering from parametric memory. + content := GetChatContent(resp) + lc := strings.ToLower(content) + if strings.Contains(lc, "can't access real-time") || + strings.Contains(lc, "cannot access real-time") || + strings.Contains(lc, "don't have access to real-time") { + t.Fatalf("%s regression: tool appears to be ignored, content=%q", tc.name, content) + } + t.Logf("%s tool live call succeeded: chars=%d", tc.name, len(content)) + }) + } + }) +} diff --git a/core/internal/llmtests/tests.go b/core/internal/llmtests/tests.go index af3006b9a1..108894feb4 100644 --- a/core/internal/llmtests/tests.go +++ b/core/internal/llmtests/tests.go @@ -120,6 +120,8 @@ func RunAllComprehensiveTests(t *testing.T, client *bifrost.Bifrost, ctx context RunCompactionTest, RunInterleavedThinkingTest, RunFastModeTest, + RunEagerInputStreamingTest, + RunServerToolsViaOpenAIEndpointTest, } // Execute all test scenarios without raw request/response (default behavior) @@ -239,6 +241,8 @@ func printTestSummary(t *testing.T, testConfig ComprehensiveTestConfig) { {"Compaction", testConfig.Scenarios.Compaction}, {"InterleavedThinking", testConfig.Scenarios.InterleavedThinking}, {"FastMode", testConfig.Scenarios.FastMode}, + {"EagerInputStreaming", testConfig.Scenarios.EagerInputStreaming}, + {"ServerToolsViaOpenAIEndpoint", testConfig.Scenarios.ServerToolsViaOpenAIEndpoint}, } supported := 0 diff --git a/core/internal/mcptests/annotations_test.go b/core/internal/mcptests/annotations_test.go new file mode 100644 index 0000000000..e85b54a79f --- /dev/null +++ b/core/internal/mcptests/annotations_test.go @@ -0,0 +1,220 @@ +package mcptests + +import ( + "encoding/json" + "strings" + "testing" + + "github.com/maximhq/bifrost/core/schemas" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// ============================================================================= +// MCP ANNOTATION TESTS +// +// These tests verify two invariants of the MCP annotations feature: +// +// 1. PRESERVATION: annotations attached to a registered tool survive the full +// MCP→Bifrost conversion and remain accessible on ChatTool.Annotations +// after retrieval from the manager. +// +// 2. ISOLATION: annotations are tagged json:"-" on ChatTool, so they are never +// included in the JSON body forwarded to LLM providers. +// ============================================================================= + +// TestAnnotations_PreservedAfterToolRegistration verifies that annotations set +// on an InProcess ChatTool schema are stored in the tool map without modification. 
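+//
+// Both invariants hinge on where Annotations lives: a ChatTool field that is
+// excluded from serialization via its struct tag (assumed shape, matching the
+// ISOLATION note above):
+//
+//	type ChatTool struct {
+//		// ... function / server-tool fields ...
+//		Annotations *MCPToolAnnotations `json:"-"`
+//	}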
+func TestAnnotations_PreservedAfterToolRegistration(t *testing.T) { + t.Parallel() + + readOnly := true + idempotent := true + + manager := setupMCPManager(t) + + toolSchema := schemas.ChatTool{ + Type: schemas.ChatToolTypeFunction, + Function: &schemas.ChatToolFunction{ + Name: "read_resource", + Description: schemas.Ptr("Reads a resource"), + Parameters: &schemas.ToolFunctionParameters{ + Type: "object", + Properties: schemas.NewOrderedMapFromPairs( + schemas.KV("uri", map[string]interface{}{ + "type": "string", + "description": "URI of the resource to read", + }), + ), + Required: []string{"uri"}, + }, + }, + Annotations: &schemas.MCPToolAnnotations{ + Title: "Resource Reader", + ReadOnlyHint: &readOnly, + IdempotentHint: &idempotent, + }, + } + + err := manager.RegisterTool( + "read_resource", + "Reads a resource", + func(args any) (string, error) { return `{"ok":true}`, nil }, + toolSchema, + ) + require.NoError(t, err) + + ctx := createTestContext() + toolPerClient := manager.GetToolPerClient(ctx) + + var found *schemas.ChatTool +outer1: + for _, tools := range toolPerClient { + for i := range tools { + if tools[i].Function != nil && strings.HasSuffix(tools[i].Function.Name, "-read_resource") { + cp := tools[i] + found = &cp + break outer1 + } + } + } + require.NotNil(t, found, "read_resource tool should be present in the tool map") + + // Annotations must be preserved on ChatTool (not lost after registration) + require.NotNil(t, found.Annotations, "Annotations should be preserved on ChatTool") + assert.Equal(t, "Resource Reader", found.Annotations.Title) + require.NotNil(t, found.Annotations.ReadOnlyHint) + assert.True(t, *found.Annotations.ReadOnlyHint) + require.NotNil(t, found.Annotations.IdempotentHint) + assert.True(t, *found.Annotations.IdempotentHint) + assert.Nil(t, found.Annotations.DestructiveHint) + assert.Nil(t, found.Annotations.OpenWorldHint) +} + +// TestAnnotations_AbsentFromProviderJSON verifies that annotations do NOT appear +// in the JSON representation of a tool — i.e. the payload that would be forwarded +// to an LLM provider. 
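+//
+// Illustrative payloads (abbreviated, key order not significant): the
+// marshalled tool should look like the first shape and never the second:
+//
+//	{"type":"function","function":{"name":"write_file","parameters":{...}}}
+//	{"type":"function","function":{...},"annotations":{"title":"File Writer"}} // must never occur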
+func TestAnnotations_AbsentFromProviderJSON(t *testing.T) { + t.Parallel() + + readOnly := true + destructive := false + + manager := setupMCPManager(t) + + toolSchema := schemas.ChatTool{ + Type: schemas.ChatToolTypeFunction, + Function: &schemas.ChatToolFunction{ + Name: "write_file", + Description: schemas.Ptr("Writes content to a file"), + Parameters: &schemas.ToolFunctionParameters{ + Type: "object", + Properties: schemas.NewOrderedMapFromPairs( + schemas.KV("path", map[string]interface{}{ + "type": "string", + "description": "Destination file path", + }), + schemas.KV("content", map[string]interface{}{ + "type": "string", + "description": "Content to write", + }), + ), + Required: []string{"path", "content"}, + }, + }, + Annotations: &schemas.MCPToolAnnotations{ + Title: "File Writer", + ReadOnlyHint: &readOnly, + DestructiveHint: &destructive, + }, + } + + err := manager.RegisterTool( + "write_file", + "Writes content to a file", + func(args any) (string, error) { return `{"ok":true}`, nil }, + toolSchema, + ) + require.NoError(t, err) + + ctx := createTestContext() + toolPerClient := manager.GetToolPerClient(ctx) + + var found *schemas.ChatTool +outer2: + for _, tools := range toolPerClient { + for i := range tools { + if tools[i].Function != nil && strings.HasSuffix(tools[i].Function.Name, "-write_file") { + cp := tools[i] + found = &cp + break outer2 + } + } + } + require.NotNil(t, found, "write_file tool should be present in the tool map") + + // The tool must have annotations in memory + require.NotNil(t, found.Annotations, "Annotations must be in memory for downstream use") + + // Serialize the tool as a provider would receive it + toolJSON, err := json.Marshal(found) + require.NoError(t, err) + s := string(toolJSON) + + // None of the annotation data must leak into the JSON. + // Use the key token `"annotations":` to avoid false positives from description text. + assert.NotContains(t, s, `"annotations":`, "annotations key must be absent from provider JSON") + assert.NotContains(t, s, "readOnlyHint", "readOnlyHint must be absent from provider JSON") + assert.NotContains(t, s, "destructiveHint", "destructiveHint must be absent from provider JSON") + assert.NotContains(t, s, "File Writer", "annotation title must be absent from provider JSON") + + // The function definition itself must still be present + assert.Contains(t, s, "write_file", "function name must be present in provider JSON") + assert.Contains(t, s, "path", "parameter must be present in provider JSON") +} + +// TestAnnotations_DeepCopyPreservesAnnotations verifies that the deep-copy path +// (used during plugin accumulation and streaming) correctly copies annotations. 
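+//
+// The aliasing bug this guards against, in miniature: a naive struct copy
+// shares every pointer, so a mutation of the original leaks into the "copy":
+//
+//	cp := orig                             // cp.Annotations == orig.Annotations
+//	*orig.Annotations.ReadOnlyHint = false // also flips cp's view of the hint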
+func TestAnnotations_DeepCopyPreservesAnnotations(t *testing.T) { + t.Parallel() + + readOnly := true + + original := schemas.ChatTool{ + Type: schemas.ChatToolTypeFunction, + Function: &schemas.ChatToolFunction{ + Name: "read_config", + Description: schemas.Ptr("Reads configuration from disk"), + }, + Annotations: &schemas.MCPToolAnnotations{ + Title: "Config Reader", + ReadOnlyHint: &readOnly, + }, + } + + copied := schemas.DeepCopyChatTool(original) + + // Annotations must survive the deep copy + require.NotNil(t, copied.Annotations, "Annotations must be preserved after deep copy") + assert.Equal(t, "Config Reader", copied.Annotations.Title) + require.NotNil(t, copied.Annotations.ReadOnlyHint) + assert.True(t, *copied.Annotations.ReadOnlyHint) + + // Mutate via the pointed-to value to detect pointer aliasing + *original.Annotations.ReadOnlyHint = false + assert.NotSame(t, original.Annotations.ReadOnlyHint, copied.Annotations.ReadOnlyHint, + "deep copy must not share the ReadOnlyHint pointer with the original") + assert.True(t, *copied.Annotations.ReadOnlyHint, + "mutating original's ReadOnlyHint must not affect the deep copy") + + // JSON of the copy must also be annotation-free (same guarantee as the original) + toolJSON, err := json.Marshal(copied) + require.NoError(t, err) + s := string(toolJSON) + // Check for the JSON key pattern, not just the substring, to avoid false positives + // from description text. The key would appear as `"annotations":` in JSON. + assert.NotContains(t, s, `"annotations":`, + "annotations key must be absent from provider JSON even after deep copy") + assert.NotContains(t, s, "readOnlyHint", + "readOnlyHint must be absent from provider JSON even after deep copy") +} diff --git a/core/mcp/utils.go b/core/mcp/utils.go index d80ec17acc..1356bb38bb 100644 --- a/core/mcp/utils.go +++ b/core/mcp/utils.go @@ -487,6 +487,28 @@ func convertMCPToolToBifrostSchema(mcpTool *mcp.Tool, logger schemas.Logger) sch // object schemas to always have a properties field, even if empty properties = schemas.NewOrderedMap() } + + // Preserve MCP tool annotations if any are set. + // Clone bool pointers so Bifrost's copy is independent of the upstream mcp.Tool lifetime. 
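+	// (Title is a plain string and copies by value; only the *bool hints can
+	// alias the upstream mcp.Tool, which is why cloneBool exists below.)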
+ var annotations *schemas.MCPToolAnnotations + a := mcpTool.Annotations + if a.Title != "" || a.ReadOnlyHint != nil || a.DestructiveHint != nil || a.IdempotentHint != nil || a.OpenWorldHint != nil { + cloneBool := func(b *bool) *bool { + if b == nil { + return nil + } + v := *b + return &v + } + annotations = &schemas.MCPToolAnnotations{ + Title: a.Title, + ReadOnlyHint: cloneBool(a.ReadOnlyHint), + DestructiveHint: cloneBool(a.DestructiveHint), + IdempotentHint: cloneBool(a.IdempotentHint), + OpenWorldHint: cloneBool(a.OpenWorldHint), + } + } + return schemas.ChatTool{ Type: schemas.ChatToolTypeFunction, Function: &schemas.ChatToolFunction{ @@ -498,6 +520,7 @@ func convertMCPToolToBifrostSchema(mcpTool *mcp.Tool, logger schemas.Logger) sch Required: mcpTool.InputSchema.Required, }, }, + Annotations: annotations, } } diff --git a/core/mcp/utils_test.go b/core/mcp/utils_test.go index e74d9d7da1..1fba67db38 100644 --- a/core/mcp/utils_test.go +++ b/core/mcp/utils_test.go @@ -1,9 +1,13 @@ package mcp import ( + "encoding/json" "testing" "github.com/mark3labs/mcp-go/mcp" + "github.com/maximhq/bifrost/core/schemas" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) // TestConvertMCPToolToBifrostSchema_EmptyParameters tests that tools with no parameters @@ -49,6 +53,68 @@ func TestConvertMCPToolToBifrostSchema_EmptyParameters(t *testing.T) { } } +// TestConvertMCPToolToBifrostSchema_WithAnnotations tests that MCP tool annotations +// are preserved on ChatTool.Annotations (not ChatToolFunction) and are absent from JSON. +func TestConvertMCPToolToBifrostSchema_WithAnnotations(t *testing.T) { + readOnly := true + destructive := false + + mcpTool := &mcp.Tool{ + Name: "read_resource", + Description: "Reads a resource", + InputSchema: mcp.ToolInputSchema{ + Type: "object", + Properties: map[string]interface{}{}, + }, + Annotations: mcp.ToolAnnotation{ + Title: "Resource Reader", + ReadOnlyHint: &readOnly, + DestructiveHint: &destructive, + IdempotentHint: schemas.Ptr(true), + }, + } + + bifrostTool := convertMCPToolToBifrostSchema(mcpTool, defaultLogger) + + // Annotations must be on ChatTool, not buried in Function + require.NotNil(t, bifrostTool.Annotations, "Annotations should be set on ChatTool") + assert.Equal(t, "Resource Reader", bifrostTool.Annotations.Title) + require.NotNil(t, bifrostTool.Annotations.ReadOnlyHint) + assert.True(t, *bifrostTool.Annotations.ReadOnlyHint) + require.NotNil(t, bifrostTool.Annotations.DestructiveHint) + assert.False(t, *bifrostTool.Annotations.DestructiveHint) + require.NotNil(t, bifrostTool.Annotations.IdempotentHint) + assert.True(t, *bifrostTool.Annotations.IdempotentHint) + assert.Nil(t, bifrostTool.Annotations.OpenWorldHint) + + // The JSON sent to providers must not contain annotations + toolJSON, err := json.Marshal(bifrostTool) + require.NoError(t, err) + s := string(toolJSON) + assert.NotContains(t, s, "annotations", "annotations must be absent from provider JSON") + assert.NotContains(t, s, "readOnlyHint", "readOnlyHint must be absent from provider JSON") + assert.NotContains(t, s, "Resource Reader", "annotation title must be absent from provider JSON") +} + +// TestConvertMCPToolToBifrostSchema_NilAnnotationsWhenAllZero verifies the nil guard: +// when all annotation fields are zero-valued, ChatTool.Annotations must remain nil. 
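+//
+// i.e. the converter's guard (as added in core/mcp/utils.go above) must not
+// allocate when every field is zero-valued:
+//
+//	if a.Title != "" || a.ReadOnlyHint != nil || a.DestructiveHint != nil ||
+//		a.IdempotentHint != nil || a.OpenWorldHint != nil { ... }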
+func TestConvertMCPToolToBifrostSchema_NilAnnotationsWhenAllZero(t *testing.T) { + mcpTool := &mcp.Tool{ + Name: "no_hints_tool", + Description: "A tool with no annotation hints", + InputSchema: mcp.ToolInputSchema{ + Type: "object", + Properties: map[string]interface{}{}, + }, + Annotations: mcp.ToolAnnotation{}, // All zero values — Title empty, all hints nil + } + + bifrostTool := convertMCPToolToBifrostSchema(mcpTool, defaultLogger) + + assert.Nil(t, bifrostTool.Annotations, + "Annotations should be nil when all MCP annotation fields are zero") +} + // TestConvertMCPToolToBifrostSchema_WithParameters tests the normal case with parameters func TestConvertMCPToolToBifrostSchema_WithParameters(t *testing.T) { // Create a tool with parameters diff --git a/core/providers/anthropic/anthropic.go b/core/providers/anthropic/anthropic.go index e5d62fe87c..e012f3a13f 100644 --- a/core/providers/anthropic/anthropic.go +++ b/core/providers/anthropic/anthropic.go @@ -450,6 +450,28 @@ func (provider *AnthropicProvider) ChatCompletion(ctx *schemas.BifrostContext, k return nil, bifrostErr } + // On the raw-body passthrough path, the typed-struct StripUnsupportedAnthropicFields + // was not invoked. Apply the JSON-level sanitizer for behavioural parity so + // unsupported request-level and tool-level fields don't leak to providers that + // would reject them. + if useRawBody, ok := ctx.Value(schemas.BifrostContextKeyUseRawRequestBody).(bool); ok && useRawBody { + // Feature gating keyed to schemas.Anthropic (not provider.GetProviderKey()) + // so custom Anthropic aliases get the same feature lookup as the typed + // path above (line 445), keeping raw and typed behavior in lockstep. + sanitized, rawErr := stripUnsupportedFieldsFromRawBody(jsonData, schemas.Anthropic, request.Model) + if rawErr != nil { + return nil, providerUtils.NewBifrostOperationError(schemas.ErrProviderRequestMarshal, rawErr, provider.GetProviderKey()) + } + jsonData = sanitized + // Auto-inject matching anthropic-beta headers for fields the sanitizer + // preserved. Probe-unmarshal reuses the typed path's header walker so + // the two paths stay in lockstep. + var probe AnthropicMessageRequest + if err := schemas.Unmarshal(jsonData, &probe); err == nil { + AddMissingBetaHeadersToContext(ctx, &probe, schemas.Anthropic) + } + } + // Use struct directly for JSON marshaling responseBody, latency, providerResponseHeaders, err := provider.completeRequest(ctx, jsonData, provider.buildRequestURL(ctx, "/v1/messages", schemas.ChatCompletionRequest), key.Value.GetValue(), &providerUtils.RequestMetadata{ Provider: provider.GetProviderKey(), @@ -534,6 +556,25 @@ func (provider *AnthropicProvider) ChatCompletionStream(ctx *schemas.BifrostCont return nil, bifrostErr } + // On the raw-body passthrough path, the typed-struct StripUnsupportedAnthropicFields + // was not invoked. Apply the JSON-level sanitizer for behavioural parity. + if useRawBody, ok := ctx.Value(schemas.BifrostContextKeyUseRawRequestBody).(bool); ok && useRawBody { + // Feature gating keyed to schemas.Anthropic (not provider.GetProviderKey()) + // to keep raw and typed paths in lockstep on custom aliases — mirrors + // the typed path's hardcoded schemas.Anthropic at line 548. 
+ sanitized, rawErr := stripUnsupportedFieldsFromRawBody(jsonData, schemas.Anthropic, request.Model) + if rawErr != nil { + return nil, providerUtils.NewBifrostOperationError(schemas.ErrProviderRequestMarshal, rawErr, provider.GetProviderKey()) + } + jsonData = sanitized + // Auto-inject matching anthropic-beta headers for fields the sanitizer + // preserved. Probe-unmarshal reuses the typed path's header walker. + var probe AnthropicMessageRequest + if err := schemas.Unmarshal(jsonData, &probe); err == nil { + AddMissingBetaHeadersToContext(ctx, &probe, schemas.Anthropic) + } + } + // Prepare Anthropic headers headers := map[string]string{ "Content-Type": "application/json", @@ -660,6 +701,7 @@ func HandleAnthropicChatCompletionStreaming( // Start streaming in a goroutine go func() { + defer providerUtils.EnsureStreamFinalizerCalled(ctx) defer func() { model := "unknown" if meta != nil { @@ -1146,6 +1188,7 @@ func HandleAnthropicResponsesStream( // Start streaming in a goroutine go func() { + defer providerUtils.EnsureStreamFinalizerCalled(ctx) defer func() { model := "" if meta != nil { @@ -1186,6 +1229,7 @@ func HandleAnthropicResponsesStream( stopCancellation := providerUtils.SetupStreamCancellation(ctx, resp.BodyStream(), logger) defer stopCancellation() + sseReader := providerUtils.GetSSEEventReader(ctx, reader) chunkIndex := 0 @@ -2745,6 +2789,7 @@ func (provider *AnthropicProvider) PassthroughStream( ch := make(chan *schemas.BifrostStreamChunk, schemas.DefaultStreamBufferSize) go func() { + defer providerUtils.EnsureStreamFinalizerCalled(ctx) defer func() { if ctx.Err() == context.Canceled { providerUtils.HandleStreamCancellation(ctx, postHookRunner, ch, provider.GetProviderKey(), req.Model, schemas.PassthroughStreamRequest, provider.logger) @@ -2757,6 +2802,7 @@ func (provider *AnthropicProvider) PassthroughStream( defer stopIdleTimeout() defer stopCancellation() + buf := make([]byte, 4096) for { n, readErr := bodyStream.Read(buf) diff --git a/core/providers/anthropic/anthropic_test.go b/core/providers/anthropic/anthropic_test.go index d64b10aa82..6cb05f8c8c 100644 --- a/core/providers/anthropic/anthropic_test.go +++ b/core/providers/anthropic/anthropic_test.go @@ -72,7 +72,9 @@ func TestAnthropic(t *testing.T) { PassthroughAPI: true, Compaction: true, InterleavedThinking: true, - FastMode: false, // Enable when test API key has Opus 4.6 access + FastMode: false, // Enable when test API key has Opus 4.6 access + EagerInputStreaming: true, // fine-grained-tool-streaming-2025-05-14 (GA on Anthropic) + ServerToolsViaOpenAIEndpoint: true, // web_search / web_fetch / code_execution via /v1/chat/completions }, } diff --git a/core/providers/anthropic/chat.go b/core/providers/anthropic/chat.go index 93c0e7c1d0..a72ffbe024 100644 --- a/core/providers/anthropic/chat.go +++ b/core/providers/anthropic/chat.go @@ -3,6 +3,7 @@ package anthropic import ( "encoding/json" "fmt" + "strings" "time" "github.com/bytedance/sonic" @@ -10,6 +11,231 @@ import ( "github.com/maximhq/bifrost/core/schemas" ) +// convertFunctionToolToAnthropic turns an OpenAI-style function tool +// (schemas.ChatTool with non-nil Function) into an AnthropicTool. +// Factored out from ToAnthropicChatRequest's tool loop so the loop can branch +// cleanly between function and server-tool shapes. 
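+//
+// The intended caller-side branch, per the two helpers' contracts (sketch;
+// the real loop lives in ToAnthropicChatRequest below):
+//
+//	if tool.Function != nil {
+//		tools = append(tools, convertFunctionToolToAnthropic(tool))
+//	} else if st, ok := convertServerToolToAnthropic(tool); ok {
+//		tools = append(tools, st)
+//	} // else: unknown shape, dropped cleanly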
+func convertFunctionToolToAnthropic(tool schemas.ChatTool) AnthropicTool { + anthropicTool := AnthropicTool{ + Name: tool.Function.Name, + } + if tool.Function.Description != nil { + anthropicTool.Description = tool.Function.Description + } + + // Convert function parameters to input_schema + if tool.Function.Parameters != nil && (tool.Function.Parameters.Type != "" || tool.Function.Parameters.Properties != nil) { + anthropicTool.InputSchema = &schemas.ToolFunctionParameters{ + Type: tool.Function.Parameters.Type, + Description: tool.Function.Parameters.Description, + Properties: tool.Function.Parameters.Properties, + Required: tool.Function.Parameters.Required, + Enum: tool.Function.Parameters.Enum, + AdditionalProperties: tool.Function.Parameters.AdditionalProperties, + Defs: tool.Function.Parameters.Defs, + Definitions: tool.Function.Parameters.Definitions, + Ref: tool.Function.Parameters.Ref, + Items: tool.Function.Parameters.Items, + MinItems: tool.Function.Parameters.MinItems, + MaxItems: tool.Function.Parameters.MaxItems, + AnyOf: tool.Function.Parameters.AnyOf, + OneOf: tool.Function.Parameters.OneOf, + AllOf: tool.Function.Parameters.AllOf, + Format: tool.Function.Parameters.Format, + Pattern: tool.Function.Parameters.Pattern, + MinLength: tool.Function.Parameters.MinLength, + MaxLength: tool.Function.Parameters.MaxLength, + Minimum: tool.Function.Parameters.Minimum, + Maximum: tool.Function.Parameters.Maximum, + Title: tool.Function.Parameters.Title, + Default: tool.Function.Parameters.Default, + Nullable: tool.Function.Parameters.Nullable, + } + } + + if anthropicTool.InputSchema != nil { + anthropicTool.InputSchema = anthropicTool.InputSchema.Normalized() + } + + if tool.CacheControl != nil { + anthropicTool.CacheControl = tool.CacheControl + } + if tool.DeferLoading != nil { + anthropicTool.DeferLoading = tool.DeferLoading + } + if len(tool.AllowedCallers) > 0 { + anthropicTool.AllowedCallers = tool.AllowedCallers + } + if len(tool.InputExamples) > 0 { + anthropicTool.InputExamples = make([]AnthropicToolInputExample, len(tool.InputExamples)) + for i, ex := range tool.InputExamples { + anthropicTool.InputExamples[i] = AnthropicToolInputExample{ + Input: ex.Input, + Description: ex.Description, + } + } + } + if tool.EagerInputStreaming != nil { + anthropicTool.EagerInputStreaming = tool.EagerInputStreaming + } + // ChatToolFunction.Strict is the canonical neutral slot for Anthropic's strict. + if tool.Function.Strict != nil { + anthropicTool.Strict = tool.Function.Strict + } + return anthropicTool +} + +// convertServerToolToAnthropic reconstructs an AnthropicTool from the +// server-tool shape of a schemas.ChatTool (Function=nil, Name+Type+variant +// fields populated). Returns (tool, true) when Type looks like a known +// server-tool; (zero, false) when it doesn't, so the caller can drop it +// cleanly rather than forward a malformed tool. +// +// Supported type prefixes: +// - web_search_* → AnthropicToolWebSearch +// - web_fetch_* → AnthropicToolWebFetch +// - computer_* → AnthropicToolComputerUse +// - text_editor_* → AnthropicToolTextEditor +// - mcp_toolset → AnthropicMCPToolsetTool (via MCPToolset pointer) +// +// bash_*, memory_*, code_execution_*, and tool_search_* carry no variant +// config — their Type + Name alone are enough, handled in the default branch. 
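+//
+// Example (illustrative values): a neutral web_search ChatTool such as
+//
+//	schemas.ChatTool{Type: "web_search_20260209", Name: "web_search", MaxUses: &five}
+//
+// maps onto
+//
+//	AnthropicTool{Name: "web_search", Type: &atype,
+//		AnthropicToolWebSearch: &AnthropicToolWebSearch{MaxUses: &five}}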
+func convertServerToolToAnthropic(tool schemas.ChatTool) (AnthropicTool, bool) { + typeStr := string(tool.Type) + if typeStr == "" { + return AnthropicTool{}, false + } + + // mcp_toolset is serialized via a dedicated embedded type (AnthropicMCPToolsetTool) + // and carries its identity in MCPServerName, not Name — handle before the + // generic Name guard below. + if typeStr == "mcp_toolset" { + if tool.MCPServerName == "" { + return AnthropicTool{}, false + } + toolset := &AnthropicMCPToolsetTool{ + Type: "mcp_toolset", + MCPServerName: tool.MCPServerName, + DefaultConfig: convertMCPToolsetConfig(tool.DefaultConfig), + Configs: convertMCPToolsetConfigMap(tool.Configs), + CacheControl: tool.CacheControl, + } + return AnthropicTool{MCPToolset: toolset}, true + } + + // Remaining server tools (web_search, web_fetch, computer, text_editor, etc.) + // identify themselves via Name. + if tool.Name == "" { + return AnthropicTool{}, false + } + + atype := AnthropicToolType(typeStr) + anthropicTool := AnthropicTool{ + Name: tool.Name, + Type: &atype, + CacheControl: tool.CacheControl, + DeferLoading: tool.DeferLoading, + AllowedCallers: tool.AllowedCallers, + EagerInputStreaming: tool.EagerInputStreaming, + } + if len(tool.InputExamples) > 0 { + anthropicTool.InputExamples = make([]AnthropicToolInputExample, len(tool.InputExamples)) + for i, ex := range tool.InputExamples { + anthropicTool.InputExamples[i] = AnthropicToolInputExample{ + Input: ex.Input, + Description: ex.Description, + } + } + } + + switch { + case strings.HasPrefix(typeStr, "web_search_"): + anthropicTool.AnthropicToolWebSearch = &AnthropicToolWebSearch{ + MaxUses: tool.MaxUses, + AllowedDomains: tool.AllowedDomains, + BlockedDomains: tool.BlockedDomains, + UserLocation: convertUserLocation(tool.UserLocation), + } + case strings.HasPrefix(typeStr, "web_fetch_"): + anthropicTool.AnthropicToolWebFetch = &AnthropicToolWebFetch{ + MaxUses: tool.MaxUses, + AllowedDomains: tool.AllowedDomains, + BlockedDomains: tool.BlockedDomains, + MaxContentTokens: tool.MaxContentTokens, + Citations: convertCitationsConfig(tool.Citations), + UseCache: tool.UseCache, + } + case strings.HasPrefix(typeStr, "computer_"): + anthropicTool.AnthropicToolComputerUse = &AnthropicToolComputerUse{ + DisplayWidthPx: tool.DisplayWidthPx, + DisplayHeightPx: tool.DisplayHeightPx, + DisplayNumber: tool.DisplayNumber, + EnableZoom: tool.EnableZoom, + } + case strings.HasPrefix(typeStr, "text_editor_"): + anthropicTool.AnthropicToolTextEditor = &AnthropicToolTextEditor{ + MaxCharacters: tool.MaxCharacters, + } + case strings.HasPrefix(typeStr, "bash_"), + strings.HasPrefix(typeStr, "memory_"), + strings.HasPrefix(typeStr, "code_execution_"), + strings.HasPrefix(typeStr, "tool_search_tool_"): + // No variant-specific config — Type + Name alone. + default: + // Unknown type — pass through Type + Name and let Anthropic reject + // if it's truly invalid. This keeps forward-compat for new tool + // versions that aren't yet known to Bifrost. + } + return anthropicTool, true +} + +// convertUserLocation mirrors schemas.ChatToolUserLocation onto +// AnthropicToolWebSearchUserLocation. 
+func convertUserLocation(loc *schemas.ChatToolUserLocation) *AnthropicToolWebSearchUserLocation { + if loc == nil { + return nil + } + return &AnthropicToolWebSearchUserLocation{ + Type: loc.Type, + City: loc.City, + Region: loc.Region, + Country: loc.Country, + Timezone: loc.Timezone, + } +} + +// convertCitationsConfig mirrors the request-side citations config +// ({"enabled": true/false}) onto AnthropicCitations' request form. +func convertCitationsConfig(c *schemas.ChatToolCitationsConfig) *AnthropicCitations { + if c == nil { + return nil + } + return &AnthropicCitations{Config: &schemas.Citations{Enabled: c.Enabled}} +} + +// convertMCPToolsetConfig mirrors a single mcp_toolset config. +func convertMCPToolsetConfig(c *schemas.ChatMCPToolsetConfig) *AnthropicMCPToolsetConfig { + if c == nil { + return nil + } + return &AnthropicMCPToolsetConfig{ + Enabled: c.Enabled, + DeferLoading: c.DeferLoading, + } +} + +// convertMCPToolsetConfigMap mirrors the per-tool mcp_toolset configs map. +func convertMCPToolsetConfigMap(m map[string]*schemas.ChatMCPToolsetConfig) map[string]*AnthropicMCPToolsetConfig { + if len(m) == 0 { + return nil + } + out := make(map[string]*AnthropicMCPToolsetConfig, len(m)) + for k, v := range m { + out[k] = convertMCPToolsetConfig(v) + } + return out +} + // ToAnthropicChatRequest converts a Bifrost request to Anthropic format // This is the reverse of ConvertChatRequestToBifrost for provider-side usage func ToAnthropicChatRequest(ctx *schemas.BifrostContext, bifrostReq *schemas.BifrostChatRequest) (*AnthropicMessageRequest, error) { @@ -30,29 +256,59 @@ func ToAnthropicChatRequest(ctx *schemas.BifrostContext, bifrostReq *schemas.Bif anthropicReq.MaxTokens = *bifrostReq.Params.MaxCompletionTokens } - // Anthropic doesn't allow both temperature and top_p to be specified - // If both are present, prefer temperature (more commonly used) - if bifrostReq.Params.Temperature != nil { - anthropicReq.Temperature = bifrostReq.Params.Temperature - } else if bifrostReq.Params.TopP != nil { - anthropicReq.TopP = bifrostReq.Params.TopP + // Opus 4.7+ rejects temperature, top_p, and top_k with a 400 error. + if !IsOpus47(bifrostReq.Model) { + // Anthropic doesn't allow both temperature and top_p to be specified. + // If both are present, prefer temperature (more commonly used). + if bifrostReq.Params.Temperature != nil { + anthropicReq.Temperature = bifrostReq.Params.Temperature + } else if bifrostReq.Params.TopP != nil { + anthropicReq.TopP = bifrostReq.Params.TopP + } } anthropicReq.StopSequences = bifrostReq.Params.Stop - topK, ok := schemas.SafeExtractIntPointer(bifrostReq.Params.ExtraParams["top_k"]) - if ok { + + // TopK — prefer the promoted neutral field; fall back to ExtraParams. + // Opus 4.7+ rejects top_k with a 400 error. + if bifrostReq.Params.TopK != nil { + if !IsOpus47(bifrostReq.Model) { + anthropicReq.TopK = bifrostReq.Params.TopK + } + } else if topK, ok := schemas.SafeExtractIntPointer(bifrostReq.Params.ExtraParams["top_k"]); ok { delete(anthropicReq.ExtraParams, "top_k") - anthropicReq.TopK = topK + if !IsOpus47(bifrostReq.Model) { + anthropicReq.TopK = topK + } } - if speed, ok := schemas.SafeExtractStringPointer(bifrostReq.Params.ExtraParams["speed"]); ok { + + // Speed — prefer neutral field, then ExtraParams. 
+ if bifrostReq.Params.Speed != nil { + anthropicReq.Speed = bifrostReq.Params.Speed + } else if speed, ok := schemas.SafeExtractStringPointer(bifrostReq.Params.ExtraParams["speed"]); ok { delete(anthropicReq.ExtraParams, "speed") anthropicReq.Speed = speed } - // extract inference_geo and context management - if inferenceGeo, ok := schemas.SafeExtractStringPointer(bifrostReq.Params.ExtraParams["inference_geo"]); ok { + + // InferenceGeo — prefer neutral field, then ExtraParams. + if bifrostReq.Params.InferenceGeo != nil { + anthropicReq.InferenceGeo = bifrostReq.Params.InferenceGeo + } else if inferenceGeo, ok := schemas.SafeExtractStringPointer(bifrostReq.Params.ExtraParams["inference_geo"]); ok { delete(anthropicReq.ExtraParams, "inference_geo") anthropicReq.InferenceGeo = inferenceGeo } - if cmVal := bifrostReq.Params.ExtraParams["context_management"]; cmVal != nil { + + // ContextManagement — the neutral type is json.RawMessage; decode to + // the Anthropic-shape ContextManagement. Fall back to ExtraParams + // (legacy map-valued or typed-pointer paths) if the raw is empty. + // Surface decode errors on the typed path so callers get immediate + // feedback on malformed config instead of a silent drop. + if len(bifrostReq.Params.ContextManagement) > 0 { + var cm ContextManagement + if err := sonic.Unmarshal(bifrostReq.Params.ContextManagement, &cm); err != nil { + return nil, fmt.Errorf("context_management: failed to parse: %w", err) + } + anthropicReq.ContextManagement = &cm + } else if cmVal := bifrostReq.Params.ExtraParams["context_management"]; cmVal != nil { if cm, ok := cmVal.(*ContextManagement); ok && cm != nil { delete(anthropicReq.ExtraParams, "context_management") anthropicReq.ContextManagement = cm @@ -64,6 +320,65 @@ func ToAnthropicChatRequest(ctx *schemas.BifrostContext, bifrostReq *schemas.Bif } } } + + // Container — map the neutral ChatContainer union onto the Anthropic + // AnthropicContainer union. Both follow the string-or-object pattern. + if bifrostReq.Params.Container != nil { + c := &AnthropicContainer{} + if bifrostReq.Params.Container.ContainerStr != nil { + c.ContainerStr = bifrostReq.Params.Container.ContainerStr + } else if bifrostReq.Params.Container.ContainerObject != nil { + obj := &AnthropicContainerObject{ + ID: bifrostReq.Params.Container.ContainerObject.ID, + } + if len(bifrostReq.Params.Container.ContainerObject.Skills) > 0 { + obj.Skills = make([]AnthropicContainerSkill, len(bifrostReq.Params.Container.ContainerObject.Skills)) + for i, sk := range bifrostReq.Params.Container.ContainerObject.Skills { + obj.Skills[i] = AnthropicContainerSkill{ + SkillID: sk.SkillID, + Type: sk.Type, + Version: sk.Version, + } + } + } + c.ContainerObject = obj + } + anthropicReq.Container = c + } + + // Top-level CacheControl on the request. + if bifrostReq.Params.CacheControl != nil { + anthropicReq.CacheControl = bifrostReq.Params.CacheControl + } + + // TaskBudget — maps onto output_config.task_budget. If an OutputConfig + // already exists (e.g. from structured outputs), attach the budget to + // it; otherwise create one. + if bifrostReq.Params.TaskBudget != nil { + tb := &AnthropicTaskBudget{ + Type: bifrostReq.Params.TaskBudget.Type, + Total: bifrostReq.Params.TaskBudget.Total, + Remaining: bifrostReq.Params.TaskBudget.Remaining, + } + if anthropicReq.OutputConfig == nil { + anthropicReq.OutputConfig = &AnthropicOutputConfig{} + } + anthropicReq.OutputConfig.TaskBudget = tb + } + + // MCPServers — mirror the neutral ChatMCPServer[] to AnthropicMCPServerV2[]. 
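+	// Sketch of the 1:1 mapping (server values illustrative, taken from the tests):
+	//
+	//	{"type":"url","url":"https://mcp.example.com","name":"notion"}
+	//
+	// becomes AnthropicMCPServerV2{Type: "url", URL: "https://mcp.example.com",
+	// Name: "notion"}, with AuthorizationToken carried over when present.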
+ if len(bifrostReq.Params.MCPServers) > 0 { + servers := make([]AnthropicMCPServerV2, len(bifrostReq.Params.MCPServers)) + for i, s := range bifrostReq.Params.MCPServers { + servers[i] = AnthropicMCPServerV2{ + Type: s.Type, + URL: s.URL, + Name: s.Name, + AuthorizationToken: s.AuthorizationToken, + } + } + anthropicReq.MCPServers = servers + } if bifrostReq.Params.ResponseFormat != nil { // Vertex doesn't support native structured outputs, so convert to tool if bifrostReq.Provider == schemas.Vertex { @@ -87,65 +402,32 @@ func ToAnthropicChatRequest(ctx *schemas.BifrostContext, bifrostReq *schemas.Bif } } - // Convert tools + // Convert tools. Three neutral ChatTool shapes are supported: + // (1) Function tool (tool.Function != nil) — existing path. + // (2) Anthropic server tool (tool.Function == nil, Type is a + // server-tool version string, Name populated at top level) — + // new path handled by convertServerToolToAnthropic. + // (3) Custom tool (tool.Custom != nil) — not currently forwarded + // to Anthropic; skipped. if bifrostReq.Params.Tools != nil { - tools := make([]AnthropicTool, 0, len(bifrostReq.Params.Tools)) - for _, tool := range bifrostReq.Params.Tools { - if tool.Function == nil { + // Strip server tools the target provider doesn't support per + // ProviderFeatures (e.g. web_search on Vertex's non-supporting + // model variants, or MCP on Bedrock when this converter is used + // by non-Bedrock providers). Function/custom tools are always + // kept. The dropped set is discarded — "silent strip + continue" + // policy per user direction. See Bedrock's convertToolConfig for + // the direct-Bedrock-path equivalent. + filtered, _ := ValidateChatToolsForProvider(bifrostReq.Params.Tools, bifrostReq.Provider) + tools := make([]AnthropicTool, 0, len(filtered)) + for _, tool := range filtered { + if tool.Function != nil { + tools = append(tools, convertFunctionToolToAnthropic(tool)) continue } - anthropicTool := AnthropicTool{ - Name: tool.Function.Name, - } - if tool.Function.Description != nil { - anthropicTool.Description = tool.Function.Description - } - - // Convert function parameters to input_schema - if tool.Function.Parameters != nil && (tool.Function.Parameters.Type != "" || tool.Function.Parameters.Properties != nil) { - anthropicTool.InputSchema = &schemas.ToolFunctionParameters{ - Type: tool.Function.Parameters.Type, - Description: tool.Function.Parameters.Description, - Properties: tool.Function.Parameters.Properties, - Required: tool.Function.Parameters.Required, - Enum: tool.Function.Parameters.Enum, - AdditionalProperties: tool.Function.Parameters.AdditionalProperties, - // JSON Schema definition fields - Defs: tool.Function.Parameters.Defs, - Definitions: tool.Function.Parameters.Definitions, - Ref: tool.Function.Parameters.Ref, - // Array schema fields - Items: tool.Function.Parameters.Items, - MinItems: tool.Function.Parameters.MinItems, - MaxItems: tool.Function.Parameters.MaxItems, - // Composition fields - AnyOf: tool.Function.Parameters.AnyOf, - OneOf: tool.Function.Parameters.OneOf, - AllOf: tool.Function.Parameters.AllOf, - // String validation fields - Format: tool.Function.Parameters.Format, - Pattern: tool.Function.Parameters.Pattern, - MinLength: tool.Function.Parameters.MinLength, - MaxLength: tool.Function.Parameters.MaxLength, - // Number validation fields - Minimum: tool.Function.Parameters.Minimum, - Maximum: tool.Function.Parameters.Maximum, - // Misc fields - Title: tool.Function.Parameters.Title, - Default: tool.Function.Parameters.Default, - 
Nullable: tool.Function.Parameters.Nullable, - } + // Non-function tool: attempt server-tool reconstruction. + if converted, ok := convertServerToolToAnthropic(tool); ok { + tools = append(tools, converted) } - - if anthropicTool.InputSchema != nil { - anthropicTool.InputSchema = anthropicTool.InputSchema.Normalized() - } - - if tool.CacheControl != nil { - anthropicTool.CacheControl = tool.CacheControl - } - - tools = append(tools, anthropicTool) } if anthropicReq.Tools == nil { anthropicReq.Tools = tools @@ -189,23 +471,28 @@ func ToAnthropicChatRequest(ctx *schemas.BifrostContext, bifrostReq *schemas.Bif // Convert reasoning if bifrostReq.Params.Reasoning != nil { if bifrostReq.Params.Reasoning.MaxTokens != nil { - budgetTokens := *bifrostReq.Params.Reasoning.MaxTokens - if *bifrostReq.Params.Reasoning.MaxTokens == -1 { - // anthropic does not support dynamic reasoning budget like gemini - // setting it to default max tokens - budgetTokens = MinimumReasoningMaxTokens - } - if budgetTokens < MinimumReasoningMaxTokens { - return nil, fmt.Errorf("reasoning.max_tokens must be >= %d for anthropic", MinimumReasoningMaxTokens) - } - anthropicReq.Thinking = &AnthropicThinking{ - Type: "enabled", - BudgetTokens: schemas.Ptr(budgetTokens), + if IsOpus47(bifrostReq.Model) { + // Opus 4.7+: budget_tokens removed; adaptive thinking is the only thinking-on mode. + anthropicReq.Thinking = &AnthropicThinking{Type: "adaptive"} + } else { + budgetTokens := *bifrostReq.Params.Reasoning.MaxTokens + if *bifrostReq.Params.Reasoning.MaxTokens == -1 { + // anthropic does not support dynamic reasoning budget like gemini + // setting it to default max tokens + budgetTokens = MinimumReasoningMaxTokens + } + if budgetTokens < MinimumReasoningMaxTokens { + return nil, fmt.Errorf("reasoning.max_tokens must be >= %d for anthropic", MinimumReasoningMaxTokens) + } + anthropicReq.Thinking = &AnthropicThinking{ + Type: "enabled", + BudgetTokens: schemas.Ptr(budgetTokens), + } } } else if bifrostReq.Params.Reasoning.Effort != nil && *bifrostReq.Params.Reasoning.Effort != "none" { effort := MapBifrostEffortToAnthropic(*bifrostReq.Params.Reasoning.Effort) - if SupportsAdaptiveThinking(bifrostReq.Model) { - // Opus 4.6+: adaptive thinking + native effort + if SupportsAdaptiveThinking(bifrostReq.Model) || IsOpus47(bifrostReq.Model) { + // Opus 4.6+ and Opus 4.7+: adaptive thinking + native effort anthropicReq.Thinking = &AnthropicThinking{Type: "adaptive"} setEffortOnOutputConfig(anthropicReq, effort) } else if SupportsNativeEffort(bifrostReq.Model) { @@ -235,6 +522,18 @@ func ToAnthropicChatRequest(ctx *schemas.BifrostContext, bifrostReq *schemas.Bif Type: "disabled", } } + + // thinking.display — map the neutral ChatReasoning.Display onto + // AnthropicThinking.Display. Valid for "enabled" and "adaptive" + // modes only; Anthropic rejects display on "disabled" ("there is + // nothing to display", per the extended-thinking doc). We attach + // on non-disabled modes and let the upstream provider enforce + // model-level support. 
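+		// Illustrative outcomes of this guard:
+		//
+		//	display "omitted" + thinking "adaptive"  -> {"type":"adaptive","display":"omitted"}
+		//	display (any)     + thinking "disabled"  -> display not attached here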
+ if bifrostReq.Params.Reasoning.Display != nil && + anthropicReq.Thinking != nil && + anthropicReq.Thinking.Type != "disabled" { + anthropicReq.Thinking.Display = bifrostReq.Params.Reasoning.Display + } } // Convert service tier @@ -407,6 +706,11 @@ func ToAnthropicChatRequest(ctx *schemas.BifrostContext, bifrostReq *schemas.Bif anthropicReq.Messages = anthropicMessages anthropicReq.System = systemContent + // Strip request- and tool-level fields the target Anthropic-family + // provider does not support. Fail-closed tool validation stays in + // ValidateToolsForProvider; this is strip-silently for additive fields. + stripUnsupportedAnthropicFields(anthropicReq, bifrostReq.Provider, bifrostReq.Model) + return anthropicReq, nil } diff --git a/core/providers/anthropic/chat_server_tools_test.go b/core/providers/anthropic/chat_server_tools_test.go new file mode 100644 index 0000000000..cf830fab02 --- /dev/null +++ b/core/providers/anthropic/chat_server_tools_test.go @@ -0,0 +1,366 @@ +package anthropic + +import ( + "encoding/json" + "testing" + + "github.com/bytedance/sonic" + "github.com/maximhq/bifrost/core/schemas" +) + +// TestChatTool_ServerToolRoundTrip verifies that every Anthropic server-tool +// variant survives Marshal/Unmarshal through the neutral ChatTool schema. +// This locks in the fix for the user-reported bug where a raw JSON tool like +// {"type":"web_search_20260209","name":"web_search","max_uses":5} was being +// dropped at the neutral-schema layer because ChatTool had no slots for the +// server-tool metadata. +func TestChatTool_ServerToolRoundTrip(t *testing.T) { + five := 5 + ptrTrue := true + w, h := 1280, 800 + maxChars := 16000 + maxContent := 32000 + + cases := []struct { + name string + raw string + }{ + { + name: "web_search_20260209", + raw: `{"type":"web_search_20260209","name":"web_search","max_uses":5,"allowed_callers":["direct"]}`, + }, + { + name: "web_search_with_domains", + raw: `{"type":"web_search_20250305","name":"web_search","allowed_domains":["example.com","docs.example.com"]}`, + }, + { + name: "web_search_with_user_location", + raw: `{"type":"web_search_20250305","name":"web_search","user_location":{"type":"approximate","city":"San Francisco","country":"US","timezone":"America/Los_Angeles"}}`, + }, + { + name: "web_fetch_20260309", + raw: `{"type":"web_fetch_20260309","name":"web_fetch","max_uses":5,"max_content_tokens":32000,"citations":{"enabled":true},"use_cache":true}`, + }, + { + name: "computer_20251124", + raw: `{"type":"computer_20251124","name":"computer","display_width_px":1280,"display_height_px":800,"display_number":1,"enable_zoom":true}`, + }, + { + name: "text_editor_20250728", + raw: `{"type":"text_editor_20250728","name":"str_replace_based_edit_tool","max_characters":16000}`, + }, + { + name: "bash_20250124", + raw: `{"type":"bash_20250124","name":"bash"}`, + }, + { + name: "memory_20250818", + raw: `{"type":"memory_20250818","name":"memory"}`, + }, + { + name: "code_execution_20250825", + raw: `{"type":"code_execution_20250825","name":"code_execution"}`, + }, + { + name: "tool_search_tool_bm25", + raw: `{"type":"tool_search_tool_bm25","name":"tool_search_tool_bm25"}`, + }, + { + name: "mcp_toolset", + raw: `{"type":"mcp_toolset","name":"my_mcp","mcp_server_name":"notion","configs":{"search":{"enabled":true}}}`, + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + // Variant-specific field assertions. 
Invoked twice — once after + // initial decode, once after round-trip — so that a regression in + // MarshalSorted that silently drops any variant-specific field + // fails this test instead of sneaking through. + assertVariantFields := func(label string, tl schemas.ChatTool) { + t.Helper() + switch tc.name { + case "web_search_20260209": + if tl.MaxUses == nil || *tl.MaxUses != five { + t.Errorf("%s: MaxUses not preserved, got %v", label, tl.MaxUses) + } + if len(tl.AllowedCallers) != 1 || tl.AllowedCallers[0] != "direct" { + t.Errorf("%s: AllowedCallers not preserved, got %v", label, tl.AllowedCallers) + } + case "web_fetch_20260309": + if tl.MaxContentTokens == nil || *tl.MaxContentTokens != maxContent { + t.Errorf("%s: MaxContentTokens not preserved, got %v", label, tl.MaxContentTokens) + } + if tl.Citations == nil || tl.Citations.Enabled == nil || !*tl.Citations.Enabled { + t.Errorf("%s: Citations not preserved, got %v", label, tl.Citations) + } + if tl.UseCache == nil || !*tl.UseCache { + t.Errorf("%s: UseCache not preserved", label) + } + _ = ptrTrue + case "computer_20251124": + if tl.DisplayWidthPx == nil || *tl.DisplayWidthPx != w { + t.Errorf("%s: DisplayWidthPx not preserved, got %v", label, tl.DisplayWidthPx) + } + if tl.DisplayHeightPx == nil || *tl.DisplayHeightPx != h { + t.Errorf("%s: DisplayHeightPx not preserved, got %v", label, tl.DisplayHeightPx) + } + case "text_editor_20250728": + if tl.MaxCharacters == nil || *tl.MaxCharacters != maxChars { + t.Errorf("%s: MaxCharacters not preserved, got %v", label, tl.MaxCharacters) + } + case "mcp_toolset": + if tl.MCPServerName != "notion" { + t.Errorf("%s: MCPServerName not preserved, got %q", label, tl.MCPServerName) + } + if len(tl.Configs) != 1 { + t.Errorf("%s: Configs not preserved, got %v", label, tl.Configs) + } + } + } + + var tool schemas.ChatTool + if err := sonic.Unmarshal([]byte(tc.raw), &tool); err != nil { + t.Fatalf("unmarshal failed: %v", err) + } + if string(tool.Type) == "" { + t.Errorf("Type should be preserved, got empty") + } + if tool.Name == "" { + t.Errorf("Name should be preserved, got empty") + } + assertVariantFields("first decode", tool) + + // Re-marshal and re-decode — all preserved fields should survive round trip. + out, err := schemas.MarshalSorted(tool) + if err != nil { + t.Fatalf("marshal failed: %v", err) + } + var tool2 schemas.ChatTool + if err := sonic.Unmarshal(out, &tool2); err != nil { + t.Fatalf("second unmarshal failed: %v\njson: %s", err, string(out)) + } + if tool.Name != tool2.Name || tool.Type != tool2.Type { + t.Errorf("round-trip mismatch\n in: %s\n out: %s", tc.raw, string(out)) + } + assertVariantFields("round trip", tool2) + }) + } +} + +// TestToAnthropicChatRequest_ServerTools verifies every ChatTool server-tool +// shape converts correctly through ToAnthropicChatRequest. 
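+// Each case decodes raw Anthropic-shaped JSON through the neutral ChatTool
+// (mirroring how an HTTP request body arrives), builds a minimal one-tool
+// request, and asserts the matching variant-specific embedded struct is
+// populated on the converted AnthropicTool.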
+func TestToAnthropicChatRequest_ServerTools(t *testing.T) { + mk := func(rawTool string) *schemas.BifrostChatRequest { + var tool schemas.ChatTool + if err := sonic.Unmarshal([]byte(rawTool), &tool); err != nil { + t.Fatalf("test setup: %v", err) + } + return &schemas.BifrostChatRequest{ + Provider: schemas.Anthropic, + Model: "claude-sonnet-4-6", + Input: []schemas.ChatMessage{{Role: schemas.ChatMessageRoleUser, Content: &schemas.ChatMessageContent{ContentStr: schemas.Ptr("hi")}}}, + Params: &schemas.ChatParameters{Tools: []schemas.ChatTool{tool}}, + } + } + + type check struct { + expectName string + expectType AnthropicToolType + expectWebSearch bool + expectWebFetch bool + expectComputer bool + expectTextEditor bool + expectMCPToolset bool + } + + cases := []struct { + name string + raw string + want check + }{ + { + name: "web_search", + raw: `{"type":"web_search_20260209","name":"web_search","max_uses":5}`, + want: check{expectName: "web_search", expectType: "web_search_20260209", expectWebSearch: true}, + }, + { + name: "web_fetch", + raw: `{"type":"web_fetch_20260309","name":"web_fetch","max_uses":3,"use_cache":true}`, + want: check{expectName: "web_fetch", expectType: "web_fetch_20260309", expectWebFetch: true}, + }, + { + name: "computer_20251124", + raw: `{"type":"computer_20251124","name":"computer","display_width_px":1280,"display_height_px":800}`, + want: check{expectName: "computer", expectType: "computer_20251124", expectComputer: true}, + }, + { + name: "text_editor_20250728", + raw: `{"type":"text_editor_20250728","name":"str_replace_based_edit_tool","max_characters":16000}`, + want: check{expectName: "str_replace_based_edit_tool", expectType: "text_editor_20250728", expectTextEditor: true}, + }, + { + name: "bash_20250124", + raw: `{"type":"bash_20250124","name":"bash"}`, + want: check{expectName: "bash", expectType: "bash_20250124"}, + }, + { + name: "mcp_toolset", + raw: `{"type":"mcp_toolset","name":"notion","mcp_server_name":"notion"}`, + want: check{expectMCPToolset: true}, + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + req := mk(tc.raw) + out, err := ToAnthropicChatRequest(nil, req) + if err != nil { + t.Fatalf("conversion failed: %v", err) + } + if len(out.Tools) != 1 { + t.Fatalf("expected 1 tool, got %d (raw: %s)", len(out.Tools), tc.raw) + } + at := out.Tools[0] + if tc.want.expectMCPToolset { + if at.MCPToolset == nil { + t.Errorf("expected MCPToolset to be set") + } + return + } + if at.Name != tc.want.expectName { + t.Errorf("Name: got %q want %q", at.Name, tc.want.expectName) + } + if at.Type == nil || *at.Type != tc.want.expectType { + t.Errorf("Type: got %v want %q", at.Type, tc.want.expectType) + } + if tc.want.expectWebSearch && at.AnthropicToolWebSearch == nil { + t.Errorf("expected AnthropicToolWebSearch populated") + } + if tc.want.expectWebFetch && at.AnthropicToolWebFetch == nil { + t.Errorf("expected AnthropicToolWebFetch populated") + } + if tc.want.expectComputer && at.AnthropicToolComputerUse == nil { + t.Errorf("expected AnthropicToolComputerUse populated") + } + if tc.want.expectTextEditor && at.AnthropicToolTextEditor == nil { + t.Errorf("expected AnthropicToolTextEditor populated") + } + }) + } +} + +// TestToBifrostResponsesRequest_MCPToolsetPreservesAnthropicFlags verifies +// that when an Anthropic request carries an mcp_toolset tool with the four +// Anthropic-native flags (DeferLoading, AllowedCallers, InputExamples, +// EagerInputStreaming), those flags survive the inbound conversion into the +// 
neutral ResponsesTool on the mcp_servers merge path. Before the fix, the +// merge path only applied MCP configs (allowlist/cache-control) and dropped +// the flags because convertAnthropicToolToBifrost skips mcp_toolset entries. +func TestToBifrostResponsesRequest_MCPToolsetPreservesAnthropicFlags(t *testing.T) { + toolsetType := "mcp_toolset" + _ = toolsetType // shape documentation only; AnthropicTool.Type is pointer-to-enum and left nil for mcp_toolset + + req := &AnthropicMessageRequest{ + Model: "claude-sonnet-4-6", + Tools: []AnthropicTool{ + { + Name: "notion", + DeferLoading: schemas.Ptr(true), + AllowedCallers: []string{"direct", "agent"}, + EagerInputStreaming: schemas.Ptr(false), + InputExamples: []AnthropicToolInputExample{ + {Input: json.RawMessage(`{"q":"hello"}`), Description: schemas.Ptr("basic")}, + }, + MCPToolset: &AnthropicMCPToolsetTool{ + Type: "mcp_toolset", + MCPServerName: "notion", + DefaultConfig: &AnthropicMCPToolsetConfig{Enabled: schemas.Ptr(true)}, + }, + }, + }, + MCPServers: []AnthropicMCPServerV2{ + {Type: "url", URL: "https://mcp.example.com", Name: "notion"}, + }, + } + + got := req.ToBifrostResponsesRequest(nil) + if got == nil || got.Params == nil { + t.Fatalf("ToBifrostResponsesRequest returned nil params") + } + + // The mcp_toolset tool should have been dropped by convertAnthropicToolToBifrost + // and re-created on the mcp_servers merge path — end result: exactly one tool, + // of type mcp, carrying the Anthropic flags we set. + if len(got.Params.Tools) != 1 { + t.Fatalf("expected 1 mcp tool after merge, got %d", len(got.Params.Tools)) + } + mcp := got.Params.Tools[0] + if mcp.Type != schemas.ResponsesToolTypeMCP { + t.Errorf("expected MCP tool, got type=%q", mcp.Type) + } + if mcp.DeferLoading == nil || !*mcp.DeferLoading { + t.Errorf("DeferLoading dropped on mcp_toolset merge path") + } + if len(mcp.AllowedCallers) != 2 || mcp.AllowedCallers[0] != "direct" { + t.Errorf("AllowedCallers dropped on mcp_toolset merge path, got %v", mcp.AllowedCallers) + } + if len(mcp.InputExamples) != 1 { + t.Errorf("InputExamples dropped on mcp_toolset merge path, got len=%d", len(mcp.InputExamples)) + } + if mcp.EagerInputStreaming == nil || *mcp.EagerInputStreaming { + t.Errorf("EagerInputStreaming dropped on mcp_toolset merge path, got %v", mcp.EagerInputStreaming) + } +} + +// TestToAnthropicChatRequest_ServerTools_ReproUserBug is the exact shape +// from the reported curl — web_search_20260209 with max_uses + allowed_callers. +// Verifies the request reaches ToAnthropicChatRequest output with a populated +// tools array (previously it was silently dropped). +func TestToAnthropicChatRequest_ServerTools_ReproUserBug(t *testing.T) { + raw := []byte(`{ + "model":"claude-sonnet-4-6", + "messages":[{"role":"user","content":"What is the weather in SF?"}], + "tools":[{"name":"web_search","type":"web_search_20260209","max_uses":5,"allowed_callers":["direct"]}] + }`) + // Unmarshal through the neutral schema the way the OpenAI endpoint does. + var inner struct { + Model string `json:"model"` + Messages []json.RawMessage `json:"messages"` + Tools []schemas.ChatTool `json:"tools"` + } + if err := sonic.Unmarshal(raw, &inner); err != nil { + t.Fatalf("outer unmarshal: %v", err) + } + if len(inner.Tools) != 1 { + t.Fatalf("setup: expected 1 tool in raw JSON, got %d", len(inner.Tools)) + } + if inner.Tools[0].Name == "" { + t.Errorf("Name lost at neutral-schema decode (was the bug). 
Got: %+v", inner.Tools[0]) + } + if inner.Tools[0].MaxUses == nil { + t.Errorf("MaxUses lost at neutral-schema decode (was the bug)") + } + + req := &schemas.BifrostChatRequest{ + Provider: schemas.Anthropic, + Model: inner.Model, + Input: []schemas.ChatMessage{{Role: schemas.ChatMessageRoleUser, Content: &schemas.ChatMessageContent{ContentStr: schemas.Ptr("hi")}}}, + Params: &schemas.ChatParameters{Tools: inner.Tools}, + } + out, err := ToAnthropicChatRequest(nil, req) + if err != nil { + t.Fatalf("conversion failed: %v", err) + } + if len(out.Tools) != 1 { + t.Fatalf("repro bug: expected 1 tool after conversion, got %d (tools array was empty — this was the bug)", len(out.Tools)) + } + if out.Tools[0].Name != "web_search" { + t.Errorf("tool Name: got %q, want %q", out.Tools[0].Name, "web_search") + } + if out.Tools[0].AnthropicToolWebSearch == nil || + out.Tools[0].AnthropicToolWebSearch.MaxUses == nil || + *out.Tools[0].AnthropicToolWebSearch.MaxUses != 5 { + t.Errorf("tool max_uses lost: %+v", out.Tools[0]) + } +} diff --git a/core/providers/anthropic/chat_test.go b/core/providers/anthropic/chat_test.go index 4d0ea9ac45..04df3bd02f 100644 --- a/core/providers/anthropic/chat_test.go +++ b/core/providers/anthropic/chat_test.go @@ -85,7 +85,7 @@ func TestToAnthropicChatRequest_CachingDeterminism(t *testing.T) { Model: "claude-sonnet-4-20250514", Input: []schemas.ChatMessage{{ Role: schemas.ChatMessageRoleUser, - Content: &schemas.ChatMessageContent{ContentStr: schemas.Ptr("test")}, + Content: &schemas.ChatMessageContent{ContentStr: new("test")}, }}, Params: &schemas.ChatParameters{ Tools: []schemas.ChatTool{{ @@ -511,3 +511,163 @@ func TestToAnthropicChatRequest_NormalFlowUnchanged(t *testing.T) { t.Errorf("block 1: expected text %q, got %v", responseText, blocks[1].Text) } } + +func TestToAnthropicChatRequest_Opus47_StripsTemperatureTopPTopK(t *testing.T) { + temp := 0.7 + topP := 0.9 + + bifrostReq := &schemas.BifrostChatRequest{ + Provider: schemas.Anthropic, + Model: "claude-opus-4-7-20260401", + Input: []schemas.ChatMessage{ + {Role: schemas.ChatMessageRoleUser, Content: &schemas.ChatMessageContent{ContentStr: schemas.Ptr("hi")}}, + }, + Params: &schemas.ChatParameters{ + Temperature: &temp, + TopP: &topP, + ExtraParams: map[string]interface{}{"top_k": 40}, + }, + } + + ctx, cancel := schemas.NewBifrostContextWithCancel(nil) + defer cancel() + result, err := ToAnthropicChatRequest(ctx, bifrostReq) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if result.Temperature != nil { + t.Errorf("expected Temperature to be nil for Opus 4.7, got %v", result.Temperature) + } + if result.TopP != nil { + t.Errorf("expected TopP to be nil for Opus 4.7, got %v", result.TopP) + } + if result.TopK != nil { + t.Errorf("expected TopK to be nil for Opus 4.7, got %v", result.TopK) + } +} + +func TestToAnthropicChatRequest_NonOpus47_PreservesTemperature(t *testing.T) { + temp := 0.7 + + bifrostReq := &schemas.BifrostChatRequest{ + Provider: schemas.Anthropic, + Model: "claude-opus-4-6-20250514", + Input: []schemas.ChatMessage{ + {Role: schemas.ChatMessageRoleUser, Content: &schemas.ChatMessageContent{ContentStr: schemas.Ptr("hi")}}, + }, + Params: &schemas.ChatParameters{ + Temperature: &temp, + }, + } + + ctx, cancel := schemas.NewBifrostContextWithCancel(nil) + defer cancel() + result, err := ToAnthropicChatRequest(ctx, bifrostReq) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if result.Temperature == nil || *result.Temperature != temp { + t.Errorf("expected 
Temperature %v, got %v", temp, result.Temperature) + } +} + +func TestToAnthropicChatRequest_Opus47_ReasoningMaxTokens_AdaptiveOnly(t *testing.T) { + maxTok := 2048 + + bifrostReq := &schemas.BifrostChatRequest{ + Provider: schemas.Anthropic, + Model: "claude-opus-4-7-20260401", + Input: []schemas.ChatMessage{ + {Role: schemas.ChatMessageRoleUser, Content: &schemas.ChatMessageContent{ContentStr: schemas.Ptr("think")}}, + }, + Params: &schemas.ChatParameters{ + MaxCompletionTokens: schemas.Ptr(8192), + Reasoning: &schemas.ChatReasoning{MaxTokens: &maxTok}, + }, + } + + ctx, cancel := schemas.NewBifrostContextWithCancel(nil) + defer cancel() + result, err := ToAnthropicChatRequest(ctx, bifrostReq) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if result.Thinking == nil { + t.Fatal("expected Thinking to be set") + } + if result.Thinking.Type != "adaptive" { + t.Errorf("expected thinking type 'adaptive' for Opus 4.7, got %q", result.Thinking.Type) + } + if result.Thinking.BudgetTokens != nil { + t.Errorf("expected BudgetTokens to be nil for Opus 4.7, got %v", result.Thinking.BudgetTokens) + } +} + +func TestToAnthropicChatRequest_NonOpus47_ReasoningMaxTokens_EnabledWithBudget(t *testing.T) { + maxTok := 2048 + + bifrostReq := &schemas.BifrostChatRequest{ + Provider: schemas.Anthropic, + Model: "claude-opus-4-6-20250514", + Input: []schemas.ChatMessage{ + {Role: schemas.ChatMessageRoleUser, Content: &schemas.ChatMessageContent{ContentStr: schemas.Ptr("think")}}, + }, + Params: &schemas.ChatParameters{ + MaxCompletionTokens: schemas.Ptr(8192), + Reasoning: &schemas.ChatReasoning{MaxTokens: &maxTok}, + }, + } + + ctx, cancel := schemas.NewBifrostContextWithCancel(nil) + defer cancel() + result, err := ToAnthropicChatRequest(ctx, bifrostReq) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if result.Thinking == nil { + t.Fatal("expected Thinking to be set") + } + if result.Thinking.Type != "enabled" { + t.Errorf("expected thinking type 'enabled' for Opus 4.6, got %q", result.Thinking.Type) + } + if result.Thinking.BudgetTokens == nil || *result.Thinking.BudgetTokens != maxTok { + t.Errorf("expected BudgetTokens %d, got %v", maxTok, result.Thinking.BudgetTokens) + } +} + +func TestToAnthropicChatRequest_Opus47_ReasoningEffort_AdaptiveWithEffort(t *testing.T) { + effort := "high" + + bifrostReq := &schemas.BifrostChatRequest{ + Provider: schemas.Anthropic, + Model: "claude-opus-4-7-20260401", + Input: []schemas.ChatMessage{ + {Role: schemas.ChatMessageRoleUser, Content: &schemas.ChatMessageContent{ContentStr: schemas.Ptr("think")}}, + }, + Params: &schemas.ChatParameters{ + MaxCompletionTokens: schemas.Ptr(8192), + Reasoning: &schemas.ChatReasoning{Effort: &effort}, + }, + } + + ctx, cancel := schemas.NewBifrostContextWithCancel(nil) + defer cancel() + result, err := ToAnthropicChatRequest(ctx, bifrostReq) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if result.Thinking == nil { + t.Fatal("expected Thinking to be set") + } + if result.Thinking.Type != "adaptive" { + t.Errorf("expected thinking type 'adaptive' for Opus 4.7 effort-based, got %q", result.Thinking.Type) + } + if result.OutputConfig == nil || result.OutputConfig.Effort == nil { + t.Error("expected OutputConfig.Effort to be set for Opus 4.7 effort-based reasoning") + } +} diff --git a/core/providers/anthropic/responses.go b/core/providers/anthropic/responses.go index 5ec1c17a80..cb3528aaa0 100644 --- a/core/providers/anthropic/responses.go +++ b/core/providers/anthropic/responses.go 
@@ -77,12 +77,28 @@ var anthropicResponsesStreamStatePool = sync.Pool{ }, } -// webSearchItemIDs tracks item IDs for WebSearch tools to skip their argument deltas -// Maps item_id (string) -> true for WebSearch tools that need delta skipping -var webSearchItemIDs sync.Map +// anthropicToResponsesStreamState holds per-request state for the Bifrost→Anthropic +// stream conversion direction. +type anthropicToResponsesStreamState struct { + // webSearchItemIDs tracks item IDs for WebSearch tools so their argument deltas + // can be skipped and regenerated synthetically (with sanitization) at output_item.done. + webSearchItemIDs map[string]bool +} + +type anthropicToResponsesStreamStateKeyType struct{} -// webFetchItemIDs tracks item IDs for WebFetch tools to skip their argument deltas -var webFetchItemIDs sync.Map +var anthropicToResponsesStreamStateKey = anthropicToResponsesStreamStateKeyType{} + +// getOrCreateAnthropicToResponsesStreamState returns the per-request conversion state, +// creating and storing it in ctx on first access. +func getOrCreateAnthropicToResponsesStreamState(ctx *schemas.BifrostContext) *anthropicToResponsesStreamState { + if v := ctx.Value(anthropicToResponsesStreamStateKey); v != nil { + return v.(*anthropicToResponsesStreamState) + } + state := &anthropicToResponsesStreamState{} + ctx.SetValue(anthropicToResponsesStreamStateKey, state) + return state +} // acquireAnthropicResponsesStreamState gets an Anthropic responses stream state from the pool. func acquireAnthropicResponsesStreamState() *AnthropicResponsesStreamState { @@ -1580,10 +1596,15 @@ func ToAnthropicResponsesStreamResponse(ctx *schemas.BifrostContext, bifrostResp contentBlock.Input = json.RawMessage("{}") // Track WebSearch tools so we can skip their argument deltas + // and regenerate them synthetically (with sanitization) at output_item.done if bifrostResp.Item.ResponsesToolMessage.Name != nil && *bifrostResp.Item.ResponsesToolMessage.Name == "WebSearch" && bifrostResp.Item.ID != nil { - webSearchItemIDs.Store(*bifrostResp.Item.ID, true) + streamState := getOrCreateAnthropicToResponsesStreamState(ctx) + if streamState.webSearchItemIDs == nil { + streamState.webSearchItemIDs = make(map[string]bool) + } + streamState.webSearchItemIDs[*bifrostResp.Item.ID] = true } } } @@ -1691,12 +1712,10 @@ func ToAnthropicResponsesStreamResponse(ctx *schemas.BifrostContext, bifrostResp } case schemas.ResponsesStreamResponseTypeFunctionCallArgumentsDelta: - // Skip WebSearch/WebFetch tool argument deltas - they will be sent synthetically in output_item.done + // Skip WebSearch tool argument deltas - they will be sent synthetically in output_item.done if bifrostResp.ItemID != nil { - if _, isWebSearch := webSearchItemIDs.Load(*bifrostResp.ItemID); isWebSearch { - return nil - } - if _, isWebFetch := webFetchItemIDs.Load(*bifrostResp.ItemID); isWebFetch { + streamState := getOrCreateAnthropicToResponsesStreamState(ctx) + if streamState.webSearchItemIDs[*bifrostResp.ItemID] { return nil } } @@ -1768,52 +1787,46 @@ func ToAnthropicResponsesStreamResponse(ctx *schemas.BifrostContext, bifrostResp case schemas.ResponsesStreamResponseTypeOutputItemDone: // Handle WebSearch tool completion with sanitization and synthetic delta generation + if bifrostResp.Item != nil && + bifrostResp.Item.Type != nil && + *bifrostResp.Item.Type == schemas.ResponsesMessageTypeFunctionCall && + bifrostResp.Item.ResponsesToolMessage != nil && + bifrostResp.Item.ResponsesToolMessage.Name != nil && + *bifrostResp.Item.ResponsesToolMessage.Name == 
"WebSearch" && + bifrostResp.Item.ResponsesToolMessage.Arguments != nil { - // check for claude-cli user agent - if ctx != nil { - if IsClaudeCodeRequest(ctx) { - // check for WebSearch tool - if bifrostResp.Item != nil && - bifrostResp.Item.Type != nil && - *bifrostResp.Item.Type == schemas.ResponsesMessageTypeFunctionCall && - bifrostResp.Item.ResponsesToolMessage != nil && - bifrostResp.Item.ResponsesToolMessage.Name != nil && - *bifrostResp.Item.ResponsesToolMessage.Name == "WebSearch" && - bifrostResp.Item.ResponsesToolMessage.Arguments != nil { - - argumentsJSON := sanitizeWebSearchArguments(*bifrostResp.Item.ResponsesToolMessage.Arguments) - bifrostResp.Item.ResponsesToolMessage.Arguments = &argumentsJSON - - // Generate synthetic input_json_delta events for the sanitized WebSearch arguments - // This replaces the delta events that were skipped earlier - var events []*AnthropicStreamEvent - - // Use OutputIndex for proper Anthropic indexing, fallback to ContentIndex - var indexToUse *int - if bifrostResp.OutputIndex != nil { - indexToUse = bifrostResp.OutputIndex - } else if bifrostResp.ContentIndex != nil { - indexToUse = bifrostResp.ContentIndex - } + argumentsJSON := sanitizeWebSearchArguments(*bifrostResp.Item.ResponsesToolMessage.Arguments) + bifrostResp.Item.ResponsesToolMessage.Arguments = &argumentsJSON - deltaEvents := generateSyntheticInputJSONDeltas(argumentsJSON, indexToUse) - events = append(events, deltaEvents...) + // Generate synthetic input_json_delta events for the sanitized WebSearch arguments + // This replaces the delta events that were skipped earlier + var events []*AnthropicStreamEvent - // Add the content_block_stop event at the end - stopEvent := &AnthropicStreamEvent{ - Type: AnthropicStreamEventTypeContentBlockStop, - Index: indexToUse, - } - events = append(events, stopEvent) + // Use OutputIndex for proper Anthropic indexing, fallback to ContentIndex + var indexToUse *int + if bifrostResp.OutputIndex != nil { + indexToUse = bifrostResp.OutputIndex + } else if bifrostResp.ContentIndex != nil { + indexToUse = bifrostResp.ContentIndex + } - // Clean up the tracking for this WebSearch item - if bifrostResp.Item.ID != nil { - webSearchItemIDs.Delete(*bifrostResp.Item.ID) - } + deltaEvents := generateSyntheticInputJSONDeltas(argumentsJSON, indexToUse) + events = append(events, deltaEvents...) 
-			return events
-		}
+		// Add the content_block_stop event at the end
+		stopEvent := &AnthropicStreamEvent{
+			Type:  AnthropicStreamEventTypeContentBlockStop,
+			Index: indexToUse,
+		}
+		events = append(events, stopEvent)
+
+		// Clean up the tracking for this WebSearch item
+		if bifrostResp.Item.ID != nil {
+			streamState := getOrCreateAnthropicToResponsesStreamState(ctx)
+			delete(streamState.webSearchItemIDs, *bifrostResp.Item.ID)
		}
+
+		return events
	}

	if bifrostResp.Item != nil &&
@@ -2145,6 +2158,9 @@ func (req *AnthropicMessageRequest) ToBifrostResponsesRequest(ctx *schemas.Bifro
		// GA structured outputs - OutputConfig.Format has same structure as OutputFormat
		params.Text = convertAnthropicOutputFormatToResponsesTextConfig(req.OutputConfig.Format)
	}
+	if req.OutputConfig != nil && req.OutputConfig.TaskBudget != nil {
+		params.ExtraParams["task_budget"] = req.OutputConfig.TaskBudget
+	}
	if req.Thinking != nil {
		if req.Thinking.Type == "enabled" || req.Thinking.Type == "adaptive" {
			var summary *string
@@ -2157,10 +2173,14 @@ func (req *AnthropicMessageRequest) ToBifrostResponsesRequest(ctx *schemas.Bifro
					summary = schemas.Ptr("detailed")
				}
			}
+			// If the request was sent with display:"omitted"
+			if req.Thinking.Display != nil && *req.Thinking.Display == "omitted" {
+				summary = schemas.Ptr("none")
+			}
			if req.OutputConfig != nil && req.OutputConfig.Effort != nil {
-				// Native effort present — map to Bifrost enum (e.g., "max" → "high")
+				// Native effort present; pass it through unchanged (no enum mapping, so "max" survives)
				params.Reasoning = &schemas.ResponsesParametersReasoning{
-					Effort:    schemas.Ptr(MapAnthropicEffortToBifrost(*req.OutputConfig.Effort)),
+					Effort:    schemas.Ptr(*req.OutputConfig.Effort),
					MaxTokens: req.Thinking.BudgetTokens,
					Summary:   summary,
				}
@@ -2219,6 +2239,7 @@ func (req *AnthropicMessageRequest) ToBifrostResponsesRequest(ctx *schemas.Bifro
		for _, tool := range req.Tools {
			bifrostTool := convertAnthropicToolToBifrost(&tool)
			if bifrostTool != nil {
+				applyAnthropicToolFlagsToResponsesTool(&tool, bifrostTool)
				bifrostTools = append(bifrostTools, *bifrostTool)
			}
		}
@@ -2228,12 +2249,17 @@ func (req *AnthropicMessageRequest) ToBifrostResponsesRequest(ctx *schemas.Bifro
	}

	if req.MCPServers != nil {
-		// Build a map of mcp_toolset configs from tools[] keyed by mcp_server_name
-		toolsetByServer := make(map[string]*AnthropicMCPToolsetTool)
+		// Build a map of mcp_toolset entries from tools[] keyed by mcp_server_name.
+		// Stores the full *AnthropicTool (not just *AnthropicMCPToolsetTool) so
+		// top-level Anthropic tool flags (DeferLoading, AllowedCallers,
+		// InputExamples, EagerInputStreaming) survive the mcp_servers merge path —
+		// without this, mcp_toolset tools bypass applyAnthropicToolFlagsToResponsesTool
+		// because convertAnthropicToolToBifrost skips them.
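+		// Worked example (see TestToBifrostResponsesRequest_MCPToolsetPreservesAnthropicFlags):
+		// a tools[] entry {mcp_toolset, mcp_server_name: "notion", defer_loading: true}
+		// plus an mcp_servers[] entry {name: "notion"} must merge into one
+		// neutral MCP tool with DeferLoading intact.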
+ toolsetByServer := make(map[string]*AnthropicTool) if req.Tools != nil { for i := range req.Tools { if req.Tools[i].MCPToolset != nil { - toolsetByServer[req.Tools[i].MCPToolset.MCPServerName] = req.Tools[i].MCPToolset + toolsetByServer[req.Tools[i].MCPToolset.MCPServerName] = &req.Tools[i] } } } @@ -2242,9 +2268,10 @@ func (req *AnthropicMessageRequest) ToBifrostResponsesRequest(ctx *schemas.Bifro for _, mcpServer := range req.MCPServers { bifrostMCPTool := convertAnthropicMCPServerV2ToBifrostTool(&mcpServer) if bifrostMCPTool != nil { - // Merge mcp_toolset configs (allowed tools) if present - if toolset, ok := toolsetByServer[mcpServer.Name]; ok { - applyMCPToolsetConfigToBifrostTool(bifrostMCPTool, toolset) + // Merge mcp_toolset configs (allowed tools) + Anthropic tool flags if present + if toolWithFlags, ok := toolsetByServer[mcpServer.Name]; ok { + applyMCPToolsetConfigToBifrostTool(bifrostMCPTool, toolWithFlags.MCPToolset) + applyAnthropicToolFlagsToResponsesTool(toolWithFlags, bifrostMCPTool) } bifrostMCPTools = append(bifrostMCPTools, *bifrostMCPTool) } @@ -2286,12 +2313,15 @@ func ToAnthropicResponsesRequest(ctx *schemas.BifrostContext, bifrostReq *schema if bifrostReq.Params.MaxOutputTokens != nil { anthropicReq.MaxTokens = *bifrostReq.Params.MaxOutputTokens } - // Anthropic doesn't allow both temperature and top_p to be specified - // If both are present, prefer temperature (more commonly used) - if bifrostReq.Params.Temperature != nil { - anthropicReq.Temperature = bifrostReq.Params.Temperature - } else if bifrostReq.Params.TopP != nil { - anthropicReq.TopP = bifrostReq.Params.TopP + // Opus 4.7+ rejects temperature, top_p, and top_k with a 400 error. + if !IsOpus47(bifrostReq.Model) { + // Anthropic doesn't allow both temperature and top_p to be specified. + // If both are present, prefer temperature (more commonly used). + if bifrostReq.Params.Temperature != nil { + anthropicReq.Temperature = bifrostReq.Params.Temperature + } else if bifrostReq.Params.TopP != nil { + anthropicReq.TopP = bifrostReq.Params.TopP + } } if bifrostReq.Params.User != nil { anthropicReq.Metadata = &AnthropicMetaData{ @@ -2351,26 +2381,31 @@ func ToAnthropicResponsesRequest(ctx *schemas.BifrostContext, bifrostReq *schema } if bifrostReq.Params.Reasoning != nil { if bifrostReq.Params.Reasoning.MaxTokens != nil { - budgetTokens := *bifrostReq.Params.Reasoning.MaxTokens - if *bifrostReq.Params.Reasoning.MaxTokens == -1 { - // anthropic does not support dynamic reasoning budget like gemini - // setting it to default max tokens - budgetTokens = MinimumReasoningMaxTokens - } - if budgetTokens < MinimumReasoningMaxTokens { - return nil, fmt.Errorf("reasoning.max_tokens must be >= %d for anthropic", MinimumReasoningMaxTokens) - } - anthropicReq.Thinking = &AnthropicThinking{ - Type: "enabled", - BudgetTokens: schemas.Ptr(budgetTokens), + if IsOpus47(bifrostReq.Model) { + // Opus 4.7+: budget_tokens removed; adaptive thinking is the only thinking-on mode. 
+ anthropicReq.Thinking = &AnthropicThinking{Type: "adaptive"} + } else { + budgetTokens := *bifrostReq.Params.Reasoning.MaxTokens + if *bifrostReq.Params.Reasoning.MaxTokens == -1 { + // anthropic does not support dynamic reasoning budget like gemini + // setting it to default max tokens + budgetTokens = MinimumReasoningMaxTokens + } + if budgetTokens < MinimumReasoningMaxTokens { + return nil, fmt.Errorf("reasoning.max_tokens must be >= %d for anthropic", MinimumReasoningMaxTokens) + } + anthropicReq.Thinking = &AnthropicThinking{ + Type: "enabled", + BudgetTokens: schemas.Ptr(budgetTokens), + } } } else { if bifrostReq.Params.Reasoning.Effort != nil { if *bifrostReq.Params.Reasoning.Effort != "none" { effort := MapBifrostEffortToAnthropic(*bifrostReq.Params.Reasoning.Effort) - if SupportsAdaptiveThinking(bifrostReq.Model) { - // Opus 4.6+: adaptive thinking + native effort + if SupportsAdaptiveThinking(bifrostReq.Model) || IsOpus47(bifrostReq.Model) { + // Opus 4.6+ and Opus 4.7+: adaptive thinking + native effort anthropicReq.Thinking = &AnthropicThinking{Type: "adaptive"} setEffortOnOutputConfig(anthropicReq, effort) } else if SupportsNativeEffort(bifrostReq.Model) { @@ -2402,6 +2437,15 @@ func ToAnthropicResponsesRequest(ctx *schemas.BifrostContext, bifrostReq *schema } } } + if anthropicReq.Thinking != nil && anthropicReq.Thinking.Type != "disabled" { + if bifrostReq.Params.Reasoning != nil && + bifrostReq.Params.Reasoning.Summary != nil && *bifrostReq.Params.Reasoning.Summary == "none" { + anthropicReq.Thinking.Display = schemas.Ptr("omitted") + } else { + // Default to "summarized" to preserve visible thinking output + anthropicReq.Thinking.Display = schemas.Ptr("summarized") + } + } } // Convert service tier anthropicReq.ServiceTier = bifrostReq.Params.ServiceTier @@ -2436,7 +2480,9 @@ func ToAnthropicResponsesRequest(ctx *schemas.BifrostContext, bifrostReq *schema topK, ok := schemas.SafeExtractIntPointer(bifrostReq.Params.ExtraParams["top_k"]) if ok { delete(anthropicReq.ExtraParams, "top_k") - anthropicReq.TopK = topK + if !IsOpus47(bifrostReq.Model) { + anthropicReq.TopK = topK + } } if speed, ok := schemas.SafeExtractStringPointer(bifrostReq.Params.ExtraParams["speed"]); ok { delete(anthropicReq.ExtraParams, "speed") @@ -2462,6 +2508,31 @@ func ToAnthropicResponsesRequest(ctx *schemas.BifrostContext, bifrostReq *schema } } } + if tbVal, exists := bifrostReq.Params.ExtraParams["task_budget"]; exists { + // Always consume provider-specific key from passthrough extras. 
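+			// Accepted shapes, per the switch below: a typed *AnthropicTaskBudget
+			// or AnthropicTaskBudget value, or anything MarshalSorted can re-encode
+			// into one (e.g. a map decoded from JSON passthrough; JSON field names
+			// assumed to follow the struct's Type/Total/Remaining). Anything else
+			// is a hard error.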
+ delete(anthropicReq.ExtraParams, "task_budget") + var taskBudget *AnthropicTaskBudget + switch v := tbVal.(type) { + case *AnthropicTaskBudget: + taskBudget = v + case AnthropicTaskBudget: + taskBudget = &v + default: + if data, err := providerUtils.MarshalSorted(v); err == nil { + var tb AnthropicTaskBudget + if sonic.Unmarshal(data, &tb) == nil { + taskBudget = &tb + } + } + } + if taskBudget == nil { + return nil, fmt.Errorf("invalid task_budget format for anthropic") + } + if anthropicReq.OutputConfig == nil { + anthropicReq.OutputConfig = &AnthropicOutputConfig{} + } + anthropicReq.OutputConfig.TaskBudget = taskBudget + } } // Convert tools @@ -3373,7 +3444,7 @@ func convertAnthropicContentBlocksToResponsesMessagesGrouped(contentBlocks []Ant case AnthropicContentBlockTypeImage: // Don't emit accumulated text or tool_use blocks for images - if block.Source != nil { + if block.Source != nil && block.Source.SourceObj != nil { bifrostMsg := schemas.ResponsesMessage{ Type: schemas.Ptr(schemas.ResponsesMessageTypeMessage), Role: role, @@ -3389,7 +3460,7 @@ func convertAnthropicContentBlocksToResponsesMessagesGrouped(contentBlocks []Ant case AnthropicContentBlockTypeDocument: // Handle document blocks similar to images - if block.Source != nil { + if block.Source != nil && block.Source.SourceObj != nil { bifrostMsg := schemas.ResponsesMessage{ Type: schemas.Ptr(schemas.ResponsesMessageTypeMessage), Role: role, @@ -3479,7 +3550,7 @@ func convertAnthropicContentBlocksToResponsesMessagesGrouped(contentBlocks []Ant }) } case AnthropicContentBlockTypeImage: - if contentBlock.Source != nil { + if contentBlock.Source != nil && contentBlock.Source.SourceObj != nil { toolMsgContentBlocks = append(toolMsgContentBlocks, contentBlock.toBifrostResponsesImageBlock()) } } @@ -3692,7 +3763,7 @@ func convertAnthropicContentBlocksToResponsesMessages(ctx *schemas.BifrostContex bifrostMessages = append(bifrostMessages, bifrostMsg) } case AnthropicContentBlockTypeImage: - if block.Source != nil { + if block.Source != nil && block.Source.SourceObj != nil { bifrostMsg := schemas.ResponsesMessage{ Type: schemas.Ptr(schemas.ResponsesMessageTypeMessage), Role: role, @@ -3706,7 +3777,7 @@ func convertAnthropicContentBlocksToResponsesMessages(ctx *schemas.BifrostContex bifrostMessages = append(bifrostMessages, bifrostMsg) } case AnthropicContentBlockTypeDocument: - if block.Source != nil { + if block.Source != nil && block.Source.SourceObj != nil { bifrostMsg := schemas.ResponsesMessage{ Type: schemas.Ptr(schemas.ResponsesMessageTypeMessage), Role: role, @@ -3840,7 +3911,7 @@ func convertAnthropicContentBlocksToResponsesMessages(ctx *schemas.BifrostContex }) } case AnthropicContentBlockTypeImage: - if contentBlock.Source != nil { + if contentBlock.Source != nil && contentBlock.Source.SourceObj != nil { toolMsgContentBlocks = append(toolMsgContentBlocks, contentBlock.toBifrostResponsesImageBlock()) } } @@ -4787,18 +4858,80 @@ func convertBifrostToolsToAnthropic(model string, tools []schemas.ResponsesTool, mcpServers = append(mcpServers, *server) } if toolset != nil { - anthropicTools = append(anthropicTools, AnthropicTool{MCPToolset: toolset}) + mcpTool := AnthropicTool{MCPToolset: toolset} + applyResponsesToolAnthropicFlags(&mcpTool, &tool) + anthropicTools = append(anthropicTools, mcpTool) } continue } anthropicTool := convertBifrostToolToAnthropic(model, &tool, provider, hasWebSearchOrFetch) if anthropicTool != nil { + applyResponsesToolAnthropicFlags(anthropicTool, &tool) anthropicTools = append(anthropicTools, 
*anthropicTool) } } return anthropicTools, mcpServers } +// applyAnthropicToolFlagsToResponsesTool propagates the Anthropic-native tool +// flags (DeferLoading, AllowedCallers, InputExamples, EagerInputStreaming) in +// the inbound direction: from the incoming AnthropicTool onto the neutral +// ResponsesTool when the native Anthropic /v1/messages endpoint is the entry +// point. Called once per converted tool so every return path inside +// convertAnthropicToolToBifrost benefits. +func applyAnthropicToolFlagsToResponsesTool(at *AnthropicTool, rt *schemas.ResponsesTool) { + if at == nil || rt == nil { + return + } + if at.DeferLoading != nil { + rt.DeferLoading = at.DeferLoading + } + if len(at.AllowedCallers) > 0 { + rt.AllowedCallers = at.AllowedCallers + } + if len(at.InputExamples) > 0 { + rt.InputExamples = make([]schemas.ChatToolInputExample, len(at.InputExamples)) + for i, ex := range at.InputExamples { + rt.InputExamples[i] = schemas.ChatToolInputExample{ + Input: ex.Input, + Description: ex.Description, + } + } + } + if at.EagerInputStreaming != nil { + rt.EagerInputStreaming = at.EagerInputStreaming + } +} + +// applyResponsesToolAnthropicFlags propagates the Anthropic-native tool flags +// (DeferLoading, AllowedCallers, InputExamples, EagerInputStreaming) from the +// neutral ResponsesTool onto the provider-native AnthropicTool. Called once +// per converted tool so every branch in convertBifrostToolToAnthropic +// benefits without duplicating the logic on each return path. +func applyResponsesToolAnthropicFlags(at *AnthropicTool, rt *schemas.ResponsesTool) { + if at == nil || rt == nil { + return + } + if rt.DeferLoading != nil { + at.DeferLoading = rt.DeferLoading + } + if len(rt.AllowedCallers) > 0 { + at.AllowedCallers = rt.AllowedCallers + } + if len(rt.InputExamples) > 0 { + at.InputExamples = make([]AnthropicToolInputExample, len(rt.InputExamples)) + for i, ex := range rt.InputExamples { + at.InputExamples[i] = AnthropicToolInputExample{ + Input: ex.Input, + Description: ex.Description, + } + } + } + if rt.EagerInputStreaming != nil { + at.EagerInputStreaming = rt.EagerInputStreaming + } +} + // Helper function to convert Tool back to AnthropicTool func convertBifrostToolToAnthropic(model string, tool *schemas.ResponsesTool, provider schemas.ModelProvider, hasWebSearchOrFetch bool) *AnthropicTool { if tool == nil { @@ -5136,36 +5269,40 @@ func (block AnthropicContentBlock) toBifrostResponsesDocumentBlock() schemas.Res resultBlock.ResponsesInputMessageContentBlockFile.Filename = block.Title } - if block.Source == nil { + if block.Source == nil || block.Source.SourceObj == nil { + // File-block rendering only applies to object-form sources + // (image / document). String-form sources (search_result) are + // handled elsewhere. 
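+		// e.g. an object-form source such as
+		//
+		//	{"type":"base64","media_type":"application/pdf","data":"..."}
+		//
+		// takes the switch below; a string-form source never reaches it.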
return resultBlock } + src := block.Source.SourceObj // Handle different source types - switch block.Source.Type { + switch src.Type { case "url": // URL source - if block.Source.URL != nil { - resultBlock.ResponsesInputMessageContentBlockFile.FileURL = block.Source.URL + if src.URL != nil { + resultBlock.ResponsesInputMessageContentBlockFile.FileURL = src.URL } case "base64": // Base64 encoded data - if block.Source.Data != nil { + if src.Data != nil { // Construct data URL with media type mediaType := "application/pdf" - if block.Source.MediaType != nil { - mediaType = *block.Source.MediaType + if src.MediaType != nil { + mediaType = *src.MediaType } - dataURL := *block.Source.Data + dataURL := *src.Data if !strings.HasPrefix(dataURL, "data:") { - dataURL = "data:" + mediaType + ";base64," + *block.Source.Data + dataURL = "data:" + mediaType + ";base64," + *src.Data } resultBlock.ResponsesInputMessageContentBlockFile.FileData = &dataURL } case "text": // Plain text source - if block.Source.Data != nil { + if src.Data != nil { resultBlock.ResponsesInputMessageContentBlockFile.FileType = schemas.Ptr("text/plain") - resultBlock.ResponsesInputMessageContentBlockFile.FileData = block.Source.Data + resultBlock.ResponsesInputMessageContentBlockFile.FileData = src.Data } } diff --git a/core/providers/anthropic/types.go b/core/providers/anthropic/types.go index f803c337e5..3c11d076d9 100644 --- a/core/providers/anthropic/types.go +++ b/core/providers/anthropic/types.go @@ -26,6 +26,12 @@ const ( AnthropicStructuredOutputsBetaHeader = "structured-outputs-2025-11-13" // AnthropicAdvancedToolUseBetaHeader is required for defer_loading, input_examples, and allowed_callers. AnthropicAdvancedToolUseBetaHeader = "advanced-tool-use-2025-11-20" + // AnthropicToolExamplesBetaHeader is required for tool.input_examples as a + // standalone feature (Bedrock supports this narrow header without the full + // advanced-tool-use-2025-11-20 bundle). + // Source: AWS Bedrock user guide beta-header list: + // https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages.html + AnthropicToolExamplesBetaHeader = "tool-examples-2025-10-29" // AnthropicMCPClientBetaHeader is required for MCP servers (current version). AnthropicMCPClientBetaHeader = "mcp-client-2025-11-20" // AnthropicMCPClientBetaHeaderDeprecated is the previous MCP beta header (kept for fallback). @@ -48,6 +54,12 @@ const ( AnthropicFastModeBetaHeader = "fast-mode-2026-02-01" // AnthropicRedactThinkingBetaHeader is required for redacting thinking blocks in responses. AnthropicRedactThinkingBetaHeader = "redact-thinking-2026-02-12" + // AnthropicTaskBudgetsBetaHeader is required for output_config.task_budget (Opus 4.7+). + AnthropicTaskBudgetsBetaHeader = "task-budgets-2026-03-13" + // AnthropicEagerInputStreamingBetaHeader is required for eager_input_streaming + // on custom tools (streams input_json_delta before full args are determined). + // Per Table 20: GA on Anthropic/Bedrock/Vertex, Beta on Azure. + AnthropicEagerInputStreamingBetaHeader = "fine-grained-tool-streaming-2025-05-14" // AnthropicComputerUseBetaHeader is required for computer use (version-specific). // computer_20251124 (Opus 4.6, Sonnet 4.6, Opus 4.5) uses the newer beta header. @@ -59,6 +71,7 @@ const ( // Use these with strings.HasPrefix when filtering headers per provider, // so that future date bumps (e.g. structured-outputs-2025-12-15) are still matched. 
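+	// Minimal matching sketch (variable and branch names illustrative):
+	//
+	//	if strings.HasPrefix(header, AnthropicTaskBudgetsBetaHeaderPrefix) {
+	//		// forward only when the target provider has TaskBudgets support
+	//	}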
AnthropicAdvancedToolUseBetaHeaderPrefix = "advanced-tool-use-" + AnthropicToolExamplesBetaHeaderPrefix = "tool-examples-" AnthropicStructuredOutputsBetaHeaderPrefix = "structured-outputs-" AnthropicPromptCachingScopeBetaHeaderPrefix = "prompt-caching-scope-" AnthropicMCPClientBetaHeaderPrefix = "mcp-client-" @@ -67,64 +80,123 @@ const ( AnthropicContext1MBetaHeaderPrefix = "context-1m-" AnthropicFastModeBetaHeaderPrefix = "fast-mode-" AnthropicRedactThinkingBetaHeaderPrefix = "redact-thinking-" + AnthropicTaskBudgetsBetaHeaderPrefix = "task-budgets-" + AnthropicEagerInputStreamingBetaHeaderPrefix = "fine-grained-tool-streaming-" ) // ProviderFeatureSupport defines which Anthropic features a given provider supports. -// Source: https://docs.anthropic.com/en/build-with-claude/overview (March 2026) +// +// Authoritative sources (verified 2026-04-17): +// A = Anthropic feature-availability table: +// https://platform.claude.com/docs/en/build-with-claude/overview +// B-header = AWS Bedrock user guide beta-header list: +// https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages.html +// B-platform = https://platform.claude.com/docs/en/build-with-claude/claude-on-amazon-bedrock +// V-platform = https://platform.claude.com/docs/en/build-with-claude/claude-on-vertex-ai +// Az-platform = https://platform.claude.com/docs/en/build-with-claude/claude-in-microsoft-foundry +// MCP-excl = MCP connector explicit Bedrock/Vertex exclusion: +// https://platform.claude.com/docs/en/agents-and-tools/mcp-connector +// Advisor-excl = Advisor tool Claude-API-only: +// https://platform.claude.com/docs/en/agents-and-tools/tool-use/advisor-tool type ProviderFeatureSupport struct { - WebSearch bool // web_search server tool - WebSearchDynamic bool // web_search_20260209 (dynamic filtering, requires code_execution) - WebFetch bool // web_fetch server tool - CodeExecution bool // code_execution server tool - ComputerUse bool // computer_use client tool - Bash bool // bash client tool - Memory bool // memory client tool - TextEditor bool // text_editor client tool - ToolSearch bool // tool_search server tool - MCP bool // MCP connector - AdvancedToolUse bool // advanced-tool-use (defer_loading, input_examples, allowed_callers) - StructuredOutputs bool // strict tool validation and output_format - PromptCachingScope bool // prompt caching scope - Compaction bool // server-side context compaction - ContextEditing bool // context editing (clear_tool_uses, clear_thinking) - FilesAPI bool // Files API - InterleavedThinking bool // interleaved thinking between tool calls - Skills bool // Agent Skills - Context1M bool // 1M context window beta (for Sonnet 4.5/4 only) - FastMode bool // fast mode (Opus 4.6 only, research preview) - RedactThinking bool // redact thinking blocks in responses + WebSearch bool // web_search server tool (cite: A) + WebSearchDynamic bool // web_search_20260209 dynamic filtering (cite: A) + WebFetch bool // web_fetch server tool (cite: A) + CodeExecution bool // code_execution server tool (cite: A) + ComputerUse bool // computer_use client tool (cite: A, B-header) + Bash bool // bash client tool (cite: A, B-header) + Memory bool // memory client tool — on Bedrock bundled under context-management-2025-06-27 (cite: A, B-header) + TextEditor bool // text_editor client tool (cite: A) + ToolSearch bool // tool_search server tool — tool-search-tool-2025-10-19 (cite: A, B-header) + MCP bool // MCP connector — explicit "not supported on Bedrock/Vertex" (cite: MCP-excl) + 
AdvancedToolUse bool // advanced-tool-use-2025-11-20 bundle: defer_loading + input_examples + allowed_callers (cite: A) + InputExamples bool // tool.input_examples standalone — tool-examples-2025-10-29. Bedrock supports this independently of the AdvancedToolUse bundle (cite: B-header). On Anthropic / Azure the bundle implicitly covers it. + StructuredOutputs bool // strict tool validation / output_format (cite: A) + PromptCachingScope bool // cache_control.scope — prompt-caching-scope-2026-01-05 (cite: A) + Compaction bool // compact_20260112 (cite: A, B-header) + ContextEditing bool // clear_tool_uses / clear_thinking (cite: A, B-header) + FilesAPI bool // files-api-2025-04-14, file_id source (cite: A) + InterleavedThinking bool // interleaved thinking between tool calls (cite: A, B-header; fails on non-allowlisted models on Bedrock/Vertex) + Skills bool // Agent Skills — container.skills object (cite: A) + ContainerBasic bool // Bare string-form container id — universally supported (cite: A) + Context1M bool // 1M context window — context-1m-2025-08-07 (cite: A) + FastMode bool // Opus 4.6 research preview — fast-mode-2026-02-01 (cite: A) + RedactThinking bool // redact-thinking-2026-02-12 (cite: A) — note Bedrock has its own "thinking encryption" (different mechanism) + TaskBudgets bool // output_config.task_budget — task-budgets-2026-03-13 (cite: A) + InferenceGeo bool // inference_geo field — Claude API only; Bedrock/Vertex/Azure use their own region-routing mechanisms (cite: A) + EagerInputStreaming bool // fine-grained-tool-streaming-2025-05-14 (cite: A, B-header) + AdvisorTool bool // advisor_tool_result block — Anthropic only (cite: Advisor-excl) FileSearch bool // file_search server tool (OpenAI-only) ImageGeneration bool // image_generation server tool (OpenAI-only) } // ProviderFeatures maps each provider to its supported Anthropic features. +// +// Every cell below is sourced from the docs named in ProviderFeatureSupport. +// "Not documented" in upstream docs is treated as unsupported here; if a user +// needs a pass-through, ExtraParams still works. var ProviderFeatures = map[schemas.ModelProvider]ProviderFeatureSupport{ + // Anthropic Claude API direct (cite: A across the board). schemas.Anthropic: { WebSearch: true, WebSearchDynamic: true, WebFetch: true, CodeExecution: true, ComputerUse: true, Bash: true, Memory: true, TextEditor: true, ToolSearch: true, - MCP: true, AdvancedToolUse: true, StructuredOutputs: true, PromptCachingScope: true, + MCP: true, AdvancedToolUse: true, InputExamples: true, StructuredOutputs: true, PromptCachingScope: true, Compaction: true, ContextEditing: true, FilesAPI: true, - InterleavedThinking: true, Skills: true, Context1M: true, FastMode: true, - RedactThinking: true, + InterleavedThinking: true, Skills: true, ContainerBasic: true, Context1M: true, + FastMode: true, RedactThinking: true, TaskBudgets: true, + InferenceGeo: true, EagerInputStreaming: true, AdvisorTool: true, }, + // Google Vertex AI — cite: A (overview table) and V-platform. + // Notably NOT supported: MCP (MCP-excl), Skills/container.skills, + // InferenceGeo, FastMode, TaskBudgets, AdvisorTool, StructuredOutputs, + // PromptCachingScope (400 "unexpected beta header" per LiteLLM #19984), + // FilesAPI, WebFetch, CodeExecution, AdvancedToolUse, RedactThinking. 
schemas.Vertex: {
- WebSearch: true, // only web_search_20250305 (basic), NOT dynamic filtering
+ WebSearch: true, // web search GA on Vertex per A; earlier code restricted it to web_search_20250305 — a restriction A does not impose
ComputerUse: true, Bash: true, Memory: true, TextEditor: true, ToolSearch: true,
- Compaction: true, ContextEditing: true,
- InterleavedThinking: true, Context1M: true,
+ ContainerBasic: true,
+ Compaction: true,
+ ContextEditing: true,
+ InterleavedThinking: true, // V-platform confirms; fails on non-allowlisted 4-series
+ Context1M: true,
+ EagerInputStreaming: true, // fine-grained-tool-streaming GA per A
},
+ // AWS Bedrock — cite: A + B-header (definitive beta-header list).
+ // Notably NOT supported per docs: MCP, Skills, FilesAPI, WebFetch,
+ // WebSearch, CodeExecution, FastMode, TaskBudgets, AdvisorTool,
+ // InferenceGeo, RedactThinking, AdvancedToolUse (full), PromptCachingScope.
schemas.Bedrock: {
ComputerUse: true, Bash: true, Memory: true, TextEditor: true, ToolSearch: true,
- StructuredOutputs: true, Compaction: true, ContextEditing: true,
- InterleavedThinking: true, Context1M: true,
+ ContainerBasic: true,
+ // StructuredOutputs: kept true to match pre-existing behavior and the
+ // provider_feature_support_test.go assertion, but NEITHER B-header
+ // NOR B-platform upstream docs document strict tool validation /
+ // output_format on Bedrock. Needs live verification. If Bedrock's
+ // Converse API actually rejects `strict: true`, flip this to false
+ // and update the corresponding test assertion.
+ StructuredOutputs: true,
+ Compaction: true, // compact-2026-01-12 per B-header
+ ContextEditing: true, // context-management-2025-06-27 per B-header (bundles memory)
+ InterleavedThinking: true, // per B-header; model-allowlisted
+ Context1M: true, // Opus 4.6 / Sonnet 4.6 per A
+ EagerInputStreaming: true, // fine-grained-tool-streaming-2025-05-14 per B-header
+ InputExamples: true, // tool-examples-2025-10-29 per B-header (standalone; Bedrock doesn't accept the full advanced-tool-use-2025-11-20 bundle — see TestFilterBetaHeadersForProvider)
+ // AdvancedToolUse intentionally OFF on Bedrock. The bundle header
+ // (advanced-tool-use-2025-11-20) is not listed in B-header; only the
+ // narrow tool-examples-2025-10-29 header is, gated via InputExamples above.
},
+ // Microsoft Azure AI Foundry — cite: A (most features azureAiBeta) +
+ // Az-platform ("supports most of Claude's features"). Excluded per
+ // Az-platform: Admin API, Models API, Message Batch API (not in scope).
schemas.Azure: {
WebSearch: true, WebSearchDynamic: true, WebFetch: true, CodeExecution: true,
ComputerUse: true, Bash: true, Memory: true, TextEditor: true, ToolSearch: true,
- MCP: true, AdvancedToolUse: true, StructuredOutputs: true, PromptCachingScope: true,
+ MCP: true, AdvancedToolUse: true, InputExamples: true, StructuredOutputs: true, PromptCachingScope: true,
Compaction: true, ContextEditing: true, FilesAPI: true,
- InterleavedThinking: true, Skills: true, Context1M: true,
- RedactThinking: true,
+ InterleavedThinking: true, Skills: true, ContainerBasic: true, Context1M: true,
+ RedactThinking: true, TaskBudgets: true,
+ EagerInputStreaming: true,
+ // FastMode, InferenceGeo, AdvisorTool — not in Az-platform; leave off.
}, } @@ -156,11 +228,88 @@ func (req *AnthropicTextRequest) IsStreamingRequested() bool { return req.Stream != nil && *req.Stream } -// AnthropicOutputConfig represents the GA structured outputs config (output_config.format) -// and the effort parameter (output_config.effort) for controlling token spending. +// AnthropicTaskBudget represents an advisory token budget for a full agentic loop (output_config.task_budget). +// The model sees a running countdown and uses it to prioritize work and finish gracefully. +// Requires beta header "task-budgets-2026-03-13". Minimum total: 20 000 tokens. +// This is advisory, not a hard cap — use max_tokens as the per-request hard ceiling. +type AnthropicTaskBudget struct { + Type string `json:"type"` // always "tokens" + Total int `json:"total"` // total advisory token budget across the agentic loop + Remaining *int `json:"remaining,omitempty"` // optional; tracks remaining tokens for client-side compaction +} + +// AnthropicOutputConfig represents the GA structured outputs config (output_config.format), +// the effort parameter (output_config.effort), and the task budget (output_config.task_budget). type AnthropicOutputConfig struct { - Format json.RawMessage `json:"format,omitempty"` - Effort *string `json:"effort,omitempty"` // "low", "medium", "high", "max" (Opus 4.5+) + Format json.RawMessage `json:"format,omitempty"` // JSON schema for structured outputs + Effort *string `json:"effort,omitempty"` // "low" | "medium" | "high" | "xhigh" | "max" + TaskBudget *AnthropicTaskBudget `json:"task_budget,omitempty"` // advisory token budget; requires task-budgets-2026-03-13 beta header +} + +// AnthropicContainerSkill represents a single skill attached to a container. +// Requires beta header "skills-2025-10-02". +type AnthropicContainerSkill struct { + SkillID string `json:"skill_id"` // Unique identifier for the skill + Type string `json:"type"` // "anthropic" (built-in) | "custom" (user-defined) + Version *string `json:"version,omitempty"` // Optional version pin +} + +// AnthropicContainerObject represents the object form of the container field: +// { id?: string, skills?: [...] }. The skills[] array is gated by the +// skills-2025-10-02 beta header; a bare id-only container is GA. +type AnthropicContainerObject struct { + ID *string `json:"id,omitempty"` + Skills []AnthropicContainerSkill `json:"skills,omitempty"` +} + +// AnthropicContainer is the "container" field on AnthropicMessageRequest. +// Per Anthropic docs it can be either a bare string (container id) or an +// object with id+skills[]. The object-with-skills form requires beta header +// "skills-2025-10-02"; the string form is GA. +// Source: https://platform.claude.com/docs/en/api/messages/create +type AnthropicContainer struct { + ContainerStr *string + ContainerObject *AnthropicContainerObject +} + +// MarshalJSON encodes the union as either a raw string or the object form. +func (c AnthropicContainer) MarshalJSON() ([]byte, error) { + if c.ContainerStr != nil && c.ContainerObject != nil { + return nil, fmt.Errorf("both ContainerStr and ContainerObject are set; only one should be non-nil") + } + if c.ContainerStr != nil { + return providerUtils.MarshalSorted(*c.ContainerStr) + } + if c.ContainerObject != nil { + return providerUtils.MarshalSorted(c.ContainerObject) + } + return providerUtils.MarshalSorted(nil) +} + +// UnmarshalJSON decodes either a string or the object form into the union. 
+// Clears the inactive arm on each success so a reused struct never ends up +// with both fields populated (which MarshalJSON rejects). Explicitly handles +// JSON null. Matches the ChatContainer / ChatToolChoice union patterns. +func (c *AnthropicContainer) UnmarshalJSON(data []byte) error { + trimmed := bytes.TrimSpace(data) + if len(trimmed) == 0 || bytes.Equal(trimmed, []byte("null")) { + c.ContainerStr = nil + c.ContainerObject = nil + return nil + } + var s string + if err := sonic.Unmarshal(data, &s); err == nil { + c.ContainerStr = &s + c.ContainerObject = nil + return nil + } + var obj AnthropicContainerObject + if err := sonic.Unmarshal(data, &obj); err == nil { + c.ContainerStr = nil + c.ContainerObject = &obj + return nil + } + return fmt.Errorf("container field is neither a string nor a container object") } // AnthropicMessageRequest represents an Anthropic messages API request @@ -186,6 +335,7 @@ type AnthropicMessageRequest struct { ServiceTier *string `json:"service_tier,omitempty"` // "auto" or "standard_only" InferenceGeo *string `json:"inference_geo,omitempty"` // the geographic region for inference processing. If not specified, the workspace's default_inference_geo is used. ContextManagement *ContextManagement `json:"context_management,omitempty"` + Container *AnthropicContainer `json:"container,omitempty"` // string id OR object with skills[]; skills require skills-2025-10-02 beta // Extra params for advanced use cases ExtraParams map[string]interface{} `json:"-"` @@ -212,8 +362,9 @@ type AnthropicMetaData struct { } type AnthropicThinking struct { - Type string `json:"type"` // "enabled" or "disabled" - BudgetTokens *int `json:"budget_tokens,omitempty"` + Type string `json:"type"` // "enabled", "disabled", or "adaptive" + BudgetTokens *int `json:"budget_tokens,omitempty"` // Only for type "enabled" (not supported on Opus 4.7+) + Display *string `json:"display,omitempty"` // "summarized" | "omitted" — controls whether thinking content appears in the response (Opus 4.7+) } type ContextManagementEditType string @@ -461,6 +612,7 @@ var anthropicMessageRequestKnownFields = map[string]bool{ "service_tier": true, "inference_geo": true, "context_management": true, + "container": true, "extra_params": true, "fallbacks": true, } @@ -685,54 +837,205 @@ func (mc *AnthropicContent) UnmarshalJSON(data []byte) error { type AnthropicContentBlockType string const ( - AnthropicContentBlockTypeText AnthropicContentBlockType = "text" - AnthropicContentBlockTypeImage AnthropicContentBlockType = "image" - AnthropicContentBlockTypeDocument AnthropicContentBlockType = "document" - AnthropicContentBlockTypeToolUse AnthropicContentBlockType = "tool_use" - AnthropicContentBlockTypeServerToolUse AnthropicContentBlockType = "server_tool_use" - AnthropicContentBlockTypeToolResult AnthropicContentBlockType = "tool_result" - AnthropicContentBlockTypeWebSearchToolResult AnthropicContentBlockType = "web_search_tool_result" - AnthropicContentBlockTypeWebSearchToolResultError AnthropicContentBlockType = "web_search_tool_result_error" - AnthropicContentBlockTypeWebSearchResult AnthropicContentBlockType = "web_search_result" - AnthropicContentBlockTypeWebFetchToolResult AnthropicContentBlockType = "web_fetch_tool_result" - AnthropicContentBlockTypeMCPToolUse AnthropicContentBlockType = "mcp_tool_use" - AnthropicContentBlockTypeMCPToolResult AnthropicContentBlockType = "mcp_tool_result" - AnthropicContentBlockTypeThinking AnthropicContentBlockType = "thinking" - AnthropicContentBlockTypeRedactedThinking 
AnthropicContentBlockType = "redacted_thinking" - AnthropicContentBlockTypeCompaction AnthropicContentBlockType = "compaction" + AnthropicContentBlockTypeText AnthropicContentBlockType = "text" + AnthropicContentBlockTypeImage AnthropicContentBlockType = "image" + AnthropicContentBlockTypeDocument AnthropicContentBlockType = "document" + AnthropicContentBlockTypeSearchResult AnthropicContentBlockType = "search_result" + AnthropicContentBlockTypeToolUse AnthropicContentBlockType = "tool_use" + AnthropicContentBlockTypeServerToolUse AnthropicContentBlockType = "server_tool_use" + AnthropicContentBlockTypeToolResult AnthropicContentBlockType = "tool_result" + AnthropicContentBlockTypeWebSearchToolResult AnthropicContentBlockType = "web_search_tool_result" + AnthropicContentBlockTypeWebSearchToolResultError AnthropicContentBlockType = "web_search_tool_result_error" + AnthropicContentBlockTypeWebSearchResult AnthropicContentBlockType = "web_search_result" + AnthropicContentBlockTypeWebFetchToolResult AnthropicContentBlockType = "web_fetch_tool_result" + AnthropicContentBlockTypeCodeExecutionToolResult AnthropicContentBlockType = "code_execution_tool_result" + AnthropicContentBlockTypeBashCodeExecutionToolResult AnthropicContentBlockType = "bash_code_execution_tool_result" + AnthropicContentBlockTypeTextEditorCodeExecutionToolResult AnthropicContentBlockType = "text_editor_code_execution_tool_result" + AnthropicContentBlockTypeToolSearchToolResult AnthropicContentBlockType = "tool_search_tool_result" + AnthropicContentBlockTypeToolReference AnthropicContentBlockType = "tool_reference" + AnthropicContentBlockTypeContainerUpload AnthropicContentBlockType = "container_upload" + AnthropicContentBlockTypeAdvisorToolResult AnthropicContentBlockType = "advisor_tool_result" + AnthropicContentBlockTypeMCPToolUse AnthropicContentBlockType = "mcp_tool_use" + AnthropicContentBlockTypeMCPToolResult AnthropicContentBlockType = "mcp_tool_result" + AnthropicContentBlockTypeThinking AnthropicContentBlockType = "thinking" + AnthropicContentBlockTypeRedactedThinking AnthropicContentBlockType = "redacted_thinking" + AnthropicContentBlockTypeCompaction AnthropicContentBlockType = "compaction" ) -// AnthropicContentBlock represents content in Anthropic message format +// AnthropicToolCallerType identifies which agentic caller produced a tool +// invocation. Appears on tool_use, server_tool_use, and every *_tool_result +// block per Anthropic docs. +// Source: https://platform.claude.com/docs/en/api/beta/messages/create +type AnthropicToolCallerType string + +const ( + AnthropicToolCallerTypeDirect AnthropicToolCallerType = "direct" + AnthropicToolCallerTypeCodeExecution20250825 AnthropicToolCallerType = "code_execution_20250825" + AnthropicToolCallerTypeCodeExecution20260120 AnthropicToolCallerType = "code_execution_20260120" +) + +// AnthropicToolCaller represents the "caller" union on tool-use and +// tool-result blocks. For the two code-execution variants, ToolID is required +// and identifies the upstream server tool that invoked the nested tool. +type AnthropicToolCaller struct { + Type AnthropicToolCallerType `json:"type"` + ToolID *string `json:"tool_id,omitempty"` // Required for code_execution_* caller types +} + +// AnthropicContentBlock represents content in Anthropic message format. +// This is a fat struct: every optional field here is used by at least one +// block type. Consult Anthropic's content-block docs before adding a field +// so we reuse existing ones where semantics align. 
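To make the per-type field subsets concrete, a minimal sketch follows (a hypothetical helper, assuming it sits inside this package; the literal values are invented):

func exampleContentBlocks() []AnthropicContentBlock {
	text := "All done."
	toolID := "toolu_01"
	toolName := "get_weather"
	direct := AnthropicToolCaller{Type: AnthropicToolCallerTypeDirect}
	return []AnthropicContentBlock{
		// text block: only Type and Text carry meaning.
		{Type: AnthropicContentBlockTypeText, Text: &text},
		// tool_use block: ID, Name, Input, plus the new Caller union.
		{
			Type:   AnthropicContentBlockTypeToolUse,
			ID:     &toolID,
			Name:   &toolName,
			Input:  json.RawMessage(`{"city":"Paris"}`),
			Caller: &direct,
		},
	}
}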
type AnthropicContentBlock struct {
- Type AnthropicContentBlockType `json:"type"` // "text", "image", "document", "tool_use", "tool_result", "thinking"
- Text *string `json:"text,omitempty"` // For text content
- Thinking *string `json:"thinking,omitempty"` // For thinking content
- Signature *string `json:"signature,omitempty"` // For signature content
- Data *string `json:"data,omitempty"` // For data content (encrypted data for redacted thinking, signature does not come with this)
- ToolUseID *string `json:"tool_use_id,omitempty"` // For tool_result content
- ID *string `json:"id,omitempty"` // For tool_use content
- Name *string `json:"name,omitempty"` // For tool_use content
- Input json.RawMessage `json:"input,omitempty"` // For tool_use content (json.RawMessage preserves key ordering for prompt caching)
- ServerName *string `json:"server_name,omitempty"` // For mcp_tool_use content
- Content *AnthropicContent `json:"content,omitempty"` // For tool_result content
- IsError *bool `json:"is_error,omitempty"` // For tool_result content, indicates error state
- Source *AnthropicSource `json:"source,omitempty"` // For image/document content
- CacheControl *schemas.CacheControl `json:"cache_control,omitempty"` // For cache control content
- Citations *AnthropicCitations `json:"citations,omitempty"` // For document content
- Context *string `json:"context,omitempty"` // For document content
- Title *string `json:"title,omitempty"` // For document content
- URL *string `json:"url,omitempty"` // For web_search_result content
- EncryptedContent *string `json:"encrypted_content,omitempty"` // For web_search_result content
- PageAge *string `json:"page_age,omitempty"` // For web_search_result content
- ErrorCode *string `json:"error_code,omitempty"` // For web_search_tool_result_error content
-}
-
-// AnthropicSource represents image or document source in Anthropic format
+ Type AnthropicContentBlockType `json:"type"` // Discriminator
+ Text *string `json:"text,omitempty"` // text block; also "advisor_result" variant
+ Thinking *string `json:"thinking,omitempty"` // thinking block
+ Signature *string `json:"signature,omitempty"` // thinking block signature
+ Data *string `json:"data,omitempty"` // redacted_thinking encrypted data (no signature)
+ ToolUseID *string `json:"tool_use_id,omitempty"` // tool_result, *_tool_result blocks
+ ID *string `json:"id,omitempty"` // tool_use, server_tool_use, mcp_tool_use
+ Name *string `json:"name,omitempty"` // tool_use, server_tool_use (tool_reference carries its name in the separate ToolName field below)
+ Input json.RawMessage `json:"input,omitempty"` // tool_use / server_tool_use (json.RawMessage preserves key ordering for prompt caching)
+ ServerName *string `json:"server_name,omitempty"` // mcp_tool_use
+ Content *AnthropicContent `json:"content,omitempty"` // tool_result, *_tool_result; inner structured content or string
+ IsError *bool `json:"is_error,omitempty"` // tool_result, *_tool_result
+ Source *AnthropicBlockSource `json:"source,omitempty"` // image, document (SourceObj) or search_result (SourceStr) — union type
+ CacheControl *schemas.CacheControl `json:"cache_control,omitempty"` // any block
+ Citations *AnthropicCitations `json:"citations,omitempty"` // text, document, search_result (request config) or response citations array
+ Context *string `json:"context,omitempty"` // document
+ Title *string `json:"title,omitempty"` // document, search_result, web_search_result
+ URL *string `json:"url,omitempty"` // web_search_result, web_fetch_result
+ EncryptedContent *string `json:"encrypted_content,omitempty"` // web_search_result, advisor_redacted_result, compaction
+ PageAge *string `json:"page_age,omitempty"` // web_search_result
+ ErrorCode *string `json:"error_code,omitempty"` // any *_tool_result_error variant
+ Caller *AnthropicToolCaller `json:"caller,omitempty"` // tool_use, server_tool_use, every *_tool_result block
+
+ // search_result block: the API reuses the literal JSON key "source" with a
+ // plain string value, which would collide with the object form used by
+ // image/document blocks. Resolved via the AnthropicBlockSource
+ // string-or-object union on the Source field above: SourceStr carries the
+ // search_result string, SourceObj the image/document object.
+
+ // code_execution_tool_result / bash_code_execution_tool_result result-variant fields
+ Stdout *string `json:"stdout,omitempty"`
+ Stderr *string `json:"stderr,omitempty"`
+ ReturnCode *int `json:"return_code,omitempty"`
+ EncryptedStdout *string `json:"encrypted_stdout,omitempty"`
+
+ // text_editor_code_execution_tool_result variants
+ FileType *string `json:"file_type,omitempty"` // view_result: "text"|"image"|"pdf"
+ StartLine *int `json:"start_line,omitempty"` // view_result
+ NumLines *int `json:"num_lines,omitempty"` // view_result
+ TotalLines *int `json:"total_lines,omitempty"` // view_result
+ IsFileUpdate *bool `json:"is_file_update,omitempty"` // create_result
+ OldStart *int `json:"old_start,omitempty"` // str_replace_result
+ OldLines *int `json:"old_lines,omitempty"` // str_replace_result
+ NewStart *int `json:"new_start,omitempty"` // str_replace_result
+ NewLines *int `json:"new_lines,omitempty"` // str_replace_result
+ Lines []string `json:"lines,omitempty"` // str_replace_result
+ ErrorMessage *string `json:"error_message,omitempty"` // text_editor error variant
+
+ // tool_search_tool_result success variant
+ ToolReferences []AnthropicContentBlock `json:"tool_references,omitempty"` // tool_search_tool_search_result (array of tool_reference blocks)
+
+ // tool_reference block — tool_name field on the block itself
+ ToolName *string `json:"tool_name,omitempty"`
+
+ // container_upload block + web_fetch_result inner file_id reference
+ FileID *string `json:"file_id,omitempty"`
+
+ // web_fetch_tool_result / web_fetch_result inner retrieval timestamp
+ RetrievedAt *string `json:"retrieved_at,omitempty"`
+}
+
+// AnthropicSource represents image or document source in Anthropic format.
+//
+// Per docs (https://platform.claude.com/docs/en/api/messages/create) the
+// documented type values and their carrying fields are:
+//   - "base64" → MediaType + Data
+//   - "url" → URL
+//   - "text" → MediaType ("text/plain") + Data
+//   - "content" ("content_block" alias) → Content (nested string OR array of
+//     inner blocks); recursive ContentBlockSource used inside DocumentBlockParam
+//   - "file" → FileID (requires files-api-2025-04-14 beta)
+//
+// The struct is a superset — only the fields relevant to Type should be set
+// at a time.
 type AnthropicSource struct {
- Type string `json:"type"` // "base64", "url", "text", "content_block"
- MediaType *string `json:"media_type,omitempty"` // "image/jpeg", "image/png", "application/pdf", etc.
- Data *string `json:"data,omitempty"` // Base64-encoded data (for base64 type)
- URL *string `json:"url,omitempty"` // URL (for url type)
+ Type string `json:"type"` // "base64" | "url" | "text" | "content" | "content_block" (alias) | "file"
+ MediaType *string `json:"media_type,omitempty"` // "image/jpeg", "image/png", "application/pdf", etc.
+ Data *string `json:"data,omitempty"` // Base64-encoded data (base64 type) or text payload (text type)
+ URL *string `json:"url,omitempty"` // URL (url type)
+ FileID *string `json:"file_id,omitempty"` // File ID (file type; requires files-api-2025-04-14 beta)
+ Content json.RawMessage `json:"content,omitempty"` // For content_block type: nested content — string OR array of inner blocks (TextBlockParam / ImageBlockParam). json.RawMessage preserves exact bytes for prompt caching.
+}
+
+// AnthropicBlockSource is the union "source" field on a content block.
+//
+// Anthropic's API uses the literal JSON key "source" for two incompatible
+// shapes depending on which block the key appears on:
+//
+//   - On `image` / `document` blocks: an OBJECT describing the source
+//     (type + media_type + data/url/file_id). Modeled by AnthropicSource.
+//   - On `search_result` blocks: a plain STRING identifier (URL/path).
+//
+// This union wrapper lets AnthropicContentBlock carry either shape under
+// the single "source" JSON key.
+//
+// Docs:
+//   - https://platform.claude.com/docs/en/api/messages/create (ImageBlockParam, DocumentBlockParam)
+//   - https://platform.claude.com/docs/en/api/beta/messages/create (SearchResultBlockParam)
+type AnthropicBlockSource struct {
+ SourceStr *string // search_result: plain string (URL, path, identifier)
+ SourceObj *AnthropicSource // image / document: object form
+}
+
+// MarshalJSON emits either the string or the object form directly (unwrapped).
+// Matches the union-type idiom used by AnthropicCitations, AnthropicContainer,
+// and CompactManagementEditTypeAndValue.
+func (s AnthropicBlockSource) MarshalJSON() ([]byte, error) {
+ if s.SourceStr != nil && s.SourceObj != nil {
+ return nil, fmt.Errorf("both SourceStr and SourceObj are set; only one should be non-nil")
+ }
+ if s.SourceStr != nil {
+ return providerUtils.MarshalSorted(*s.SourceStr)
+ }
+ if s.SourceObj != nil {
+ return providerUtils.MarshalSorted(s.SourceObj)
+ }
+ return providerUtils.MarshalSorted(nil)
+}
+
+// UnmarshalJSON decodes either the string form (search_result blocks) or the
+// object form (image/document blocks) into the union: sonic-decode into each
+// variant, first success wins, matching AnthropicCitations.UnmarshalJSON.
+// Clears the inactive arm on each success so a reused struct never ends up
+// with both fields populated (which MarshalJSON rejects). Explicitly handles
+// JSON null.
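A short round-trip sketch of the two arms (a hypothetical helper; assumes sonic and this package's types are in scope):

func exampleBlockSourceRoundTrip() error {
	// search_result form: a bare JSON string lands in SourceStr.
	var fromSearch AnthropicBlockSource
	if err := sonic.Unmarshal([]byte(`"https://example.com/kb/42"`), &fromSearch); err != nil {
		return err
	}
	// image/document form: an object lands in SourceObj.
	var fromImage AnthropicBlockSource
	raw := []byte(`{"type":"base64","media_type":"image/png","data":"iVBORw0KGgo="}`)
	if err := sonic.Unmarshal(raw, &fromImage); err != nil {
		return err
	}
	// MarshalJSON re-emits whichever arm is set, unwrapped.
	_, err := fromImage.MarshalJSON()
	return err
}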
+func (s *AnthropicBlockSource) UnmarshalJSON(data []byte) error { + trimmed := bytes.TrimSpace(data) + if len(trimmed) == 0 || bytes.Equal(trimmed, []byte("null")) { + s.SourceStr = nil + s.SourceObj = nil + return nil + } + var str string + if err := sonic.Unmarshal(data, &str); err == nil { + s.SourceStr = &str + s.SourceObj = nil + return nil + } + var obj AnthropicSource + if err := sonic.Unmarshal(data, &obj); err == nil { + s.SourceStr = nil + s.SourceObj = &obj + return nil + } + return fmt.Errorf("source field is neither a string nor an AnthropicSource object") } type AnthropicCitationType string @@ -796,7 +1099,7 @@ func (ac *AnthropicCitations) MarshalJSON() ([]byte, error) { ac.TextCitations = nil } if ac.Config != nil && ac.TextCitations != nil { - return nil, fmt.Errorf("AnthropicCitations: both Config and TextCitations are set; only one should be non-nil") + return nil, fmt.Errorf("both Config and TextCitations are set; only one should be non-nil") } if ac.Config != nil { @@ -840,7 +1143,9 @@ type AnthropicToolType string const ( AnthropicToolTypeCustom AnthropicToolType = "custom" + AnthropicToolTypeBash20241022 AnthropicToolType = "bash_20241022" // computer-use-2024-10-22 beta AnthropicToolTypeBash20250124 AnthropicToolType = "bash_20250124" + AnthropicToolTypeComputer20241022 AnthropicToolType = "computer_20241022" // computer-use-2024-10-22 beta AnthropicToolTypeComputer20250124 AnthropicToolType = "computer_20250124" AnthropicToolTypeComputer20251124 AnthropicToolType = "computer_20251124" // for claude-opus-4.5, claude-opus-4.6, claude-sonnet-4.6 AnthropicToolTypeTextEditor20250124 AnthropicToolType = "text_editor_20250124" @@ -908,10 +1213,19 @@ type AnthropicToolWebSearch struct { } type AnthropicToolWebFetch struct { - MaxUses *int `json:"max_uses,omitempty"` - AllowedDomains []string `json:"allowed_domains,omitempty"` - BlockedDomains []string `json:"blocked_domains,omitempty"` - MaxContentTokens *int `json:"max_content_tokens,omitempty"` + MaxUses *int `json:"max_uses,omitempty"` + AllowedDomains []string `json:"allowed_domains,omitempty"` + BlockedDomains []string `json:"blocked_domains,omitempty"` + MaxContentTokens *int `json:"max_content_tokens,omitempty"` + Citations *AnthropicCitations `json:"citations,omitempty"` // {enabled: bool} — toggles citation emission on fetched documents + UseCache *bool `json:"use_cache,omitempty"` // web_fetch_20260309+ only — enables server-side page cache +} + +// AnthropicToolTextEditor holds fields specific to the text_editor tool +// variants. Only text_editor_20250728 (and later) honours max_characters +// as a view-truncation cap. 
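A configuration sketch using the new web_fetch fields (the helper name and values are hypothetical; field names are from this diff):

func exampleWebFetchOptions() *AnthropicToolWebFetch {
	maxUses := 5
	useCache := true
	return &AnthropicToolWebFetch{
		MaxUses:        &maxUses,
		AllowedDomains: []string{"docs.example.com"},
		UseCache:       &useCache, // honoured by web_fetch_20260309+ only, per the field comment
	}
}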
+type AnthropicToolTextEditor struct { + MaxCharacters *int `json:"max_characters,omitempty"` // text_editor_20250728+ only } // AnthropicToolInputExample represents an input example for a tool (beta feature) @@ -922,19 +1236,21 @@ type AnthropicToolInputExample struct { // AnthropicTool represents a tool in Anthropic format type AnthropicTool struct { - Name string `json:"name"` - Type *AnthropicToolType `json:"type,omitempty"` - Description *string `json:"description,omitempty"` - InputSchema *schemas.ToolFunctionParameters `json:"input_schema,omitempty"` - CacheControl *schemas.CacheControl `json:"cache_control,omitempty"` - DeferLoading *bool `json:"defer_loading,omitempty"` // Beta: defer loading of tool definition - Strict *bool `json:"strict,omitempty"` // Whether to enforce strict parameter validation - AllowedCallers []string `json:"allowed_callers,omitempty"` // Beta: which callers can use this tool - InputExamples []AnthropicToolInputExample `json:"input_examples,omitempty"` // Beta: example inputs for the tool + Name string `json:"name"` + Type *AnthropicToolType `json:"type,omitempty"` + Description *string `json:"description,omitempty"` + InputSchema *schemas.ToolFunctionParameters `json:"input_schema,omitempty"` + CacheControl *schemas.CacheControl `json:"cache_control,omitempty"` + DeferLoading *bool `json:"defer_loading,omitempty"` // Beta: defer loading of tool definition + Strict *bool `json:"strict,omitempty"` // Whether to enforce strict parameter validation + AllowedCallers []string `json:"allowed_callers,omitempty"` // Beta: which callers can use this tool + InputExamples []AnthropicToolInputExample `json:"input_examples,omitempty"` // Beta: example inputs for the tool + EagerInputStreaming *bool `json:"eager_input_streaming,omitempty"` // Custom tools only; beta fine-grained-tool-streaming-2025-05-14 *AnthropicToolComputerUse *AnthropicToolWebSearch *AnthropicToolWebFetch + *AnthropicToolTextEditor // MCP toolset (mcp-client-2025-11-20 format) — embedded when Type is nil and MCPToolset is set MCPToolset *AnthropicMCPToolsetTool `json:"-"` // Serialized via custom MarshalJSON diff --git a/core/providers/anthropic/utils.go b/core/providers/anthropic/utils.go index a0ff905568..dbde522d6f 100644 --- a/core/providers/anthropic/utils.go +++ b/core/providers/anthropic/utils.go @@ -14,6 +14,77 @@ import ( "github.com/maximhq/bifrost/core/schemas" ) +// anthropicToolTypePrefixToFeature maps Anthropic server-tool type prefixes +// to the corresponding ProviderFeatureSupport flag. Mirrors the structure of +// betaHeaderPrefixToFeature (defined later in this file) so tool-type gating +// and beta-header gating share the same shape. +// +// Prefix-based so future version bumps (e.g. web_search_20261231) flow +// through without a code change. Exact-match types (currently just +// "mcp_toolset") are handled separately. 
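A sketch of a custom tool exercising the gated flags above (the helper name and values are hypothetical):

func exampleAdvancedTool() AnthropicTool {
	strict, eager, deferLoad := true, true, true
	return AnthropicTool{
		Name:                "lookup_order",
		Strict:              &strict,    // gated by StructuredOutputs
		DeferLoading:        &deferLoad, // gated by the AdvancedToolUse bundle
		AllowedCallers:      []string{"code_execution_20260120"}, // also bundle-gated
		EagerInputStreaming: &eager, // gated by EagerInputStreaming (fine-grained-tool-streaming-2025-05-14)
	}
}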
+var anthropicToolTypePrefixToFeature = map[string]func(ProviderFeatureSupport) bool{ + "web_search_": func(f ProviderFeatureSupport) bool { return f.WebSearch }, + "web_fetch_": func(f ProviderFeatureSupport) bool { return f.WebFetch }, + "code_execution_": func(f ProviderFeatureSupport) bool { return f.CodeExecution }, + "computer_": func(f ProviderFeatureSupport) bool { return f.ComputerUse }, + "bash_": func(f ProviderFeatureSupport) bool { return f.Bash }, + "memory_": func(f ProviderFeatureSupport) bool { return f.Memory }, + "text_editor_": func(f ProviderFeatureSupport) bool { return f.TextEditor }, + "tool_search_tool_": func(f ProviderFeatureSupport) bool { return f.ToolSearch }, +} + +// isAnthropicServerToolSupported returns whether the given Anthropic server-tool +// type string is supported by the provider's ProviderFeatureSupport. Unknown +// types return true (forward-compat: let the provider reject if truly invalid +// rather than Bifrost dropping a tool Anthropic has just added). +func isAnthropicServerToolSupported(toolType string, features ProviderFeatureSupport) bool { + // Exact-match types first. + if toolType == "mcp_toolset" { + return features.MCP + } + // Prefix match for versioned types. + for prefix, check := range anthropicToolTypePrefixToFeature { + if strings.HasPrefix(toolType, prefix) { + return check(features) + } + } + return true +} + +// ValidateChatToolsForProvider is the chat-path mirror of +// ValidateToolsForProvider. It partitions []schemas.ChatTool into a keep-set +// (function/custom tools + server tools supported on the target provider) +// and a dropped-set (server-tool Type strings the provider doesn't support +// per ProviderFeatures). +// +// Does NOT mutate its input. Callers decide the policy (silent strip vs +// fail-fast). The Bedrock ChatCompletion path uses silent strip so the +// request still reaches the provider without the unsupported tool; the model +// responds with a prose completion instead of tool use. +// +// Unknown providers keep all tools (safe default for custom providers), +// matching ValidateToolsForProvider. +func ValidateChatToolsForProvider(tools []schemas.ChatTool, provider schemas.ModelProvider) (keep []schemas.ChatTool, dropped []string) { + features, ok := ProviderFeatures[provider] + if !ok { + return tools, nil + } + for _, tool := range tools { + // Function/custom tools are universal — always keep. + if tool.Function != nil || tool.Custom != nil { + keep = append(keep, tool) + continue + } + t := string(tool.Type) + if isAnthropicServerToolSupported(t, features) { + keep = append(keep, tool) + } else { + dropped = append(dropped, t) + } + } + return keep, dropped +} + // ValidateToolsForProvider checks if all tools in the request are supported by the given provider. // Returns an error for the first unsupported tool found. func ValidateToolsForProvider(tools []schemas.ResponsesTool, provider schemas.ModelProvider) error { @@ -90,6 +161,448 @@ var ( } ) +// stripUnsupportedAnthropicFields removes request-level and tool-level fields +// that the target Anthropic-family provider does not support, according to the +// ProviderFeatures map (types.go). Tool-type validation (fail-closed) is handled +// separately by ValidateToolsForProvider; this helper handles request-level +// fields (strip silently, since they're additive enhancements). +// +// Mutates req in place. Safe to call multiple times. 
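A usage sketch of the silent-strip policy described above (the helper and the warn callback are hypothetical):

func exampleStripForBedrock(tools []schemas.ChatTool, warn func(string)) []schemas.ChatTool {
	keep, dropped := ValidateChatToolsForProvider(tools, schemas.Bedrock)
	if len(dropped) > 0 {
		warn(fmt.Sprintf("dropping Anthropic server tools unsupported on bedrock: %v", dropped))
	}
	return keep
}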
+func stripUnsupportedAnthropicFields(req *AnthropicMessageRequest, provider schemas.ModelProvider, model string) { + if req == nil { + return + } + features, ok := ProviderFeatures[provider] + if !ok { + // Unknown provider — safe default: don't strip anything. + return + } + + // Request-level fields gated by ProviderFeatures flags. + if req.Container != nil { + // Skills form (object with skills[]) is beta-gated; bare string id is universal. + // Intent signal: non-empty skills = caller explicitly wants skills; empty + // skills:[] = likely caller oversight we can silently correct. + hasSkills := req.Container.ContainerObject != nil && len(req.Container.ContainerObject.Skills) > 0 + // Strip an explicit empty or non-empty skills array on Skills=false + // providers. omitempty already handles this at serialize time for empty + // arrays, but we clear it explicitly so hasSkills-based decisions below + // and raw-path parity both stay correct. + if !features.Skills && req.Container.ContainerObject != nil && req.Container.ContainerObject.Skills != nil { + req.Container.ContainerObject.Skills = nil + } + switch { + case hasSkills && !features.Skills: + // Caller wanted non-empty skills but provider doesn't support them. + req.Container = nil + case !hasSkills && !features.ContainerBasic: + req.Container = nil + } + } + if len(req.MCPServers) > 0 && !features.MCP { + req.MCPServers = nil + } + // Speed is both provider-gated (FastMode flag) and model-gated + // (Opus 4.6 only per SupportsFastMode). Strip if either gate fails — + // Anthropic's API rejects speed:"fast" on non-Opus-4.6 models with a 400. + if req.Speed != nil && (!features.FastMode || !SupportsFastMode(model)) { + req.Speed = nil + } + if req.OutputConfig != nil && req.OutputConfig.TaskBudget != nil && !features.TaskBudgets { + req.OutputConfig.TaskBudget = nil + // Clean up an empty OutputConfig so it doesn't serialize as {} + if req.OutputConfig.Format == nil && req.OutputConfig.Effort == nil { + req.OutputConfig = nil + } + } + if req.InferenceGeo != nil && !features.InferenceGeo { + req.InferenceGeo = nil + } + // cache_control.scope — strip on providers without PromptCachingScope + // support at every slot scope can live: top-level request, tools, system + // blocks, and message content blocks. Vertex additionally uses the + // marshal-time SetStripCacheControlScope mechanism (vertex/utils.go:104, + // types.go MarshalJSON); after this strip runs, that marshal-time pass + // becomes a safe no-op for Vertex (nothing left to strip). + if !features.PromptCachingScope { + // Top-level. + if req.CacheControl != nil && req.CacheControl.Scope != nil { + req.CacheControl.Scope = nil + // If scope was the only meaningful field, drop the whole CacheControl + // so we don't serialize an empty object. + if req.CacheControl.TTL == nil && req.CacheControl.Type == "" { + req.CacheControl = nil + } + } + // Per-tool cache_control.scope. + for i := range req.Tools { + if req.Tools[i].CacheControl != nil && req.Tools[i].CacheControl.Scope != nil { + req.Tools[i].CacheControl.Scope = nil + // Drop the parent if scope was the only meaningful field. + if req.Tools[i].CacheControl.TTL == nil && req.Tools[i].CacheControl.Type == "" { + req.Tools[i].CacheControl = nil + } + } + } + // System block scopes. 
+ if req.System != nil {
+ for i := range req.System.ContentBlocks {
+ if req.System.ContentBlocks[i].CacheControl != nil && req.System.ContentBlocks[i].CacheControl.Scope != nil {
+ req.System.ContentBlocks[i].CacheControl.Scope = nil
+ if req.System.ContentBlocks[i].CacheControl.TTL == nil && req.System.ContentBlocks[i].CacheControl.Type == "" {
+ req.System.ContentBlocks[i].CacheControl = nil
+ }
+ }
+ }
+ }
+ // Message block scopes.
+ for mi := range req.Messages {
+ for ci := range req.Messages[mi].Content.ContentBlocks {
+ cc := req.Messages[mi].Content.ContentBlocks[ci].CacheControl
+ if cc != nil && cc.Scope != nil {
+ cc.Scope = nil
+ if cc.TTL == nil && cc.Type == "" {
+ req.Messages[mi].Content.ContentBlocks[ci].CacheControl = nil
+ }
+ }
+ }
+ }
+ }
+ if req.ContextManagement != nil {
+ // Gate edits by their type — compaction vs context-editing flags.
+ kept := make([]ContextManagementEdit, 0, len(req.ContextManagement.Edits))
+ for _, edit := range req.ContextManagement.Edits {
+ switch edit.Type {
+ case ContextManagementEditTypeCompact:
+ if features.Compaction {
+ kept = append(kept, edit)
+ }
+ case ContextManagementEditTypeClearToolUses, ContextManagementEditTypeClearThinking:
+ if features.ContextEditing {
+ kept = append(kept, edit)
+ }
+ default:
+ // Unknown edit type — keep and let upstream reject.
+ kept = append(kept, edit)
+ }
+ }
+ if len(kept) == 0 {
+ req.ContextManagement = nil
+ } else {
+ req.ContextManagement.Edits = kept
+ }
+ }
+
+ // Tool-level flags — strip per-tool without dropping the tool itself.
+ for i := range req.Tools {
+ tool := &req.Tools[i]
+ if tool.DeferLoading != nil && !features.AdvancedToolUse {
+ tool.DeferLoading = nil
+ }
+ if len(tool.AllowedCallers) > 0 && !features.AdvancedToolUse {
+ tool.AllowedCallers = nil
+ }
+ // InputExamples has its own feature flag (InputExamples) because
+ // Bedrock supports the tool-examples-2025-10-29 header standalone —
+ // without the full advanced-tool-use-2025-11-20 bundle. On Anthropic
+ // and Azure, the bundle flag (AdvancedToolUse) is also set, so either
+ // gate would work there.
+ if len(tool.InputExamples) > 0 && !features.InputExamples {
+ tool.InputExamples = nil
+ }
+ if tool.EagerInputStreaming != nil && !features.EagerInputStreaming {
+ tool.EagerInputStreaming = nil
+ }
+ if tool.Strict != nil && !features.StructuredOutputs {
+ tool.Strict = nil
+ }
+ }
+}
+
+// stripUnsupportedFieldsFromRawBody is the raw-JSON equivalent of
+// stripUnsupportedAnthropicFields. It mutates the request body bytes using
+// sjson/gjson (preserving key order for prompt caching) so the raw-body
+// passthrough path has behavioural parity with the typed conversion path.
+//
+// Scope: every field the typed helper handles.
+//   - top-level: speed (provider + model gated), container (.skills gated by
+//     features.Skills, bare string by features.ContainerBasic), mcp_servers,
+//     inference_geo, cache_control.scope, output_config.task_budget,
+//     context_management.edits[] (gated per edit type).
+//   - nested: tool.CacheControl.Scope, system block scopes, message block
+//     scopes (all stripped when !features.PromptCachingScope).
+//   - per-tool: defer_loading, allowed_callers (AdvancedToolUse bundle),
+//     input_examples (narrow InputExamples flag), eager_input_streaming
+//     (EagerInputStreaming), strict (StructuredOutputs).
+//
+// Unknown providers: safe default — no stripping (parity with the typed helper).
+// Unknown edit types in context_management: left in place for the provider +// to reject (parity with the typed helper). +func stripUnsupportedFieldsFromRawBody(jsonBody []byte, provider schemas.ModelProvider, model string) ([]byte, error) { + if len(jsonBody) == 0 { + return jsonBody, nil + } + features, ok := ProviderFeatures[provider] + if !ok { + return jsonBody, nil + } + + // Fall back to body-embedded model when caller didn't pass one. + if model == "" { + if modelResult := providerUtils.GetJSONField(jsonBody, "model"); modelResult.Exists() { + model = modelResult.String() + } + } + + var err error + + // speed — provider AND model gate + if providerUtils.JSONFieldExists(jsonBody, "speed") { + if !features.FastMode || !SupportsFastMode(model) { + jsonBody, err = providerUtils.DeleteJSONField(jsonBody, "speed") + if err != nil { + return nil, fmt.Errorf("strip raw speed: %w", err) + } + } + } + + // inference_geo + if !features.InferenceGeo && providerUtils.JSONFieldExists(jsonBody, "inference_geo") { + jsonBody, err = providerUtils.DeleteJSONField(jsonBody, "inference_geo") + if err != nil { + return nil, fmt.Errorf("strip raw inference_geo: %w", err) + } + } + + // mcp_servers + if !features.MCP && providerUtils.JSONFieldExists(jsonBody, "mcp_servers") { + jsonBody, err = providerUtils.DeleteJSONField(jsonBody, "mcp_servers") + if err != nil { + return nil, fmt.Errorf("strip raw mcp_servers: %w", err) + } + } + + // container — two variants: bare string id (ContainerBasic), or object + // {id, skills[]} where skills require Skills flag. + // Distinguishes three states: no skills field (bare form), skills:[] (empty + // array — caller oversight, silently strip), skills:[…] (non-empty — caller + // explicitly wants skills). Mirrors the typed path's hybrid decision. + if containerResult := providerUtils.GetJSONField(jsonBody, "container"); containerResult.Exists() { + hasSkillsField, hasNonEmptySkills := false, false + if containerResult.IsObject() { + if skills := containerResult.Get("skills"); skills.Exists() { + hasSkillsField = true + if skills.IsArray() && len(skills.Array()) > 0 { + hasNonEmptySkills = true + } + } + } + // Always strip the skills key on Skills=false providers — critical on + // the raw path since bytes flow directly to the provider and an + // explicit empty array would still be rejected as unknown field. + if !features.Skills && hasSkillsField { + jsonBody, err = providerUtils.DeleteJSONField(jsonBody, "container.skills") + if err != nil { + return nil, fmt.Errorf("strip raw container.skills: %w", err) + } + } + drop := false + switch { + case hasNonEmptySkills: + drop = !features.Skills + default: + drop = !features.ContainerBasic + } + if drop { + jsonBody, err = providerUtils.DeleteJSONField(jsonBody, "container") + if err != nil { + return nil, fmt.Errorf("strip raw container: %w", err) + } + } + } + + // output_config.task_budget + if !features.TaskBudgets && providerUtils.JSONFieldExists(jsonBody, "output_config.task_budget") { + jsonBody, err = providerUtils.DeleteJSONField(jsonBody, "output_config.task_budget") + if err != nil { + return nil, fmt.Errorf("strip raw output_config.task_budget: %w", err) + } + // Drop an empty parent so we don't serialize output_config:{} (matches + // typed-path behavior at lines 129-134). 
+ if oc := providerUtils.GetJSONField(jsonBody, "output_config"); oc.IsObject() && len(oc.Map()) == 0 { + jsonBody, err = providerUtils.DeleteJSONField(jsonBody, "output_config") + if err != nil { + return nil, fmt.Errorf("strip raw output_config: %w", err) + } + } + } + + // top-level cache_control.scope + if !features.PromptCachingScope && providerUtils.JSONFieldExists(jsonBody, "cache_control.scope") { + jsonBody, err = providerUtils.DeleteJSONField(jsonBody, "cache_control.scope") + if err != nil { + return nil, fmt.Errorf("strip raw cache_control.scope: %w", err) + } + // Drop an empty parent so we don't serialize cache_control:{} (matches + // typed-path behavior at lines 147-153). + if cc := providerUtils.GetJSONField(jsonBody, "cache_control"); cc.IsObject() && len(cc.Map()) == 0 { + jsonBody, err = providerUtils.DeleteJSONField(jsonBody, "cache_control") + if err != nil { + return nil, fmt.Errorf("strip raw cache_control: %w", err) + } + } + } + + // context_management.edits[] — gate per edit.type. + if editsResult := providerUtils.GetJSONField(jsonBody, "context_management.edits"); editsResult.Exists() && editsResult.IsArray() { + edits := editsResult.Array() + // Collect indices to drop (iterate forwards, delete in reverse). + dropIndices := []int{} + for i, edit := range edits { + editType := edit.Get("type").String() + keep := true + switch editType { + case string(ContextManagementEditTypeCompact): + keep = features.Compaction + case string(ContextManagementEditTypeClearToolUses), string(ContextManagementEditTypeClearThinking): + keep = features.ContextEditing + } + if !keep { + dropIndices = append(dropIndices, i) + } + } + if len(dropIndices) == len(edits) && len(edits) > 0 { + // All edits unsupported — drop the whole context_management. 
+ jsonBody, err = providerUtils.DeleteJSONField(jsonBody, "context_management") + if err != nil { + return nil, fmt.Errorf("strip raw context_management: %w", err) + } + } else { + for i := len(dropIndices) - 1; i >= 0; i-- { + path := fmt.Sprintf("context_management.edits.%d", dropIndices[i]) + jsonBody, err = providerUtils.DeleteJSONField(jsonBody, path) + if err != nil { + return nil, fmt.Errorf("strip raw context_management.edits[%d]: %w", dropIndices[i], err) + } + } + } + } + + // per-tool flags + nested scope + if toolsResult := providerUtils.GetJSONField(jsonBody, "tools"); toolsResult.Exists() && toolsResult.IsArray() { + for i := range toolsResult.Array() { + base := fmt.Sprintf("tools.%d", i) + if !features.AdvancedToolUse { + if providerUtils.JSONFieldExists(jsonBody, base+".defer_loading") { + jsonBody, err = providerUtils.DeleteJSONField(jsonBody, base+".defer_loading") + if err != nil { + return nil, fmt.Errorf("strip raw %s.defer_loading: %w", base, err) + } + } + if providerUtils.JSONFieldExists(jsonBody, base+".allowed_callers") { + jsonBody, err = providerUtils.DeleteJSONField(jsonBody, base+".allowed_callers") + if err != nil { + return nil, fmt.Errorf("strip raw %s.allowed_callers: %w", base, err) + } + } + } + if !features.InputExamples && providerUtils.JSONFieldExists(jsonBody, base+".input_examples") { + jsonBody, err = providerUtils.DeleteJSONField(jsonBody, base+".input_examples") + if err != nil { + return nil, fmt.Errorf("strip raw %s.input_examples: %w", base, err) + } + } + if !features.EagerInputStreaming && providerUtils.JSONFieldExists(jsonBody, base+".eager_input_streaming") { + jsonBody, err = providerUtils.DeleteJSONField(jsonBody, base+".eager_input_streaming") + if err != nil { + return nil, fmt.Errorf("strip raw %s.eager_input_streaming: %w", base, err) + } + } + if !features.StructuredOutputs && providerUtils.JSONFieldExists(jsonBody, base+".strict") { + jsonBody, err = providerUtils.DeleteJSONField(jsonBody, base+".strict") + if err != nil { + return nil, fmt.Errorf("strip raw %s.strict: %w", base, err) + } + } + if !features.PromptCachingScope && providerUtils.JSONFieldExists(jsonBody, base+".cache_control.scope") { + jsonBody, err = providerUtils.DeleteJSONField(jsonBody, base+".cache_control.scope") + if err != nil { + return nil, fmt.Errorf("strip raw %s.cache_control.scope: %w", base, err) + } + // Drop the parent if cache_control is now an empty object, so + // we don't forward a malformed `cache_control: {}` marker. + if ccResult := providerUtils.GetJSONField(jsonBody, base+".cache_control"); ccResult.Exists() && ccResult.IsObject() && len(ccResult.Map()) == 0 { + jsonBody, err = providerUtils.DeleteJSONField(jsonBody, base+".cache_control") + if err != nil { + return nil, fmt.Errorf("strip raw %s.cache_control empty parent: %w", base, err) + } + } + } + } + } + + // Nested scope on system blocks (system can be a string OR array of blocks). 
+ if !features.PromptCachingScope { + if systemResult := providerUtils.GetJSONField(jsonBody, "system"); systemResult.Exists() && systemResult.IsArray() { + for i := range systemResult.Array() { + path := fmt.Sprintf("system.%d.cache_control.scope", i) + if providerUtils.JSONFieldExists(jsonBody, path) { + jsonBody, err = providerUtils.DeleteJSONField(jsonBody, path) + if err != nil { + return nil, fmt.Errorf("strip raw system[%d].cache_control.scope: %w", i, err) + } + parentPath := fmt.Sprintf("system.%d.cache_control", i) + if ccResult := providerUtils.GetJSONField(jsonBody, parentPath); ccResult.Exists() && ccResult.IsObject() && len(ccResult.Map()) == 0 { + jsonBody, err = providerUtils.DeleteJSONField(jsonBody, parentPath) + if err != nil { + return nil, fmt.Errorf("strip raw system[%d].cache_control empty parent: %w", i, err) + } + } + } + } + } + // Nested scope on messages[].content[] blocks. + if messagesResult := providerUtils.GetJSONField(jsonBody, "messages"); messagesResult.Exists() && messagesResult.IsArray() { + messages := messagesResult.Array() + for mi := range messages { + contentResult := providerUtils.GetJSONField(jsonBody, fmt.Sprintf("messages.%d.content", mi)) + if !contentResult.Exists() || !contentResult.IsArray() { + continue + } + for ci := range contentResult.Array() { + path := fmt.Sprintf("messages.%d.content.%d.cache_control.scope", mi, ci) + if providerUtils.JSONFieldExists(jsonBody, path) { + jsonBody, err = providerUtils.DeleteJSONField(jsonBody, path) + if err != nil { + return nil, fmt.Errorf("strip raw messages[%d].content[%d].cache_control.scope: %w", mi, ci, err) + } + parentPath := fmt.Sprintf("messages.%d.content.%d.cache_control", mi, ci) + if ccResult := providerUtils.GetJSONField(jsonBody, parentPath); ccResult.Exists() && ccResult.IsObject() && len(ccResult.Map()) == 0 { + jsonBody, err = providerUtils.DeleteJSONField(jsonBody, parentPath) + if err != nil { + return nil, fmt.Errorf("strip raw messages[%d].content[%d].cache_control empty parent: %w", mi, ci, err) + } + } + } + } + } + } + } + + return jsonBody, nil +} + +// IsOpus47 returns true if the model is Claude Opus 4.7 or a later generation where: +// - Extended thinking (budget_tokens) is removed — only adaptive thinking is supported. +// - temperature, top_p, and top_k are not supported (setting them returns a 400). +func IsOpus47(model string) bool { + model = strings.ToLower(model) + if !strings.Contains(model, "opus") { + return false + } + return strings.Contains(model, "4-7") || strings.Contains(model, "4.7") +} + // SupportsNativeEffort returns true if the model supports Anthropic's native output_config.effort parameter. // Currently supported on Claude Opus 4.5 and Opus 4.6. func SupportsNativeEffort(model string) bool { @@ -101,12 +614,33 @@ func SupportsNativeEffort(model string) bool { strings.Contains(model, "4-6") || strings.Contains(model, "4.6") } +// SupportsFastMode returns true if the model supports speed:"fast" (research +// preview). Per Anthropic's fast-mode docs, only Opus 4.6 supports it; +// requests carrying speed:"fast" to any other model are rejected with 400. +// Beta header: fast-mode-2026-02-01. 
+//
+// Source: https://platform.claude.com/docs/en/build-with-claude/fast-mode
+func SupportsFastMode(model string) bool {
+ model = strings.ToLower(model)
+ if !strings.Contains(model, "opus") {
+ return false
+ }
+ return strings.Contains(model, "4-6") || strings.Contains(model, "4.6")
+}
+
 // SupportsAdaptiveThinking returns true if the model supports thinking.type: "adaptive".
-// Currently only supported on Claude Opus 4.6.
+// Currently supported on Claude Opus 4.6, Claude Sonnet 4.6, and Claude Opus 4.7+.
+// On Opus 4.7+ adaptive is the only thinking-on mode; on Opus 4.6 and Sonnet 4.6 it
+// coexists with the deprecated budget_tokens-based extended thinking.
 func SupportsAdaptiveThinking(model string) bool {
+ if IsOpus47(model) {
+ return true
+ }
 model = strings.ToLower(model)
- return strings.Contains(model, "opus") &&
- (strings.Contains(model, "4-6") || strings.Contains(model, "4.6"))
+ if !strings.Contains(model, "4-6") && !strings.Contains(model, "4.6") {
+ return false
+ }
+ return strings.Contains(model, "opus") || strings.Contains(model, "sonnet")
 }
 
 // MapBifrostEffortToAnthropic maps a Bifrost effort level to an Anthropic effort level.
@@ -118,15 +652,6 @@ func MapBifrostEffortToAnthropic(effort string) string {
 return effort
 }
 
-// MapAnthropicEffortToBifrost maps an Anthropic effort level to a Bifrost effort level.
-// Anthropic supports "max" (Opus 4.6+) which is not in Bifrost's enum; it maps to "high".
-func MapAnthropicEffortToBifrost(effort string) string {
- if effort == "max" {
- return "high"
- }
- return effort
-}
-
 // setEffortOnOutputConfig merges the effort value into the request's OutputConfig,
 // preserving any existing Format field (used for structured outputs).
@@ -183,6 +708,26 @@ func getRequestBodyForResponses(ctx *schemas.BifrostContext, request *schemas.Bi
 if err != nil {
 return nil, providerUtils.NewBifrostOperationError(schemas.ErrProviderRequestMarshal, err, providerName)
 }
+ // Sanitize raw-body fields the target provider does not support.
+ // Behavioural parity with stripUnsupportedAnthropicFields on the typed path.
+ // Feature gating keyed to schemas.Anthropic (not providerName) to match
+ // the typed path below, which also hardcodes schemas.Anthropic — ensuring
+ // custom Anthropic aliases get identical feature lookup in both modes.
+ jsonBody, err = stripUnsupportedFieldsFromRawBody(jsonBody, schemas.Anthropic, "")
+ if err != nil {
+ return nil, providerUtils.NewBifrostOperationError(schemas.ErrProviderRequestMarshal, err, providerName)
+ }
+ // Auto-inject matching anthropic-beta headers for fields the sanitizer
+ // preserved (speed, task_budget, cache_control.scope, input_examples,
+ // defer_loading, allowed_callers, eager_input_streaming, mcp_servers,
+ // structured outputs, etc.). Without this, raw-body callers who supply
+ // gated fields but not headers would 400 upstream. Single source of
+ // truth: probe-unmarshal into the typed struct and reuse the typed
+ // path's header walker.
+	var probe AnthropicMessageRequest
+	if err := schemas.Unmarshal(jsonBody, &probe); err == nil {
+		AddMissingBetaHeadersToContext(ctx, &probe, schemas.Anthropic)
+	}
 	// Remove excluded fields
 	for _, field := range excludeFields {
 		jsonBody, err = providerUtils.DeleteJSONField(jsonBody, field)
@@ -235,6 +780,13 @@ func getRequestBodyForResponses(ctx *schemas.BifrostContext, request *schemas.Bi
 			}
 		}
 	}
+
+	// delete fallbacks field
+	jsonBody, err = providerUtils.DeleteJSONField(jsonBody, "fallbacks")
+	if err != nil {
+		return nil, providerUtils.NewBifrostOperationError(schemas.ErrProviderRequestMarshal, err, providerName)
+	}
+
 	return jsonBody, nil
 }
 
@@ -265,15 +817,48 @@ func AddMissingBetaHeadersToContext(ctx *schemas.BifrostContext, req *AnthropicM
 			headers = appendUniqueHeader(headers, AnthropicStructuredOutputsBetaHeader)
 		}
 	}
-	// Check for advanced-tool-use features
+	// Check for advanced-tool-use features. defer_loading and
+	// allowed_callers are only available as part of the bundle
+	// header; input_examples additionally has a standalone header
+	// (tool-examples-2025-10-29) used on Bedrock where the bundle is
+	// not accepted.
 	if tool.DeferLoading != nil && *tool.DeferLoading {
-		headers = appendUniqueHeader(headers, AnthropicAdvancedToolUseBetaHeader)
+		if !hasProvider || features.AdvancedToolUse {
+			headers = appendUniqueHeader(headers, AnthropicAdvancedToolUseBetaHeader)
+		}
 	}
 	if len(tool.InputExamples) > 0 {
-		headers = appendUniqueHeader(headers, AnthropicAdvancedToolUseBetaHeader)
+		if !hasProvider || features.AdvancedToolUse {
+			// Prefer the bundle header when the provider accepts the
+			// bundle; it covers input_examples transitively.
+			headers = appendUniqueHeader(headers, AnthropicAdvancedToolUseBetaHeader)
+		} else if features.InputExamples {
+			// Fall back to the narrow standalone header (e.g. Bedrock).
+			headers = appendUniqueHeader(headers, AnthropicToolExamplesBetaHeader)
+		}
 	}
 	if len(tool.AllowedCallers) > 0 {
-		headers = appendUniqueHeader(headers, AnthropicAdvancedToolUseBetaHeader)
+		if !hasProvider || features.AdvancedToolUse {
+			headers = appendUniqueHeader(headers, AnthropicAdvancedToolUseBetaHeader)
+		}
 	}
+	// Check for fine-grained tool streaming (eager_input_streaming).
+	// Beta fine-grained-tool-streaming-2025-05-14, required for
+	// input_json_delta streaming on custom tools.
+	if tool.EagerInputStreaming != nil && *tool.EagerInputStreaming {
+		if !hasProvider || features.EagerInputStreaming {
+			headers = appendUniqueHeader(headers, AnthropicEagerInputStreamingBetaHeader)
+		}
+	}
 	// Check for cache control with scope
 	if !hasCachingScope && tool.CacheControl != nil && tool.CacheControl.Scope != nil {
@@ -284,6 +869,14 @@ func AddMissingBetaHeadersToContext(ctx *schemas.BifrostContext, req *AnthropicM
 			}
 		}
 	}
+	// Check for cache control with scope at the top level of the request
+	// (mirrors the tool-, system-, and message-level checks elsewhere in
+	// this function).
+ if !hasCachingScope && req.CacheControl != nil && req.CacheControl.Scope != nil { + if !hasProvider || features.PromptCachingScope { + headers = appendUniqueHeader(headers, AnthropicPromptCachingScopeBetaHeader) + hasCachingScope = true + } + } // Check for compaction if req.ContextManagement != nil { for _, edit := range req.ContextManagement.Edits { @@ -311,12 +904,20 @@ func AddMissingBetaHeadersToContext(ctx *schemas.BifrostContext, req *AnthropicM headers = appendUniqueHeader(headers, AnthropicInterleavedThinkingBetaHeader) } } - // Check for fast mode + // Check for fast mode. Only add the beta header when both the provider + // supports fast mode AND the model does (Opus 4.6 only per + // SupportsFastMode); otherwise sending the header guarantees a 400. if req.Speed != nil && *req.Speed == "fast" { - if !hasProvider || features.FastMode { + if (!hasProvider || features.FastMode) && SupportsFastMode(req.Model) { headers = appendUniqueHeader(headers, AnthropicFastModeBetaHeader) } } + // Check for task budget + if req.OutputConfig != nil && req.OutputConfig.TaskBudget != nil { + if !hasProvider || features.TaskBudgets { + headers = appendUniqueHeader(headers, AnthropicTaskBudgetsBetaHeader) + } + } // Check for output format (structured outputs) if req.OutputFormat != nil { if !hasProvider || features.StructuredOutputs { @@ -393,11 +994,14 @@ var betaHeaderPrefixKnown = []string{ "context-management-", "files-api-", AnthropicAdvancedToolUseBetaHeaderPrefix, + AnthropicToolExamplesBetaHeaderPrefix, AnthropicInterleavedThinkingBetaHeaderPrefix, AnthropicSkillsBetaHeaderPrefix, AnthropicContext1MBetaHeaderPrefix, AnthropicFastModeBetaHeaderPrefix, AnthropicRedactThinkingBetaHeaderPrefix, + AnthropicTaskBudgetsBetaHeaderPrefix, + AnthropicEagerInputStreamingBetaHeaderPrefix, } // betaHeaderPrefixExists checks if any header in existing shares a known prefix with newHeader. 
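Aside (illustrative, not part of the patch): these prefix tables work because every anthropic-beta value is a dated string, so all versions of one feature share a stable undated prefix. A minimal standalone sketch of the matching rule; the later-dated streaming header is hypothetical, standing in for a future revision:

	package main

	import (
		"fmt"
		"strings"
	)

	// knownPrefixes stands in for betaHeaderPrefixKnown: one dated header
	// family per feature, matched by its undated prefix.
	var knownPrefixes = []string{
		"task-budgets-",
		"fine-grained-tool-streaming-",
	}

	// coversSameFeature reports whether an existing header already belongs
	// to the same feature family as newHeader, so the caller can skip
	// appending a duplicate.
	func coversSameFeature(existing []string, newHeader string) bool {
		for _, prefix := range knownPrefixes {
			if !strings.HasPrefix(newHeader, prefix) {
				continue
			}
			for _, h := range existing {
				if strings.HasPrefix(h, prefix) {
					return true
				}
			}
		}
		return false
	}

	func main() {
		existing := []string{"fine-grained-tool-streaming-2025-05-14"}
		fmt.Println(coversSameFeature(existing, "fine-grained-tool-streaming-2026-01-01")) // true: same family, skip
		fmt.Println(coversSameFeature(existing, "task-budgets-2026-01-15"))                // false: different family, append
	}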
@@ -587,11 +1191,14 @@ var betaHeaderPrefixToFeature = map[string]func(ProviderFeatureSupport) bool{ "context-management-": func(f ProviderFeatureSupport) bool { return f.ContextEditing }, "files-api-": func(f ProviderFeatureSupport) bool { return f.FilesAPI }, AnthropicAdvancedToolUseBetaHeaderPrefix: func(f ProviderFeatureSupport) bool { return f.AdvancedToolUse }, + AnthropicToolExamplesBetaHeaderPrefix: func(f ProviderFeatureSupport) bool { return f.InputExamples }, AnthropicInterleavedThinkingBetaHeaderPrefix: func(f ProviderFeatureSupport) bool { return f.InterleavedThinking }, AnthropicSkillsBetaHeaderPrefix: func(f ProviderFeatureSupport) bool { return f.Skills }, AnthropicContext1MBetaHeaderPrefix: func(f ProviderFeatureSupport) bool { return f.Context1M }, AnthropicFastModeBetaHeaderPrefix: func(f ProviderFeatureSupport) bool { return f.FastMode }, AnthropicRedactThinkingBetaHeaderPrefix: func(f ProviderFeatureSupport) bool { return f.RedactThinking }, + AnthropicTaskBudgetsBetaHeaderPrefix: func(f ProviderFeatureSupport) bool { return f.TaskBudgets }, + AnthropicEagerInputStreamingBetaHeaderPrefix: func(f ProviderFeatureSupport) bool { return f.EagerInputStreaming }, } // MergeBetaHeaders collects anthropic-beta values from provider ExtraHeaders and @@ -1080,7 +1687,7 @@ func ConvertToAnthropicImageBlock(block schemas.ChatContentBlock) AnthropicConte imageBlock := AnthropicContentBlock{ Type: AnthropicContentBlockTypeImage, CacheControl: block.CacheControl, - Source: &AnthropicSource{}, + Source: &AnthropicBlockSource{SourceObj: &AnthropicSource{}}, } if block.ImageURLStruct == nil { @@ -1091,8 +1698,8 @@ func ConvertToAnthropicImageBlock(block schemas.ChatContentBlock) AnthropicConte sanitizedURL, err := schemas.SanitizeImageURL(block.ImageURLStruct.URL) if err != nil { // Best-effort: treat as a regular URL without sanitization - imageBlock.Source.Type = "url" - imageBlock.Source.URL = &block.ImageURLStruct.URL + imageBlock.Source.SourceObj.Type = "url" + imageBlock.Source.SourceObj.URL = &block.ImageURLStruct.URL return imageBlock } urlTypeInfo := schemas.ExtractURLTypeInfo(sanitizedURL) @@ -1113,18 +1720,18 @@ func ConvertToAnthropicImageBlock(block schemas.ChatContentBlock) AnthropicConte // Convert to Anthropic source format if formattedImgContent.Type == schemas.ImageContentTypeURL { - imageBlock.Source.Type = "url" - imageBlock.Source.URL = &formattedImgContent.URL + imageBlock.Source.SourceObj.Type = "url" + imageBlock.Source.SourceObj.URL = &formattedImgContent.URL } else { if formattedImgContent.MediaType != "" { - imageBlock.Source.MediaType = &formattedImgContent.MediaType + imageBlock.Source.SourceObj.MediaType = &formattedImgContent.MediaType } - imageBlock.Source.Type = "base64" + imageBlock.Source.SourceObj.Type = "base64" // Use the base64 data without the data URL prefix if urlTypeInfo.DataURLWithoutPrefix != nil { - imageBlock.Source.Data = urlTypeInfo.DataURLWithoutPrefix + imageBlock.Source.SourceObj.Data = urlTypeInfo.DataURLWithoutPrefix } else { - imageBlock.Source.Data = &formattedImgContent.URL + imageBlock.Source.SourceObj.Data = &formattedImgContent.URL } } @@ -1136,7 +1743,7 @@ func ConvertToAnthropicDocumentBlock(block schemas.ChatContentBlock) AnthropicCo documentBlock := AnthropicContentBlock{ Type: AnthropicContentBlockTypeDocument, CacheControl: block.CacheControl, - Source: &AnthropicSource{}, + Source: &AnthropicBlockSource{SourceObj: &AnthropicSource{}}, } if block.Citations != nil { @@ -1156,8 +1763,8 @@ func 
ConvertToAnthropicDocumentBlock(block schemas.ChatContentBlock) AnthropicCo // Handle file URL if file.FileURL != nil && *file.FileURL != "" { - documentBlock.Source.Type = "url" - documentBlock.Source.URL = file.FileURL + documentBlock.Source.SourceObj.Type = "url" + documentBlock.Source.SourceObj.URL = file.FileURL return documentBlock } @@ -1167,8 +1774,8 @@ func ConvertToAnthropicDocumentBlock(block schemas.ChatContentBlock) AnthropicCo // Check if it's plain text based on file type if file.FileType != nil && (*file.FileType == "text/plain" || *file.FileType == "txt") { - documentBlock.Source.Type = "text" - documentBlock.Source.Data = &fileData + documentBlock.Source.SourceObj.Type = "text" + documentBlock.Source.SourceObj.Data = &fileData return documentBlock } @@ -1177,30 +1784,30 @@ func ConvertToAnthropicDocumentBlock(block schemas.ChatContentBlock) AnthropicCo if urlTypeInfo.DataURLWithoutPrefix != nil { // It's a data URL, extract the base64 content - documentBlock.Source.Type = "base64" - documentBlock.Source.Data = urlTypeInfo.DataURLWithoutPrefix + documentBlock.Source.SourceObj.Type = "base64" + documentBlock.Source.SourceObj.Data = urlTypeInfo.DataURLWithoutPrefix // Set media type from data URL or file type if urlTypeInfo.MediaType != nil { - documentBlock.Source.MediaType = urlTypeInfo.MediaType + documentBlock.Source.SourceObj.MediaType = urlTypeInfo.MediaType } else if file.FileType != nil { - documentBlock.Source.MediaType = file.FileType + documentBlock.Source.SourceObj.MediaType = file.FileType } return documentBlock } } // Default to base64 for binary files - documentBlock.Source.Type = "base64" - documentBlock.Source.Data = &fileData + documentBlock.Source.SourceObj.Type = "base64" + documentBlock.Source.SourceObj.Data = &fileData // Set media type if file.FileType != nil { - documentBlock.Source.MediaType = file.FileType + documentBlock.Source.SourceObj.MediaType = file.FileType } else { // Default to PDF if not specified mediaType := "application/pdf" - documentBlock.Source.MediaType = &mediaType + documentBlock.Source.SourceObj.MediaType = &mediaType } return documentBlock } @@ -1213,7 +1820,7 @@ func ConvertResponsesFileBlockToAnthropic(fileBlock *schemas.ResponsesInputMessa documentBlock := AnthropicContentBlock{ Type: AnthropicContentBlockTypeDocument, CacheControl: cacheControl, - Source: &AnthropicSource{}, + Source: &AnthropicBlockSource{SourceObj: &AnthropicSource{}}, } if citations != nil { @@ -1235,9 +1842,9 @@ func ConvertResponsesFileBlockToAnthropic(fileBlock *schemas.ResponsesInputMessa // Check if it's plain text based on file type if fileBlock.FileType != nil && (*fileBlock.FileType == "text/plain" || *fileBlock.FileType == "txt") { - documentBlock.Source.Type = "text" - documentBlock.Source.Data = &fileData - documentBlock.Source.MediaType = schemas.Ptr("text/plain") + documentBlock.Source.SourceObj.Type = "text" + documentBlock.Source.SourceObj.Data = &fileData + documentBlock.Source.SourceObj.MediaType = schemas.Ptr("text/plain") return documentBlock } @@ -1247,38 +1854,38 @@ func ConvertResponsesFileBlockToAnthropic(fileBlock *schemas.ResponsesInputMessa if urlTypeInfo.DataURLWithoutPrefix != nil { // It's a data URL, extract the base64 content - documentBlock.Source.Type = "base64" - documentBlock.Source.Data = urlTypeInfo.DataURLWithoutPrefix + documentBlock.Source.SourceObj.Type = "base64" + documentBlock.Source.SourceObj.Data = urlTypeInfo.DataURLWithoutPrefix // Set media type from data URL or file type if urlTypeInfo.MediaType != nil { - 
documentBlock.Source.MediaType = urlTypeInfo.MediaType + documentBlock.Source.SourceObj.MediaType = urlTypeInfo.MediaType } else if fileBlock.FileType != nil { - documentBlock.Source.MediaType = fileBlock.FileType + documentBlock.Source.SourceObj.MediaType = fileBlock.FileType } return documentBlock } } // Default to base64 for binary files (raw base64 without prefix) - documentBlock.Source.Type = "base64" - documentBlock.Source.Data = &fileData + documentBlock.Source.SourceObj.Type = "base64" + documentBlock.Source.SourceObj.Data = &fileData // Set media type if fileBlock.FileType != nil { - documentBlock.Source.MediaType = fileBlock.FileType + documentBlock.Source.SourceObj.MediaType = fileBlock.FileType } else { // Default to PDF if not specified mediaType := "application/pdf" - documentBlock.Source.MediaType = &mediaType + documentBlock.Source.SourceObj.MediaType = &mediaType } return documentBlock } // Handle file URL if fileBlock.FileURL != nil && *fileBlock.FileURL != "" { - documentBlock.Source.Type = "url" - documentBlock.Source.URL = fileBlock.FileURL + documentBlock.Source.SourceObj.Type = "url" + documentBlock.Source.SourceObj.URL = fileBlock.FileURL return documentBlock } @@ -1295,22 +1902,24 @@ func (block AnthropicContentBlock) ToBifrostContentImageBlock() schemas.ChatCont } func getImageURLFromBlock(block AnthropicContentBlock) string { - if block.Source == nil { + // Image blocks always carry object-form sources (never string form). + if block.Source == nil || block.Source.SourceObj == nil { return "" } + src := block.Source.SourceObj // Handle base64 data - convert to data URL - if block.Source.Data != nil { + if src.Data != nil { mime := "image/png" - if block.Source.MediaType != nil && *block.Source.MediaType != "" { - mime = *block.Source.MediaType + if src.MediaType != nil && *src.MediaType != "" { + mime = *src.MediaType } - return "data:" + mime + ";base64," + *block.Source.Data + return "data:" + mime + ";base64," + *src.Data } // Handle regular URLs - if block.Source.URL != nil { - return *block.Source.URL + if src.URL != nil { + return *src.URL } return "" diff --git a/core/providers/anthropic/utils_test.go b/core/providers/anthropic/utils_test.go index f778468971..b63ed11ae1 100644 --- a/core/providers/anthropic/utils_test.go +++ b/core/providers/anthropic/utils_test.go @@ -772,19 +772,32 @@ func TestAddMissingBetaHeadersToContext_PerProvider(t *testing.T) { }, unexpectHeaders: []string{AnthropicInterleavedThinkingBetaHeader}, }, - // Fast mode tests + // Fast mode tests — fast mode is Opus 4.6 only (research preview), + // so tests must set Model to exercise the path. Non-Opus-4.6 models + // are model-gated out regardless of provider flag. 
{ name: "Anthropic gets fast mode header", provider: schemas.Anthropic, req: &AnthropicMessageRequest{ + Model: "claude-opus-4-6", Speed: schemas.Ptr("fast"), }, expectHeaders: []string{AnthropicFastModeBetaHeader}, }, + { + name: "Anthropic skips fast mode header on non-Opus-4.6 model", + provider: schemas.Anthropic, + req: &AnthropicMessageRequest{ + Model: "claude-sonnet-4-6", + Speed: schemas.Ptr("fast"), + }, + unexpectHeaders: []string{AnthropicFastModeBetaHeader}, + }, { name: "Bedrock skips fast mode header", provider: schemas.Bedrock, req: &AnthropicMessageRequest{ + Model: "claude-opus-4-6", // fast mode is model-gated; set a supporting model so the test actually exercises provider suppression Speed: schemas.Ptr("fast"), }, unexpectHeaders: []string{AnthropicFastModeBetaHeader}, @@ -793,10 +806,63 @@ func TestAddMissingBetaHeadersToContext_PerProvider(t *testing.T) { name: "Azure skips fast mode header", provider: schemas.Azure, req: &AnthropicMessageRequest{ + Model: "claude-opus-4-6", // fast mode is model-gated; set a supporting model so the test actually exercises provider suppression Speed: schemas.Ptr("fast"), }, unexpectHeaders: []string{AnthropicFastModeBetaHeader}, }, + // Fine-grained tool streaming (eager_input_streaming) — per Table 20: + // GA on Anthropic / Bedrock / Vertex, Beta on Azure. All four should + // auto-inject fine-grained-tool-streaming-2025-05-14 when a tool has + // eager_input_streaming: true. + { + name: "Anthropic gets eager_input_streaming header", + provider: schemas.Anthropic, + req: &AnthropicMessageRequest{ + Tools: []AnthropicTool{{Name: "t1", EagerInputStreaming: schemas.Ptr(true)}}, + }, + expectHeaders: []string{AnthropicEagerInputStreamingBetaHeader}, + }, + { + name: "Bedrock gets eager_input_streaming header", + provider: schemas.Bedrock, + req: &AnthropicMessageRequest{ + Tools: []AnthropicTool{{Name: "t1", EagerInputStreaming: schemas.Ptr(true)}}, + }, + expectHeaders: []string{AnthropicEagerInputStreamingBetaHeader}, + }, + { + name: "Vertex gets eager_input_streaming header", + provider: schemas.Vertex, + req: &AnthropicMessageRequest{ + Tools: []AnthropicTool{{Name: "t1", EagerInputStreaming: schemas.Ptr(true)}}, + }, + expectHeaders: []string{AnthropicEagerInputStreamingBetaHeader}, + }, + { + name: "Azure gets eager_input_streaming header", + provider: schemas.Azure, + req: &AnthropicMessageRequest{ + Tools: []AnthropicTool{{Name: "t1", EagerInputStreaming: schemas.Ptr(true)}}, + }, + expectHeaders: []string{AnthropicEagerInputStreamingBetaHeader}, + }, + { + name: "eager_input_streaming header absent when flag is false", + provider: schemas.Anthropic, + req: &AnthropicMessageRequest{ + Tools: []AnthropicTool{{Name: "t1", EagerInputStreaming: schemas.Ptr(false)}}, + }, + unexpectHeaders: []string{AnthropicEagerInputStreamingBetaHeader}, + }, + { + name: "eager_input_streaming header absent when unset", + provider: schemas.Anthropic, + req: &AnthropicMessageRequest{ + Tools: []AnthropicTool{{Name: "t1"}}, + }, + unexpectHeaders: []string{AnthropicEagerInputStreamingBetaHeader}, + }, } for _, tt := range tests { @@ -998,6 +1064,7 @@ func TestFilterBetaHeadersForProvider(t *testing.T) { AnthropicContextManagementBetaHeader, AnthropicInterleavedThinkingBetaHeader, AnthropicContext1MBetaHeader, + AnthropicEagerInputStreamingBetaHeader, } result := FilterBetaHeadersForProvider(supported, schemas.Vertex) if len(result) != len(supported) { @@ -1049,6 +1116,7 @@ func TestFilterBetaHeadersForProvider(t *testing.T) { 
AnthropicSkillsBetaHeader, AnthropicContext1MBetaHeader, AnthropicRedactThinkingBetaHeader, + AnthropicEagerInputStreamingBetaHeader, } result := FilterBetaHeadersForProvider(supported, schemas.Azure) if len(result) != len(supported) { @@ -1064,6 +1132,7 @@ func TestFilterBetaHeadersForProvider(t *testing.T) { AnthropicContextManagementBetaHeader, AnthropicInterleavedThinkingBetaHeader, AnthropicContext1MBetaHeader, + AnthropicEagerInputStreamingBetaHeader, } result := FilterBetaHeadersForProvider(supported, schemas.Bedrock) if len(result) != len(supported) { @@ -1184,6 +1253,239 @@ func TestFilterBetaHeadersForProvider(t *testing.T) { } } +func TestStripUnsupportedFieldsFromRawBody(t *testing.T) { + t.Run("bedrock_strips_new_request_level_fields", func(t *testing.T) { + // Raw body with every new typed field. Targeting Bedrock: speed (no FastMode), + // inference_geo (no InferenceGeo), mcp_servers (no MCP), container.skills + // (no Skills), top-level cache_control.scope (no PromptCachingScope), + // output_config.task_budget (no TaskBudgets). All should be stripped. + input := []byte(`{ + "model":"claude-opus-4-6", + "speed":"fast", + "inference_geo":"us-east-1", + "mcp_servers":[{"type":"url","url":"https://example.com","name":"x"}], + "container":{"id":"c-1","skills":[{"skill_id":"s","type":"anthropic"}]}, + "cache_control":{"type":"ephemeral","ttl":"5m","scope":"user"}, + "output_config":{"task_budget":{"type":"tokens","total":20000}} + }`) + result, err := stripUnsupportedFieldsFromRawBody(input, schemas.Bedrock, "claude-opus-4-6") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + for _, path := range []string{"speed", "inference_geo", "mcp_servers", "container", "cache_control.scope", "output_config.task_budget"} { + if providerUtils.JSONFieldExists(result, path) { + t.Errorf("expected %q to be stripped for Bedrock, got: %s", path, string(result)) + } + } + // Confirm non-scope cache_control fields are retained. + if !providerUtils.JSONFieldExists(result, "cache_control.ttl") { + t.Errorf("expected cache_control.ttl to survive, got: %s", string(result)) + } + }) + + t.Run("vertex_strips_mcp_strict_and_input_examples_via_feature_check", func(t *testing.T) { + // Vertex: no MCP, no InputExamples, no StructuredOutputs. + // tool.strict stripped; tool.input_examples stripped; mcp_servers stripped. + // tool.cache_control.scope stripped (Vertex has no PromptCachingScope). + input := []byte(`{ + "model":"claude-sonnet-4-6", + "mcp_servers":[{"type":"url","url":"u","name":"n"}], + "tools":[{"name":"t1","strict":true,"input_examples":[{"input":{"a":1}}],"cache_control":{"type":"ephemeral","scope":"user"}}] + }`) + result, err := stripUnsupportedFieldsFromRawBody(input, schemas.Vertex, "claude-sonnet-4-6") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + for _, path := range []string{"mcp_servers", "tools.0.strict", "tools.0.input_examples", "tools.0.cache_control.scope"} { + if providerUtils.JSONFieldExists(result, path) { + t.Errorf("expected %q to be stripped for Vertex, got: %s", path, string(result)) + } + } + if !providerUtils.JSONFieldExists(result, "tools.0.name") { + t.Errorf("expected tool name to survive") + } + }) + + t.Run("bedrock_keeps_input_examples_via_standalone_flag", func(t *testing.T) { + // Bedrock has InputExamples=true via tool-examples-2025-10-29 but + // AdvancedToolUse=false. input_examples should be KEPT; defer_loading + // and allowed_callers (bundle-only) should be STRIPPED. 
+ input := []byte(`{ + "model":"claude-opus-4-6", + "tools":[{"name":"t1","input_examples":[{"input":{"a":1}}],"defer_loading":true,"allowed_callers":["direct"]}] + }`) + result, err := stripUnsupportedFieldsFromRawBody(input, schemas.Bedrock, "claude-opus-4-6") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if !providerUtils.JSONFieldExists(result, "tools.0.input_examples") { + t.Errorf("expected tools[0].input_examples to survive on Bedrock, got: %s", string(result)) + } + for _, path := range []string{"tools.0.defer_loading", "tools.0.allowed_callers"} { + if providerUtils.JSONFieldExists(result, path) { + t.Errorf("expected %q to be stripped for Bedrock (AdvancedToolUse bundle unsupported), got: %s", path, string(result)) + } + } + }) + + t.Run("speed_stripped_on_non_opus_46_even_on_anthropic", func(t *testing.T) { + // Model gate: fast-mode is Opus 4.6 only per docs. Even on Anthropic + // direct where FastMode=true, targeting a different model must strip. + input := []byte(`{"model":"claude-sonnet-4-6","speed":"fast"}`) + result, err := stripUnsupportedFieldsFromRawBody(input, schemas.Anthropic, "claude-sonnet-4-6") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if providerUtils.JSONFieldExists(result, "speed") { + t.Errorf("expected speed stripped for non-Opus-4.6 model on Anthropic, got: %s", string(result)) + } + }) + + t.Run("anthropic_direct_is_noop", func(t *testing.T) { + // Anthropic supports everything — body should survive untouched. + input := []byte(`{"model":"claude-opus-4-6","speed":"fast","mcp_servers":[{"type":"url","url":"u","name":"n"}],"container":{"id":"c"},"tools":[{"name":"t","defer_loading":true,"input_examples":[{"input":{"a":1}}]}]}`) + result, err := stripUnsupportedFieldsFromRawBody(input, schemas.Anthropic, "claude-opus-4-6") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + for _, path := range []string{"speed", "mcp_servers", "container", "tools.0.defer_loading", "tools.0.input_examples"} { + if !providerUtils.JSONFieldExists(result, path) { + t.Errorf("expected %q preserved on Anthropic direct, got: %s", path, string(result)) + } + } + }) + + t.Run("nested_scope_stripped_on_messages_and_system", func(t *testing.T) { + // Nested scope on system blocks and message blocks must also be stripped + // when the provider lacks PromptCachingScope. 
+ input := []byte(`{ + "model":"claude-opus-4-6", + "system":[{"type":"text","text":"hi","cache_control":{"type":"ephemeral","scope":"user"}}], + "messages":[{"role":"user","content":[{"type":"text","text":"q","cache_control":{"type":"ephemeral","scope":"global"}}]}] + }`) + result, err := stripUnsupportedFieldsFromRawBody(input, schemas.Bedrock, "claude-opus-4-6") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + for _, path := range []string{"system.0.cache_control.scope", "messages.0.content.0.cache_control.scope"} { + if providerUtils.JSONFieldExists(result, path) { + t.Errorf("expected nested %q stripped, got: %s", path, string(result)) + } + } + }) + + t.Run("unknown_provider_is_safe_noop", func(t *testing.T) { + input := []byte(`{"model":"claude-opus-4-6","speed":"fast"}`) + result, err := stripUnsupportedFieldsFromRawBody(input, schemas.ModelProvider("custom"), "claude-opus-4-6") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if !providerUtils.JSONFieldExists(result, "speed") { + t.Errorf("expected speed preserved for unknown provider (safe default), got: %s", string(result)) + } + }) + + t.Run("container_empty_skills_stripped_but_container_preserved", func(t *testing.T) { + // Skills=false provider (Bedrock), ContainerBasic=true. + // skills:[] is a caller oversight — strip the empty key, preserve container. + input := []byte(`{"model":"claude-opus-4-6","container":{"id":"c-1","skills":[]}}`) + result, err := stripUnsupportedFieldsFromRawBody(input, schemas.Bedrock, "claude-opus-4-6") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if providerUtils.JSONFieldExists(result, "container.skills") { + t.Errorf("expected empty container.skills stripped on Skills=false provider, got: %s", string(result)) + } + if !providerUtils.JSONFieldExists(result, "container.id") { + t.Errorf("expected container.id preserved (bare form still valid), got: %s", string(result)) + } + }) + + t.Run("container_nonempty_skills_drops_whole_container", func(t *testing.T) { + // Non-empty skills signals caller intent; provider doesn't support — drop container. + input := []byte(`{"model":"claude-opus-4-6","container":{"id":"c-1","skills":[{"skill_id":"s","type":"anthropic"}]}}`) + result, err := stripUnsupportedFieldsFromRawBody(input, schemas.Bedrock, "claude-opus-4-6") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if providerUtils.JSONFieldExists(result, "container") { + t.Errorf("expected whole container dropped for non-empty skills on Skills=false, got: %s", string(result)) + } + }) + + t.Run("container_empty_skills_on_skills_capable_provider_preserved", func(t *testing.T) { + // On Anthropic direct (Skills=true), the empty skills array must be preserved + // as-is — our strip logic only fires when !features.Skills. + input := []byte(`{"model":"claude-opus-4-6","container":{"id":"c-1","skills":[]}}`) + result, err := stripUnsupportedFieldsFromRawBody(input, schemas.Anthropic, "claude-opus-4-6") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if !providerUtils.JSONFieldExists(result, "container.skills") { + t.Errorf("expected container.skills preserved on Skills=true provider, got: %s", string(result)) + } + }) +} + +// TestStripUnsupportedAnthropicFields_ContainerSkillsGating mirrors the raw-path +// tests above on the typed path — ensures the typed sanitizer treats explicit +// empty skills arrays as a stripable (not drop-triggering) signal. 
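Aside (illustrative, not part of the patch): the raw-path suite above and the typed-path suite below pin down the same three-way rule. A hedged sketch over a simplified shape; the real sanitizers operate on raw JSON and AnthropicContainer, not this hypothetical struct:

	type container struct {
		ID     string
		Skills []string
	}

	// gateContainer applies the Skills gate: Skills-capable providers pass
	// everything through; an explicit empty skills list is treated as a
	// caller oversight and cleared; a non-empty list signals intent the
	// provider cannot honor, so the whole container is dropped.
	func gateContainer(c *container, skillsSupported bool) *container {
		switch {
		case c == nil || skillsSupported:
			return c // pass through untouched
		case len(c.Skills) == 0:
			c.Skills = nil // strip the empty key, keep the container
			return c
		default:
			return nil // drop the container entirely
		}
	}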
+func TestStripUnsupportedAnthropicFields_ContainerSkillsGating(t *testing.T) { + t.Run("empty_skills_on_skills_false_provider_strips_skills_keeps_container", func(t *testing.T) { + req := &AnthropicMessageRequest{ + Model: "claude-opus-4-6", + Container: &AnthropicContainer{ + ContainerObject: &AnthropicContainerObject{ + ID: schemas.Ptr("c-1"), + Skills: []AnthropicContainerSkill{}, // explicit empty + }, + }, + } + stripUnsupportedAnthropicFields(req, schemas.Bedrock, "claude-opus-4-6") + if req.Container == nil { + t.Fatalf("expected container preserved (bare form valid with empty skills), got nil") + } + if req.Container.ContainerObject == nil || req.Container.ContainerObject.Skills != nil { + t.Errorf("expected skills cleared on Skills=false, got %v", req.Container.ContainerObject) + } + }) + + t.Run("nonempty_skills_on_skills_false_provider_drops_container", func(t *testing.T) { + req := &AnthropicMessageRequest{ + Model: "claude-opus-4-6", + Container: &AnthropicContainer{ + ContainerObject: &AnthropicContainerObject{ + ID: schemas.Ptr("c-1"), + Skills: []AnthropicContainerSkill{{SkillID: "s", Type: "anthropic"}}, + }, + }, + } + stripUnsupportedAnthropicFields(req, schemas.Bedrock, "claude-opus-4-6") + if req.Container != nil { + t.Errorf("expected whole container dropped for non-empty skills on Skills=false, got %v", req.Container) + } + }) + + t.Run("empty_skills_on_skills_true_provider_preserved", func(t *testing.T) { + req := &AnthropicMessageRequest{ + Model: "claude-opus-4-6", + Container: &AnthropicContainer{ + ContainerObject: &AnthropicContainerObject{ + ID: schemas.Ptr("c-1"), + Skills: []AnthropicContainerSkill{}, + }, + }, + } + stripUnsupportedAnthropicFields(req, schemas.Anthropic, "claude-opus-4-6") + if req.Container == nil || req.Container.ContainerObject == nil { + t.Fatalf("expected container preserved on Skills=true provider, got %v", req.Container) + } + if req.Container.ContainerObject.Skills == nil { + t.Errorf("expected empty skills preserved on Skills=true provider (not nilled)") + } + }) +} + func TestStripAutoInjectableTools(t *testing.T) { t.Run("code_execution_without_web_search_preserved", func(t *testing.T) { // code_execution alone should NOT be stripped (no web_search/web_fetch to trigger auto-injection) @@ -1474,6 +1776,39 @@ func TestAnthropicToolUnmarshalJSON_MCPToolset(t *testing.T) { }) } +func TestGetRequestBodyForResponses_RawBodyStripsFallbacks(t *testing.T) { + rawBody := []byte(`{"model":"claude-sonnet-4-5","max_tokens":1024,"messages":[{"role":"user","content":"hello"}],"fallbacks":["claude-haiku-4-5"],"temperature":0.7}`) + + ctx := schemas.NewBifrostContext(nil, time.Time{}) + ctx.SetValue(schemas.BifrostContextKeyUseRawRequestBody, true) + + request := &schemas.BifrostResponsesRequest{ + Provider: schemas.Anthropic, + Model: "claude-sonnet-4-5", + RawRequestBody: rawBody, + } + + result, bifrostErr := getRequestBodyForResponses(ctx, request, schemas.Anthropic, false, nil) + if bifrostErr != nil { + t.Fatalf("unexpected error: %v", bifrostErr) + } + + if providerUtils.GetJSONField(result, "fallbacks").Exists() { + t.Error("expected 'fallbacks' to be absent from raw-body output") + } + + // Other fields must survive the round-trip + if !providerUtils.GetJSONField(result, "model").Exists() { + t.Error("expected 'model' to be present") + } + if !providerUtils.GetJSONField(result, "max_tokens").Exists() { + t.Error("expected 'max_tokens' to be present") + } + if !providerUtils.GetJSONField(result, "temperature").Exists() { + 
t.Error("expected 'temperature' to be present") + } +} + func TestApplyMCPToolsetConfigToBifrostTool(t *testing.T) { t.Run("allowlist pattern merges correctly", func(t *testing.T) { bifrostTool := &schemas.ResponsesTool{ @@ -1541,3 +1876,109 @@ func TestApplyMCPToolsetConfigToBifrostTool(t *testing.T) { applyMCPToolsetConfigToBifrostTool(&schemas.ResponsesTool{}, nil) }) } + +func TestSupportsAdaptiveThinking(t *testing.T) { + tests := []struct { + model string + expected bool + }{ + {"claude-opus-4-7-20260401", true}, + {"claude-opus-4.7-20260401", true}, + {"claude-opus-4-6-20250514", true}, + {"claude-opus-4.6-20250514", true}, + {"claude-sonnet-4-6-20250514", true}, + {"claude-sonnet-4.6-20250514", true}, + {"claude-opus-4-5-20241022", false}, + {"claude-sonnet-4-5-20241022", false}, + {"claude-haiku-4-6-20250514", false}, // haiku does not support adaptive + {"claude-haiku-4-7-20260401", false}, // haiku, not opus + {"", false}, + } + + for _, tt := range tests { + t.Run(tt.model, func(t *testing.T) { + got := SupportsAdaptiveThinking(tt.model) + if got != tt.expected { + t.Errorf("SupportsAdaptiveThinking(%q) = %v, want %v", tt.model, got, tt.expected) + } + }) + } +} + +func TestAddMissingBetaHeadersToContext_TaskBudgets(t *testing.T) { + tests := []struct { + name string + provider schemas.ModelProvider + req *AnthropicMessageRequest + expectHeaders []string + unexpectHeaders []string + }{ + { + name: "Anthropic gets task-budgets header when task_budget set", + provider: schemas.Anthropic, + req: &AnthropicMessageRequest{ + OutputConfig: &AnthropicOutputConfig{ + TaskBudget: &AnthropicTaskBudget{Type: "tokens", Total: 50000}, + }, + }, + expectHeaders: []string{AnthropicTaskBudgetsBetaHeader}, + }, + { + name: "Vertex does not get task-budgets header when task_budget set", + provider: schemas.Vertex, + req: &AnthropicMessageRequest{ + OutputConfig: &AnthropicOutputConfig{ + TaskBudget: &AnthropicTaskBudget{Type: "tokens", Total: 50000}, + }, + }, + unexpectHeaders: []string{AnthropicTaskBudgetsBetaHeader}, + }, + { + name: "no task-budgets header when task_budget is nil", + provider: schemas.Anthropic, + req: &AnthropicMessageRequest{ + OutputConfig: &AnthropicOutputConfig{}, + }, + unexpectHeaders: []string{AnthropicTaskBudgetsBetaHeader}, + }, + { + name: "no task-budgets header when output_config is nil", + provider: schemas.Anthropic, + req: &AnthropicMessageRequest{}, + unexpectHeaders: []string{AnthropicTaskBudgetsBetaHeader}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + ctx := schemas.NewBifrostContext(nil, time.Time{}) + AddMissingBetaHeadersToContext(ctx, tt.req, tt.provider) + + var headers []string + if extraHeaders, ok := ctx.Value(schemas.BifrostContextKeyExtraHeaders).(map[string][]string); ok { + headers = extraHeaders[AnthropicBetaHeader] + } + + for _, expected := range tt.expectHeaders { + found := false + for _, h := range headers { + if h == expected { + found = true + break + } + } + if !found { + t.Errorf("expected header %q not found in %v", expected, headers) + } + } + + for _, unexpected := range tt.unexpectHeaders { + for _, h := range headers { + if h == unexpected { + t.Errorf("unexpected header %q found in %v", unexpected, headers) + } + } + } + }) + } +} diff --git a/core/providers/anthropic/validate_chat_tools_test.go b/core/providers/anthropic/validate_chat_tools_test.go new file mode 100644 index 0000000000..d9f0c8a2df --- /dev/null +++ b/core/providers/anthropic/validate_chat_tools_test.go @@ -0,0 +1,138 @@ 
+package anthropic + +import ( + "testing" + + "github.com/maximhq/bifrost/core/schemas" +) + +// TestValidateChatToolsForProvider locks in the partition: +// function/custom tools always survive; server tools survive only when the +// target provider's ProviderFeatures flag is true for that tool type. +func TestValidateChatToolsForProvider(t *testing.T) { + fnTool := schemas.ChatTool{ + Type: schemas.ChatToolTypeFunction, + Function: &schemas.ChatToolFunction{Name: "get_weather"}, + } + serverTool := func(tpe, name string) schemas.ChatTool { + return schemas.ChatTool{Type: schemas.ChatToolType(tpe), Name: name} + } + + cases := []struct { + name string + provider schemas.ModelProvider + input []schemas.ChatTool + wantKeep int + wantDropped []string + assertNotes string + }{ + { + name: "function tools always survive on any provider", + provider: schemas.Bedrock, + input: []schemas.ChatTool{fnTool, fnTool}, + wantKeep: 2, + }, + { + name: "bedrock drops web_search", + provider: schemas.Bedrock, + input: []schemas.ChatTool{serverTool("web_search_20260209", "web_search")}, + wantKeep: 0, + wantDropped: []string{"web_search_20260209"}, + assertNotes: "Bedrock has WebSearch=false per Table 20 (AWS user guide beta-header list + Anthropic overview)", + }, + { + name: "bedrock drops web_fetch + code_execution + mcp_toolset", + provider: schemas.Bedrock, + input: []schemas.ChatTool{ + serverTool("web_fetch_20260309", "web_fetch"), + serverTool("code_execution_20250825", "code_execution"), + serverTool("mcp_toolset", "notion"), + }, + wantKeep: 0, + wantDropped: []string{"web_fetch_20260309", "code_execution_20250825", "mcp_toolset"}, + }, + { + name: "bedrock keeps computer/bash/memory/text_editor/tool_search", + provider: schemas.Bedrock, + input: []schemas.ChatTool{ + serverTool("computer_20251124", "computer"), + serverTool("bash_20250124", "bash"), + serverTool("memory_20250818", "memory"), + serverTool("text_editor_20250728", "str_replace_based_edit_tool"), + serverTool("tool_search_tool_bm25", "tool_search_tool_bm25"), + }, + wantKeep: 5, + }, + { + name: "bedrock partial drop mixes function + server tools", + provider: schemas.Bedrock, + input: []schemas.ChatTool{ + fnTool, + serverTool("web_search_20260209", "web_search"), + serverTool("bash_20250124", "bash"), + }, + wantKeep: 2, // fnTool + bash + wantDropped: []string{"web_search_20260209"}, + }, + { + name: "vertex drops web_fetch", + provider: schemas.Vertex, + input: []schemas.ChatTool{serverTool("web_fetch_20260309", "web_fetch")}, + wantKeep: 0, + wantDropped: []string{"web_fetch_20260309"}, + assertNotes: "Vertex has WebFetch=false per Table 20", + }, + { + name: "vertex drops mcp_toolset", + provider: schemas.Vertex, + input: []schemas.ChatTool{serverTool("mcp_toolset", "notion")}, + wantKeep: 0, + wantDropped: []string{"mcp_toolset"}, + assertNotes: "Vertex has MCP=false per MCP-excl (explicit exclusion in Anthropic docs)", + }, + { + name: "anthropic keeps everything", + provider: schemas.Anthropic, + input: []schemas.ChatTool{ + serverTool("web_search_20260209", "web_search"), + serverTool("web_fetch_20260309", "web_fetch"), + serverTool("code_execution_20250825", "code_execution"), + serverTool("mcp_toolset", "x"), + serverTool("computer_20251124", "computer"), + }, + wantKeep: 5, + }, + { + name: "unknown provider keeps everything (forward-compat)", + provider: schemas.ModelProvider("custom-new-provider"), + input: []schemas.ChatTool{serverTool("web_search_20260209", "web_search")}, + wantKeep: 1, + }, + { + name: "unknown 
tool type on known provider is kept (forward-compat)", + provider: schemas.Bedrock, + input: []schemas.ChatTool{serverTool("future_tool_20270101", "future")}, + wantKeep: 1, + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + keep, dropped := ValidateChatToolsForProvider(tc.input, tc.provider) + if len(keep) != tc.wantKeep { + t.Errorf("keep count: got %d, want %d (%s)", len(keep), tc.wantKeep, tc.assertNotes) + } + if len(dropped) != len(tc.wantDropped) { + t.Errorf("dropped count: got %v, want %v", dropped, tc.wantDropped) + } + for i, d := range tc.wantDropped { + if i >= len(dropped) { + break + } + if dropped[i] != d { + t.Errorf("dropped[%d]: got %q, want %q", i, dropped[i], d) + } + } + }) + } +} diff --git a/core/providers/anthropic/websearch_test.go b/core/providers/anthropic/websearch_test.go new file mode 100644 index 0000000000..48c4214c40 --- /dev/null +++ b/core/providers/anthropic/websearch_test.go @@ -0,0 +1,270 @@ +package anthropic + +import ( + "encoding/json" + "testing" + + "github.com/maximhq/bifrost/core/schemas" +) + +// TestWebSearch_OutputItemAdded_StoresID verifies that a WebSearch function_call +// output_item.added event stores the item ID in the per-request stream state so that +// subsequent argument deltas can be skipped. +func TestWebSearch_OutputItemAdded_StoresID(t *testing.T) { + t.Parallel() + + const itemID = "toolu_ws_storesid_test" + + ctx, cancel := schemas.NewBifrostContextWithCancel(nil) + defer cancel() + + bifrostResp := &schemas.BifrostResponsesStreamResponse{ + Type: schemas.ResponsesStreamResponseTypeOutputItemAdded, + OutputIndex: schemas.Ptr(0), + Item: &schemas.ResponsesMessage{ + ID: schemas.Ptr(itemID), + Type: schemas.Ptr(schemas.ResponsesMessageTypeFunctionCall), + ResponsesToolMessage: &schemas.ResponsesToolMessage{ + CallID: schemas.Ptr(itemID), + Name: schemas.Ptr("WebSearch"), + Arguments: schemas.Ptr(""), + }, + }, + } + + events := ToAnthropicResponsesStreamResponse(ctx, bifrostResp) + + // Should emit content_block_start + if len(events) == 0 { + t.Fatal("expected at least one event") + } + if events[0].Type != AnthropicStreamEventTypeContentBlockStart { + t.Errorf("event[0].Type = %v, want content_block_start", events[0].Type) + } + if events[0].ContentBlock == nil || events[0].ContentBlock.Input == nil { + t.Fatal("expected ContentBlock with Input") + } + if string(events[0].ContentBlock.Input) != "{}" { + t.Errorf("ContentBlock.Input = %s, want {}", events[0].ContentBlock.Input) + } + + // ID must now be tracked in per-request state + state := getOrCreateAnthropicToResponsesStreamState(ctx) + if !state.webSearchItemIDs[itemID] { + t.Error("expected item ID to be stored in per-request stream state after output_item.added") + } +} + +// TestWebSearch_FunctionCallArgumentsDelta_Skipped verifies that argument deltas +// for a tracked WebSearch item are skipped (returning nil) regardless of the +// user agent — the fix for the original bug where non-Claude Code clients lost +// the query. 
+func TestWebSearch_FunctionCallArgumentsDelta_Skipped(t *testing.T) { + t.Parallel() + + const itemID = "toolu_ws_skip_test" + + ctx, cancel := schemas.NewBifrostContextWithCancel(nil) + defer cancel() + + // Pre-seed per-request state as if output_item.added already fired + state := getOrCreateAnthropicToResponsesStreamState(ctx) + state.webSearchItemIDs = map[string]bool{itemID: true} + + partial := `{"query": "world news"` + bifrostResp := &schemas.BifrostResponsesStreamResponse{ + Type: schemas.ResponsesStreamResponseTypeFunctionCallArgumentsDelta, + OutputIndex: schemas.Ptr(0), + ItemID: schemas.Ptr(itemID), + Delta: &partial, + } + + events := ToAnthropicResponsesStreamResponse(ctx, bifrostResp) + + if len(events) != 0 { + t.Errorf("expected deltas to be skipped (0 events), got %d", len(events)) + } +} + +// TestWebSearch_OutputItemDone_GeneratesSyntheticDeltas verifies that when +// output_item.done fires for a tracked WebSearch item, synthetic input_json_delta +// events carrying the full query are emitted, followed by content_block_stop. +// This applies for ALL clients regardless of user agent. +func TestWebSearch_OutputItemDone_GeneratesSyntheticDeltas(t *testing.T) { + t.Parallel() + + const itemID = "toolu_ws_synth_test" + + ctx, cancel := schemas.NewBifrostContextWithCancel(nil) + defer cancel() + + // Pre-seed per-request state as if output_item.added already fired + state := getOrCreateAnthropicToResponsesStreamState(ctx) + state.webSearchItemIDs = map[string]bool{itemID: true} + + query := `{"query":"world news today"}` + bifrostResp := &schemas.BifrostResponsesStreamResponse{ + Type: schemas.ResponsesStreamResponseTypeOutputItemDone, + OutputIndex: schemas.Ptr(1), + Item: &schemas.ResponsesMessage{ + ID: schemas.Ptr(itemID), + Type: schemas.Ptr(schemas.ResponsesMessageTypeFunctionCall), + ResponsesToolMessage: &schemas.ResponsesToolMessage{ + CallID: schemas.Ptr(itemID), + Name: schemas.Ptr("WebSearch"), + Arguments: &query, + }, + }, + } + + events := ToAnthropicResponsesStreamResponse(ctx, bifrostResp) + + // Must have at least one input_json_delta and a final content_block_stop + if len(events) < 2 { + t.Fatalf("expected at least 2 events (deltas + stop), got %d", len(events)) + } + + // All events except last must be input_json_delta + for i, ev := range events[:len(events)-1] { + if ev.Type != AnthropicStreamEventTypeContentBlockDelta { + t.Errorf("event[%d].Type = %v, want content_block_delta", i, ev.Type) + continue + } + if ev.Delta == nil || ev.Delta.Type != AnthropicStreamDeltaTypeInputJSON { + t.Errorf("event[%d].Delta.Type = %v, want input_json", i, ev.Delta) + } + } + + // Last event must be content_block_stop + last := events[len(events)-1] + if last.Type != AnthropicStreamEventTypeContentBlockStop { + t.Errorf("last event.Type = %v, want content_block_stop", last.Type) + } + + // Reconstruct the accumulated JSON from the deltas + var accumulated string + for _, ev := range events[:len(events)-1] { + if ev.Delta != nil && ev.Delta.PartialJSON != nil { + accumulated += *ev.Delta.PartialJSON + } + } + var got map[string]interface{} + if err := json.Unmarshal([]byte(accumulated), &got); err != nil { + t.Fatalf("accumulated JSON invalid: %v — got %q", err, accumulated) + } + if got["query"] != "world news today" { + t.Errorf("query = %v, want %q", got["query"], "world news today") + } + + // ID must have been cleaned up from per-request state + if state.webSearchItemIDs[itemID] { + t.Error("expected item ID to be removed from per-request stream state after 
output_item.done") + } +} + +// TestWebSearch_FullFlow_AnyUserAgent is the regression test for the original bug. +// It simulates the complete streaming sequence: +// +// output_item.added → FunctionCallArgumentsDelta (×N) → output_item.done +// +// and verifies that the client-facing Anthropic stream contains proper +// input_json_delta events with the query, regardless of user agent. +func TestWebSearch_FullFlow_AnyUserAgent(t *testing.T) { + t.Parallel() + + const itemID = "toolu_ws_fullflow_test" + + ctx, cancel := schemas.NewBifrostContextWithCancel(nil) + defer cancel() + + var allEvents []*AnthropicStreamEvent + + // Step 1: output_item.added + addedResp := &schemas.BifrostResponsesStreamResponse{ + Type: schemas.ResponsesStreamResponseTypeOutputItemAdded, + OutputIndex: schemas.Ptr(0), + Item: &schemas.ResponsesMessage{ + ID: schemas.Ptr(itemID), + Type: schemas.Ptr(schemas.ResponsesMessageTypeFunctionCall), + ResponsesToolMessage: &schemas.ResponsesToolMessage{ + CallID: schemas.Ptr(itemID), + Name: schemas.Ptr("WebSearch"), + Arguments: schemas.Ptr(""), + }, + }, + } + allEvents = append(allEvents, ToAnthropicResponsesStreamResponse(ctx, addedResp)...) + + // Step 2: FunctionCallArgumentsDelta events (should be skipped) + for _, partial := range []string{`{"query": "`, `latest AI`, `news"}`} { + p := partial + deltaResp := &schemas.BifrostResponsesStreamResponse{ + Type: schemas.ResponsesStreamResponseTypeFunctionCallArgumentsDelta, + OutputIndex: schemas.Ptr(0), + ItemID: schemas.Ptr(itemID), + Delta: &p, + } + allEvents = append(allEvents, ToAnthropicResponsesStreamResponse(ctx, deltaResp)...) + } + + // Step 3: output_item.done with full accumulated arguments + fullArgs := `{"query":"latest AI news"}` + doneResp := &schemas.BifrostResponsesStreamResponse{ + Type: schemas.ResponsesStreamResponseTypeOutputItemDone, + OutputIndex: schemas.Ptr(0), + Item: &schemas.ResponsesMessage{ + ID: schemas.Ptr(itemID), + Type: schemas.Ptr(schemas.ResponsesMessageTypeFunctionCall), + ResponsesToolMessage: &schemas.ResponsesToolMessage{ + CallID: schemas.Ptr(itemID), + Name: schemas.Ptr("WebSearch"), + Arguments: &fullArgs, + }, + }, + } + allEvents = append(allEvents, ToAnthropicResponsesStreamResponse(ctx, doneResp)...) 
+ + // Verify the sequence: + // [0] content_block_start (input:{}) + // [1..N-1] input_json_delta events + // [N] content_block_stop + if len(allEvents) < 3 { + t.Fatalf("expected at least 3 events, got %d: %v", len(allEvents), allEvents) + } + + // First event: content_block_start with empty input + if allEvents[0].Type != AnthropicStreamEventTypeContentBlockStart { + t.Errorf("allEvents[0].Type = %v, want content_block_start", allEvents[0].Type) + } + + // Last event: content_block_stop + last := allEvents[len(allEvents)-1] + if last.Type != AnthropicStreamEventTypeContentBlockStop { + t.Errorf("last event.Type = %v, want content_block_stop", last.Type) + } + + // Middle events: all input_json_delta + for i, ev := range allEvents[1 : len(allEvents)-1] { + if ev.Type != AnthropicStreamEventTypeContentBlockDelta { + t.Errorf("allEvents[%d].Type = %v, want content_block_delta", i+1, ev.Type) + } + if ev.Delta == nil || ev.Delta.Type != AnthropicStreamDeltaTypeInputJSON { + t.Errorf("allEvents[%d].Delta.Type = %v, want input_json", i+1, ev.Delta) + } + } + + // Reconstruct query from synthetic deltas + var accumulated string + for _, ev := range allEvents[1 : len(allEvents)-1] { + if ev.Delta != nil && ev.Delta.PartialJSON != nil { + accumulated += *ev.Delta.PartialJSON + } + } + var got map[string]interface{} + if err := json.Unmarshal([]byte(accumulated), &got); err != nil { + t.Fatalf("reconstructed JSON is invalid: %v — got %q", err, accumulated) + } + if got["query"] != "latest AI news" { + t.Errorf("reconstructed query = %v, want %q", got["query"], "latest AI news") + } +} diff --git a/core/providers/azure/azure.go b/core/providers/azure/azure.go index 9649caa662..9d7c3063d7 100644 --- a/core/providers/azure/azure.go +++ b/core/providers/azure/azure.go @@ -1207,6 +1207,7 @@ func (provider *AzureProvider) SpeechStream(ctx *schemas.BifrostContext, postHoo // Start streaming in a goroutine go func() { + defer providerUtils.EnsureStreamFinalizerCalled(ctx) defer func() { if ctx.Err() == context.Canceled { providerUtils.HandleStreamCancellation(ctx, postHookRunner, responseChan, provider.GetProviderKey(), request.Model, schemas.SpeechStreamRequest, provider.logger) @@ -3298,6 +3299,7 @@ func (provider *AzureProvider) PassthroughStream( ch := make(chan *schemas.BifrostStreamChunk, schemas.DefaultStreamBufferSize) go func() { + defer providerUtils.EnsureStreamFinalizerCalled(ctx) defer func() { if ctx.Err() == context.Canceled { providerUtils.HandleStreamCancellation(ctx, postHookRunner, ch, provider.GetProviderKey(), req.Model, schemas.PassthroughStreamRequest, provider.logger) diff --git a/core/providers/azure/azure_test.go b/core/providers/azure/azure_test.go index b1080b7db3..5727cb343d 100644 --- a/core/providers/azure/azure_test.go +++ b/core/providers/azure/azure_test.go @@ -78,8 +78,10 @@ func TestAzure(t *testing.T) { VideoRemix: false, VideoList: false, VideoDelete: false, - InterleavedThinking: true, - PassthroughAPI: true, + InterleavedThinking: true, + PassthroughAPI: true, + EagerInputStreaming: true, // fine-grained-tool-streaming-2025-05-14 (Beta on Azure Foundry) + ServerToolsViaOpenAIEndpoint: true, // web_search / web_fetch / code_execution on Azure per Table 20 }, DisableParallelFor: []string{"Transcription"}, // Azure Whisper has 3 calls/minute quota } diff --git a/core/providers/bedrock/bedrock.go b/core/providers/bedrock/bedrock.go index 3cddc86340..2a181068a2 100644 --- a/core/providers/bedrock/bedrock.go +++ b/core/providers/bedrock/bedrock.go @@ -949,6 +949,7 @@ 
func (provider *BedrockProvider) TextCompletionStream(ctx *schemas.BifrostContex // Start streaming in a goroutine go func() { + defer providerUtils.EnsureStreamFinalizerCalled(ctx) defer func() { if ctx.Err() == context.Canceled { providerUtils.HandleStreamCancellation(ctx, postHookRunner, responseChan, providerName, request.Model, schemas.TextCompletionStreamRequest, provider.logger) @@ -1204,6 +1205,7 @@ func (provider *BedrockProvider) ChatCompletionStream(ctx *schemas.BifrostContex // Start streaming in a goroutine go func() { + defer providerUtils.EnsureStreamFinalizerCalled(ctx) defer func() { if ctx.Err() == context.Canceled { providerUtils.HandleStreamCancellation(ctx, postHookRunner, responseChan, providerName, request.Model, schemas.ChatCompletionStreamRequest, provider.logger) @@ -1592,6 +1594,7 @@ func (provider *BedrockProvider) ResponsesStream(ctx *schemas.BifrostContext, po // Start streaming in a goroutine go func() { + defer providerUtils.EnsureStreamFinalizerCalled(ctx) defer func() { if ctx.Err() == context.Canceled { providerUtils.HandleStreamCancellation(ctx, postHookRunner, responseChan, providerName, request.Model, schemas.ResponsesStreamRequest, provider.logger) diff --git a/core/providers/bedrock/bedrock_test.go b/core/providers/bedrock/bedrock_test.go index 1949051c44..dc4b4d6d69 100644 --- a/core/providers/bedrock/bedrock_test.go +++ b/core/providers/bedrock/bedrock_test.go @@ -15,7 +15,10 @@ import ( ) func mustMarshalJSON(v interface{}) json.RawMessage { - b, _ := json.Marshal(v) + b, err := json.Marshal(v) + if err != nil { + panic("mustMarshalJSON: " + err.Error()) + } return json.RawMessage(b) } @@ -40,7 +43,10 @@ func jsonEqual(t *testing.T, expected, actual json.RawMessage, msgAndArgs ...int // mustMarshalToolParams marshals ToolFunctionParameters to json.RawMessage, // matching the conversion code path for deterministic output. func mustMarshalToolParams(params *schemas.ToolFunctionParameters) json.RawMessage { - b, _ := json.Marshal(params) + b, err := json.Marshal(params) + if err != nil { + panic("mustMarshalToolParams: " + err.Error()) + } return json.RawMessage(b) } @@ -222,6 +228,9 @@ func TestBedrock(t *testing.T) { ImageVariation: true, StructuredOutputs: true, InterleavedThinking: true, + EagerInputStreaming: true, // fine-grained-tool-streaming-2025-05-14 (per B-header) + // ServerToolsViaOpenAIEndpoint: Bedrock does not support web_search / web_fetch / + // code_execution server tools per Table 20, so no cases would run. Left off. 
}, } @@ -1256,7 +1265,7 @@ func TestBedrockToBifrostRequestConversion(t *testing.T) { ToolUse: &bedrock.BedrockToolUse{ ToolUseID: "tool-use-123", Name: "get_weather", - Input: json.RawMessage(`{"location":"NYC"}`), + Input: json.RawMessage(`{"location":"NYC"}`), }, }, }, @@ -1331,7 +1340,7 @@ func TestBedrockToBifrostRequestConversion(t *testing.T) { ToolUse: &bedrock.BedrockToolUse{ ToolUseID: "tool-use-456", Name: "calculate", - Input: json.RawMessage(`{"expression":"2+2"}`), + Input: json.RawMessage(`{"expression":"2+2"}`), }, }, }, @@ -1860,7 +1869,7 @@ func TestBifrostToBedrockResponseConversion(t *testing.T) { ToolUse: &bedrock.BedrockToolUse{ ToolUseID: "call-111", Name: "get_weather", - Input: json.RawMessage(`{"location":"NYC"}`), + Input: json.RawMessage(`{"location":"NYC"}`), }, }, { @@ -2222,6 +2231,173 @@ func TestToBedrockResponsesRequest_AdditionalFields_InterfaceSlice(t *testing.T) assert.Equal(t, []string{"/amazon-bedrock-invocationMetrics/inputTokenCount"}, bedrockReq.AdditionalModelResponseFieldPaths) } +func TestToBedrockResponsesRequest_AnthropicTextFormatUsesOutputConfig(t *testing.T) { + schemaObj := any(schemas.NewOrderedMapFromPairs( + schemas.KV("type", "object"), + schemas.KV("properties", schemas.NewOrderedMapFromPairs( + schemas.KV("topic", schemas.NewOrderedMapFromPairs( + schemas.KV("type", "string"), + )), + )), + schemas.KV("required", []string{"topic"}), + )) + + req := &schemas.BifrostResponsesRequest{ + Model: "bedrock/anthropic.claude-3-sonnet-20240229-v1:0", + Params: &schemas.ResponsesParameters{ + Text: &schemas.ResponsesTextConfig{ + Format: &schemas.ResponsesTextConfigFormat{ + Type: "json_schema", + Name: schemas.Ptr("classification"), + JSONSchema: &schemas.ResponsesTextConfigFormatJSONSchema{ + Schema: &schemaObj, + }, + }, + }, + }, + } + + ctx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline) + bedrockReq, err := bedrock.ToBedrockResponsesRequest(ctx, req) + require.NoError(t, err) + require.NotNil(t, bedrockReq) + require.NotNil(t, bedrockReq.AdditionalModelRequestFields, "expected additional model request fields for anthropic responses structured output") + + outputConfigRaw, hasOutputConfig := bedrockReq.AdditionalModelRequestFields.Get("output_config") + require.True(t, hasOutputConfig, "expected output_config for anthropic responses structured output") + + outputConfig, ok := schemas.SafeExtractOrderedMap(outputConfigRaw) + require.True(t, ok, "expected output_config to be an ordered map") + + formatRaw, hasFormat := outputConfig.Get("format") + require.True(t, hasFormat, "expected output_config.format") + + formatMap, ok := schemas.SafeExtractOrderedMap(formatRaw) + require.True(t, ok, "expected output_config.format to be an ordered map") + + formatType, ok := formatMap.Get("type") + require.True(t, ok, "expected output_config.format.type") + assert.Equal(t, "json_schema", formatType) + + schemaRaw, ok := formatMap.Get("schema") + require.True(t, ok, "expected output_config.format.schema") + schemaMap, ok := schemas.SafeExtractOrderedMap(schemaRaw) + require.True(t, ok, "expected output_config.format.schema to remain ordered") + require.NotNil(t, schemaMap) + + if bedrockReq.ToolConfig != nil { + assert.Nil(t, bedrockReq.ToolConfig.ToolChoice, "expected no forced tool choice for anthropic responses structured output") + assert.Empty(t, bedrockReq.ToolConfig.Tools, "expected no synthetic structured output tool for anthropic responses structured output") + } +} + +func 
TestToBedrockResponsesRequest_NonAnthropicTextFormatStillUsesToolConversion(t *testing.T) { + schemaObj := any(schemas.NewOrderedMapFromPairs( + schemas.KV("type", "object"), + schemas.KV("properties", schemas.NewOrderedMapFromPairs( + schemas.KV("topic", schemas.NewOrderedMapFromPairs( + schemas.KV("type", "string"), + )), + )), + schemas.KV("required", []string{"topic"}), + )) + + req := &schemas.BifrostResponsesRequest{ + Model: "bedrock/amazon.nova-pro-v1:0", + Params: &schemas.ResponsesParameters{ + Text: &schemas.ResponsesTextConfig{ + Format: &schemas.ResponsesTextConfigFormat{ + Type: "json_schema", + Name: schemas.Ptr("classification"), + JSONSchema: &schemas.ResponsesTextConfigFormatJSONSchema{ + Schema: &schemaObj, + }, + }, + }, + }, + } + + ctx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline) + bedrockReq, err := bedrock.ToBedrockResponsesRequest(ctx, req) + require.NoError(t, err) + require.NotNil(t, bedrockReq) + + if bedrockReq.AdditionalModelRequestFields != nil { + _, hasOutputConfig := bedrockReq.AdditionalModelRequestFields.Get("output_config") + assert.False(t, hasOutputConfig, "expected no output_config for non-anthropic responses structured output") + } + + require.NotNil(t, bedrockReq.ToolConfig, "expected tool_config for non-anthropic responses structured output") + require.NotEmpty(t, bedrockReq.ToolConfig.Tools, "expected synthetic structured output tool to be added") + require.NotNil(t, bedrockReq.ToolConfig.ToolChoice, "expected structured output tool choice to be forced") + require.NotNil(t, bedrockReq.ToolConfig.ToolChoice.Tool, "expected structured output tool choice to target the synthetic tool") + assert.Contains(t, bedrockReq.ToolConfig.ToolChoice.Tool.Name, "bf_so_", "expected forced tool choice to target the synthetic structured output tool") +} + +func TestToBedrockResponsesRequest_NonAnthropicTextFormatPreservedWithUserTools(t *testing.T) { + schemaObj := any(schemas.NewOrderedMapFromPairs( + schemas.KV("type", "object"), + schemas.KV("properties", schemas.NewOrderedMapFromPairs( + schemas.KV("topic", schemas.NewOrderedMapFromPairs( + schemas.KV("type", "string"), + )), + )), + schemas.KV("required", []string{"topic"}), + )) + + toolParams := schemas.ToolFunctionParameters{ + Type: "object", + Properties: schemas.NewOrderedMapFromPairs( + schemas.KV("city", schemas.NewOrderedMapFromPairs( + schemas.KV("type", "string"), + )), + ), + } + + req := &schemas.BifrostResponsesRequest{ + Model: "bedrock/amazon.nova-pro-v1:0", + Params: &schemas.ResponsesParameters{ + Text: &schemas.ResponsesTextConfig{ + Format: &schemas.ResponsesTextConfigFormat{ + Type: "json_schema", + Name: schemas.Ptr("classification"), + JSONSchema: &schemas.ResponsesTextConfigFormatJSONSchema{ + Schema: &schemaObj, + }, + }, + }, + Tools: []schemas.ResponsesTool{ + { + Type: schemas.ResponsesToolTypeFunction, + Name: schemas.Ptr("get_weather"), + Description: schemas.Ptr("Get weather information"), + ResponsesToolFunction: &schemas.ResponsesToolFunction{ + Parameters: &toolParams, + }, + }, + }, + ToolChoice: &schemas.ResponsesToolChoice{ + ResponsesToolChoiceStruct: &schemas.ResponsesToolChoiceStruct{ + Type: schemas.ResponsesToolChoiceTypeFunction, + Name: schemas.Ptr("get_weather"), + }, + }, + }, + } + + ctx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline) + bedrockReq, err := bedrock.ToBedrockResponsesRequest(ctx, req) + require.NoError(t, err) + require.NotNil(t, bedrockReq) + require.NotNil(t, bedrockReq.ToolConfig, "expected 
tool_config to be initialized") + require.Len(t, bedrockReq.ToolConfig.Tools, 2, "expected synthetic structured output tool plus user tool") + require.NotNil(t, bedrockReq.ToolConfig.ToolChoice, "expected structured output tool choice to be forced") + require.NotNil(t, bedrockReq.ToolConfig.ToolChoice.Tool, "expected structured output tool choice to target the synthetic tool") + assert.Equal(t, "bf_so_classification", bedrockReq.ToolConfig.ToolChoice.Tool.Name) + assert.Equal(t, "bf_so_classification", bedrockReq.ToolConfig.Tools[0].ToolSpec.Name) + assert.Equal(t, "get_weather", bedrockReq.ToolConfig.Tools[1].ToolSpec.Name) +} + // TestToolResultJSONParsingResponsesAPI tests that tool results are correctly parsed and wrapped based on JSON type // Tests only Responses API. func TestToolResultJSONParsingResponsesAPI(t *testing.T) { @@ -2248,7 +2424,7 @@ func TestToolResultJSONParsingResponsesAPI(t *testing.T) { name: "JSONObjectResult", toolResultContent: `{"location":"NYC","temperature":72}`, expectedContentType: "json", - expectedJSON: mustMarshalJSON(map[string]any{"location": "NYC", "temperature": float64(72)}), + expectedJSON: mustMarshalJSON(map[string]any{"location": "NYC", "temperature": float64(72)}), }, { name: "JSONArrayResult", @@ -2265,37 +2441,37 @@ func TestToolResultJSONParsingResponsesAPI(t *testing.T) { name: "JSONPrimitiveNumberResult", toolResultContent: `42`, expectedContentType: "json", - expectedJSON: mustMarshalJSON(map[string]any{"value": float64(42)}), + expectedJSON: mustMarshalJSON(map[string]any{"value": float64(42)}), }, { name: "JSONPrimitiveStringResult", toolResultContent: `"hello world"`, expectedContentType: "json", - expectedJSON: mustMarshalJSON(map[string]any{"value": "hello world"}), + expectedJSON: mustMarshalJSON(map[string]any{"value": "hello world"}), }, { name: "JSONPrimitiveBooleanResult", toolResultContent: `true`, expectedContentType: "json", - expectedJSON: mustMarshalJSON(map[string]any{"value": true}), + expectedJSON: mustMarshalJSON(map[string]any{"value": true}), }, { name: "JSONPrimitiveNullResult", toolResultContent: `null`, expectedContentType: "json", - expectedJSON: mustMarshalJSON(map[string]any{"value": nil}), + expectedJSON: mustMarshalJSON(map[string]any{"value": nil}), }, { name: "EmptyJSONObjectResult", toolResultContent: `{}`, expectedContentType: "json", - expectedJSON: mustMarshalJSON(map[string]any{}), + expectedJSON: mustMarshalJSON(map[string]any{}), }, { name: "EmptyJSONArrayResult", toolResultContent: `[]`, expectedContentType: "json", - expectedJSON: mustMarshalJSON(map[string]any{"results": []any{}}), + expectedJSON: mustMarshalJSON(map[string]any{"results": []any{}}), }, } @@ -2887,6 +3063,379 @@ func TestAnthropicReasoningConfigUsesThinkingField(t *testing.T) { } } +func TestAnthropicOrderedOutputConfigRoundTripsReasoning(t *testing.T) { + request := &bedrock.BedrockConverseRequest{ + ModelID: "anthropic.claude-opus-4-6-v1", + Messages: []bedrock.BedrockMessage{ + { + Role: bedrock.BedrockMessageRoleUser, + Content: []bedrock.BedrockContentBlock{ + { + Text: schemas.Ptr("Hello"), + }, + }, + }, + }, + AdditionalModelRequestFields: schemas.NewOrderedMapFromPairs( + schemas.KV("thinking", map[string]any{ + "type": "adaptive", + "budget_tokens": 2048, + }), + schemas.KV("output_config", schemas.NewOrderedMapFromPairs( + schemas.KV("effort", "medium"), + )), + ), + ExtraParams: map[string]any{ + "reasoning_summary": "auto", + }, + } + + ctx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline) + result, 
err := request.ToBifrostResponsesRequest(ctx) + require.NoError(t, err) + require.NotNil(t, result) + require.NotNil(t, result.Params) + require.NotNil(t, result.Params.Reasoning) + require.NotNil(t, result.Params.Reasoning.Effort) + assert.Equal(t, "medium", *result.Params.Reasoning.Effort) + require.NotNil(t, result.Params.Reasoning.MaxTokens) + assert.Equal(t, 2048, *result.Params.Reasoning.MaxTokens) + require.NotNil(t, result.Params.Reasoning.Summary) + assert.Equal(t, "auto", *result.Params.Reasoning.Summary) +} + +func TestAnthropicOutputConfigFormatStillFallsBackToBudgetTokensForReasoning(t *testing.T) { + request := &bedrock.BedrockConverseRequest{ + ModelID: "anthropic.claude-opus-4-6-v1", + Messages: []bedrock.BedrockMessage{ + { + Role: bedrock.BedrockMessageRoleUser, + Content: []bedrock.BedrockContentBlock{ + { + Text: schemas.Ptr("Hello"), + }, + }, + }, + }, + AdditionalModelRequestFields: schemas.NewOrderedMapFromPairs( + schemas.KV("thinking", map[string]any{ + "type": "adaptive", + "budget_tokens": 2048, + }), + schemas.KV("output_config", schemas.NewOrderedMapFromPairs( + schemas.KV("format", schemas.NewOrderedMapFromPairs( + schemas.KV("type", "json_schema"), + schemas.KV("schema", schemas.NewOrderedMapFromPairs( + schemas.KV("type", "object"), + )), + )), + )), + ), + ExtraParams: map[string]any{ + "reasoning_summary": "auto", + }, + } + + ctx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline) + result, err := request.ToBifrostResponsesRequest(ctx) + require.NoError(t, err) + require.NotNil(t, result) + require.NotNil(t, result.Params) + require.NotNil(t, result.Params.Reasoning) + require.NotNil(t, result.Params.Reasoning.Effort) + // Effort is inferred from budget_tokens (2048) against the model-specific max output tokens + // (128K for claude-opus-4-6) minus Anthropic's minimum reasoning budget (1024). That ratio + // (~0.008) falls in the "low" bucket — see providerUtils.GetReasoningEffortFromBudgetTokens. + assert.Equal(t, "low", *result.Params.Reasoning.Effort) + require.NotNil(t, result.Params.Reasoning.MaxTokens) + assert.Equal(t, 2048, *result.Params.Reasoning.MaxTokens) + require.NotNil(t, result.Params.Reasoning.Summary) + assert.Equal(t, "auto", *result.Params.Reasoning.Summary) +} + +// TestAnthropicStructuredOutputUsesOutputConfigWithoutForcedToolChoice ensures +// Anthropic Bedrock structured output uses native output_config.format and does +// not synthesize a forced tool choice, while keeping reasoning (thinking) active. 
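// The budget→effort fallback asserted above can be sketched as a ratio
// bucketing. A minimal illustration, not the real
// providerUtils.GetReasoningEffortFromBudgetTokens: the normalization
// (budget above the 1024-token floor over the usable output window) is
// chosen to reproduce the ~0.008 figure quoted in the test comment, and the
// 0.2/0.5 bucket cutoffs are assumed.
func effortFromBudgetSketch(budgetTokens, modelMaxOutput, minBudget int) string {
	usable := modelMaxOutput - minBudget
	if usable <= 0 || budgetTokens <= minBudget {
		return "low"
	}
	ratio := float64(budgetTokens-minBudget) / float64(usable)
	switch {
	case ratio < 0.2: // (2048-1024)/(128000-1024) ≈ 0.008 → "low"
		return "low"
	case ratio < 0.5:
		return "medium"
	default:
		return "high"
	}
}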
+func TestAnthropicStructuredOutputUsesOutputConfigWithoutForcedToolChoice(t *testing.T) { + responseFormat := any(map[string]any{ + "type": "json_schema", + "json_schema": map[string]any{ + "name": "classification", + "schema": map[string]any{ + "type": "object", + "properties": map[string]any{ + "topic": map[string]any{ + "type": "string", + }, + }, + "required": []any{"topic"}, + }, + }, + }) + + bifrostReq := &schemas.BifrostChatRequest{ + Model: "anthropic.claude-3-7-sonnet-v1", + Input: []schemas.ChatMessage{ + { + Role: schemas.ChatMessageRoleUser, + Content: &schemas.ChatMessageContent{ + ContentStr: schemas.Ptr("Classify this"), + }, + }, + }, + Params: &schemas.ChatParameters{ + ResponseFormat: &responseFormat, + Reasoning: &schemas.ChatReasoning{ + MaxTokens: schemas.Ptr(2048), + }, + }, + } + + ctx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline) + result, err := bedrock.ToBedrockChatCompletionRequest(ctx, bifrostReq) + require.NoError(t, err) + require.NotNil(t, result) + require.NotNil(t, result.AdditionalModelRequestFields) + + outputConfigRaw, hasOutputConfig := result.AdditionalModelRequestFields.Get("output_config") + require.True(t, hasOutputConfig, "expected output_config for anthropic structured output") + + outputConfig, ok := outputConfigRaw.(*schemas.OrderedMap) + require.True(t, ok, "expected output_config to be an ordered map") + + formatRaw, hasFormat := outputConfig.Get("format") + require.True(t, hasFormat, "expected output_config.format") + + format, ok := formatRaw.(*schemas.OrderedMap) + require.True(t, ok, "expected output_config.format to be an ordered map") + formatType, hasType := format.Get("type") + require.True(t, hasType, "expected output_config.format.type") + assert.Equal(t, "json_schema", formatType) + _, hasSchema := format.Get("schema") + assert.True(t, hasSchema, "expected output_config.format.schema") + + // reasoning should still be preserved for anthropic + thinkingRaw, hasThinking := result.AdditionalModelRequestFields.Get("thinking") + require.True(t, hasThinking, "expected thinking field for anthropic reasoning") + thinking, ok := thinkingRaw.(map[string]any) + require.True(t, ok, "expected thinking to be a map") + assert.Equal(t, "enabled", thinking["type"]) + + // structured output should NOT force tool choice on Bedrock anthropic + if result.ToolConfig != nil { + assert.Nil(t, result.ToolConfig.ToolChoice, "expected no forced tool choice for anthropic structured output") + assert.Empty(t, result.ToolConfig.Tools, "expected no synthetic structured output tool for anthropic structured output") + } +} + +func TestAnthropicStructuredOutputAcceptsOrderedMaps(t *testing.T) { + responseFormat := any(schemas.NewOrderedMapFromPairs( + schemas.KV("type", "json_schema"), + schemas.KV("json_schema", schemas.NewOrderedMapFromPairs( + schemas.KV("name", "classification"), + schemas.KV("schema", schemas.NewOrderedMapFromPairs( + schemas.KV("type", "object"), + schemas.KV("description", "Return structured classification"), + schemas.KV("properties", schemas.NewOrderedMapFromPairs( + schemas.KV("topic", schemas.NewOrderedMapFromPairs( + schemas.KV("type", "string"), + )), + )), + schemas.KV("required", []any{"topic"}), + )), + )), + )) + + bifrostReq := &schemas.BifrostChatRequest{ + Model: "anthropic.claude-3-7-sonnet-v1", + Input: []schemas.ChatMessage{ + { + Role: schemas.ChatMessageRoleUser, + Content: &schemas.ChatMessageContent{ + ContentStr: schemas.Ptr("Classify this"), + }, + }, + }, + Params: &schemas.ChatParameters{ + 
ResponseFormat: &responseFormat, + Reasoning: &schemas.ChatReasoning{ + MaxTokens: schemas.Ptr(2048), + }, + }, + } + + ctx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline) + result, err := bedrock.ToBedrockChatCompletionRequest(ctx, bifrostReq) + require.NoError(t, err) + require.NotNil(t, result) + require.NotNil(t, result.AdditionalModelRequestFields) + + outputConfigRaw, hasOutputConfig := result.AdditionalModelRequestFields.Get("output_config") + require.True(t, hasOutputConfig, "expected output_config for anthropic structured output") + + outputConfig, ok := outputConfigRaw.(*schemas.OrderedMap) + require.True(t, ok, "expected output_config to be an ordered map") + + formatRaw, hasFormat := outputConfig.Get("format") + require.True(t, hasFormat, "expected output_config.format") + + format, ok := formatRaw.(*schemas.OrderedMap) + require.True(t, ok, "expected output_config.format to be an ordered map") + + formatType, ok := format.Get("type") + require.True(t, ok, "expected output_config.format.type") + assert.Equal(t, "json_schema", formatType) + + schemaRaw, ok := format.Get("schema") + require.True(t, ok, "expected output_config.format.schema") + _, ok = schemaRaw.(*schemas.OrderedMap) + require.True(t, ok, "expected output_config.format.schema to remain ordered") +} + +// TestNonAnthropicStructuredOutputStillUsesToolConversion ensures Bedrock models +// other than Anthropic continue to use the legacy response_format->tool path. +func TestNonAnthropicStructuredOutputStillUsesToolConversion(t *testing.T) { + responseFormat := any(schemas.NewOrderedMapFromPairs( + schemas.KV("type", "json_schema"), + schemas.KV("json_schema", schemas.NewOrderedMapFromPairs( + schemas.KV("name", "classification"), + schemas.KV("schema", schemas.NewOrderedMapFromPairs( + schemas.KV("type", "object"), + schemas.KV("description", "Return structured classification"), + schemas.KV("properties", schemas.NewOrderedMapFromPairs( + schemas.KV("topic", schemas.NewOrderedMapFromPairs( + schemas.KV("type", "string"), + )), + )), + schemas.KV("required", []any{"topic"}), + )), + )), + )) + + bifrostReq := &schemas.BifrostChatRequest{ + Model: "amazon.nova-pro-v1", + Input: []schemas.ChatMessage{ + { + Role: schemas.ChatMessageRoleUser, + Content: &schemas.ChatMessageContent{ + ContentStr: schemas.Ptr("Classify this"), + }, + }, + }, + Params: &schemas.ChatParameters{ + ResponseFormat: &responseFormat, + }, + } + + ctx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline) + result, err := bedrock.ToBedrockChatCompletionRequest(ctx, bifrostReq) + require.NoError(t, err) + require.NotNil(t, result) + + // Non-Anthropic models should not use output_config.format. 
+ if result.AdditionalModelRequestFields != nil { + _, hasOutputConfig := result.AdditionalModelRequestFields.Get("output_config") + assert.False(t, hasOutputConfig, "expected no output_config for non-anthropic structured output") + } + + require.NotNil(t, result.ToolConfig, "expected tool_config for non-anthropic structured output") + require.NotEmpty(t, result.ToolConfig.Tools, "expected synthetic structured output tool to be added") + require.NotNil(t, result.ToolConfig.ToolChoice, "expected structured output tool choice to be forced") + require.NotNil(t, result.ToolConfig.ToolChoice.Tool, "expected structured output tool choice to target the synthetic tool") + assert.Equal(t, "bf_so_classification", result.ToolConfig.ToolChoice.Tool.Name) + assert.Equal(t, "bf_so_classification", result.ToolConfig.Tools[0].ToolSpec.Name) + + schemaRaw := result.ToolConfig.Tools[0].ToolSpec.InputSchema.JSON + var schema schemas.OrderedMap + require.NoError(t, schema.UnmarshalJSON(schemaRaw)) + schemaType, ok := schema.Get("type") + require.True(t, ok, "expected tool schema type") + assert.Equal(t, "object", schemaType) +} + +// TestAnthropicStructuredOutputMergesAdditionalModelRequestFieldPaths ensures +// additionalModelRequestFieldPaths are merged into existing AdditionalModelRequestFields +// and output_config is deep-merged instead of overwritten. +func TestAnthropicStructuredOutputMergesAdditionalModelRequestFieldPaths(t *testing.T) { + responseFormat := any(map[string]any{ + "type": "json_schema", + "json_schema": map[string]any{ + "name": "classification", + "schema": map[string]any{ + "type": "object", + "properties": map[string]any{ + "topic": map[string]any{ + "type": "string", + }, + }, + "required": []any{"topic"}, + }, + }, + }) + + bifrostReq := &schemas.BifrostChatRequest{ + Model: "anthropic.claude-3-7-sonnet-v1", + Input: []schemas.ChatMessage{ + { + Role: schemas.ChatMessageRoleUser, + Content: &schemas.ChatMessageContent{ + ContentStr: schemas.Ptr("Classify this"), + }, + }, + }, + Params: &schemas.ChatParameters{ + ResponseFormat: &responseFormat, + Reasoning: &schemas.ChatReasoning{ + MaxTokens: schemas.Ptr(2048), + }, + ExtraParams: map[string]any{ + "additionalModelRequestFieldPaths": schemas.NewOrderedMapFromPairs( + schemas.KV("output_config", map[string]any{ + "foo": "bar", + }), + schemas.KV("customField", "customValue"), + ), + }, + }, + } + + ctx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline) + result, err := bedrock.ToBedrockChatCompletionRequest(ctx, bifrostReq) + require.NoError(t, err) + require.NotNil(t, result) + require.NotNil(t, result.AdditionalModelRequestFields) + + outputConfigRaw, hasOutputConfig := result.AdditionalModelRequestFields.Get("output_config") + require.True(t, hasOutputConfig, "expected output_config to exist after merge") + outputConfig, ok := outputConfigRaw.(*schemas.OrderedMap) + require.True(t, ok, "expected output_config to be an ordered map") + + // Existing structured output format must be preserved. 
+ formatRaw, hasFormat := outputConfig.Get("format") + require.True(t, hasFormat, "expected output_config.format to be preserved") + format, ok := formatRaw.(*schemas.OrderedMap) + require.True(t, ok, "expected output_config.format to be an ordered map") + formatType, hasType := format.Get("type") + require.True(t, hasType, "expected output_config.format.type") + assert.Equal(t, "json_schema", formatType) + _, hasSchema := format.Get("schema") + assert.True(t, hasSchema, "expected output_config.format.schema") + + // Incoming additionalModelRequestFieldPaths.output_config key must be merged. + foo, hasFoo := outputConfig.Get("foo") + require.True(t, hasFoo, "expected output_config.foo to be preserved") + assert.Equal(t, "bar", foo) + + // Existing top-level field (thinking) must not be lost. + _, hasThinking := result.AdditionalModelRequestFields.Get("thinking") + assert.True(t, hasThinking, "expected thinking to be preserved") + + // Incoming top-level keys must be merged. + customField, hasCustomField := result.AdditionalModelRequestFields.Get("customField") + require.True(t, hasCustomField, "expected customField to be merged") + assert.Equal(t, "customValue", customField) +} + // TestNovaReasoningConfigUsesReasoningConfigField verifies that Nova models use // the "reasoningConfig" field (camelCase) and NOT "thinking". func TestNovaReasoningConfigUsesReasoningConfigField(t *testing.T) { @@ -3641,7 +4190,7 @@ func TestToBedrockInvokeMessagesStreamResponse_NoDuplicateContentBlockStop(t *te } type bedrockChunk struct { - InvokeModelRawChunk []byte `json:"invokeModelRawChunk"` + InvokeModelRawChunks [][]byte `json:"invokeModelRawChunks"` } var stopCount int @@ -3655,11 +4204,123 @@ func TestToBedrockInvokeMessagesStreamResponse_NoDuplicateContentBlockStop(t *te require.NoError(t, err) var chunk bedrockChunk require.NoError(t, json.Unmarshal(raw, &chunk)) - if len(chunk.InvokeModelRawChunk) > 0 && - strings.Contains(string(chunk.InvokeModelRawChunk), "content_block_stop") { - stopCount++ + for _, rawChunk := range chunk.InvokeModelRawChunks { + if strings.Contains(string(rawChunk), "content_block_stop") { + stopCount++ + } } } assert.Equal(t, 1, stopCount, "expected exactly one content_block_stop event, got %d", stopCount) } + +func TestToolResultImageContentResponsesAPI(t *testing.T) { + // Minimal 1x1 red PNG + pngBase64 := "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAIAAACQd1PeAAAADElEQVR4nGP4z8AAAAMBAQDJ/pLvAAAAAElFTkSuQmCC" + + t.Run("ImageBlockPreservedInToolResult", func(t *testing.T) { + input := []schemas.ResponsesMessage{ + { + Type: schemas.Ptr(schemas.ResponsesMessageTypeFunctionCallOutput), + ResponsesToolMessage: &schemas.ResponsesToolMessage{ + CallID: schemas.Ptr("tooluse_screenshot_001"), + Output: &schemas.ResponsesToolMessageOutputStruct{ + ResponsesFunctionToolCallOutputBlocks: []schemas.ResponsesMessageContentBlock{ + { + Type: schemas.ResponsesInputMessageContentBlockTypeImage, + ResponsesInputMessageContentBlockImage: &schemas.ResponsesInputMessageContentBlockImage{ + ImageURL: schemas.Ptr("data:image/png;base64," + pngBase64), + }, + }, + }, + }, + }, + }, + } + + messages, _, err := bedrock.ConvertBifrostMessagesToBedrockMessages(input) + require.NoError(t, err) + require.Len(t, messages, 1) + + toolResultMsg := messages[0] + assert.Equal(t, bedrock.BedrockMessageRoleUser, toolResultMsg.Role) + require.Len(t, toolResultMsg.Content, 1) + + toolResult := toolResultMsg.Content[0].ToolResult + require.NotNil(t, toolResult, "expected tool result in content block") + assert.Equal(t, 
"tooluse_screenshot_001", toolResult.ToolUseID) + require.Len(t, toolResult.Content, 1, "tool result should contain exactly one content block") + + imageBlock := toolResult.Content[0] + require.NotNil(t, imageBlock.Image, "tool result content should be an image") + assert.Equal(t, "png", imageBlock.Image.Format) + require.NotNil(t, imageBlock.Image.Source.Bytes) + assert.Equal(t, pngBase64, *imageBlock.Image.Source.Bytes) + }) + + t.Run("MixedTextAndImageBlocksPreserved", func(t *testing.T) { + input := []schemas.ResponsesMessage{ + { + Type: schemas.Ptr(schemas.ResponsesMessageTypeFunctionCallOutput), + ResponsesToolMessage: &schemas.ResponsesToolMessage{ + CallID: schemas.Ptr("tooluse_mixed_002"), + Output: &schemas.ResponsesToolMessageOutputStruct{ + ResponsesFunctionToolCallOutputBlocks: []schemas.ResponsesMessageContentBlock{ + { + Type: schemas.ResponsesOutputMessageContentTypeText, + Text: schemas.Ptr("Screenshot captured successfully"), + }, + { + Type: schemas.ResponsesInputMessageContentBlockTypeImage, + ResponsesInputMessageContentBlockImage: &schemas.ResponsesInputMessageContentBlockImage{ + ImageURL: schemas.Ptr("data:image/png;base64," + pngBase64), + }, + }, + }, + }, + }, + }, + } + + messages, _, err := bedrock.ConvertBifrostMessagesToBedrockMessages(input) + require.NoError(t, err) + require.Len(t, messages, 1) + + toolResult := messages[0].Content[0].ToolResult + require.NotNil(t, toolResult) + require.Len(t, toolResult.Content, 2, "both text and image blocks should be preserved") + + assert.NotNil(t, toolResult.Content[0].Text, "first block should be text") + assert.NotNil(t, toolResult.Content[1].Image, "second block should be image") + assert.Equal(t, "png", toolResult.Content[1].Image.Format) + }) + + t.Run("RemoteURLImageGracefullyDropped", func(t *testing.T) { + input := []schemas.ResponsesMessage{ + { + Type: schemas.Ptr(schemas.ResponsesMessageTypeFunctionCallOutput), + ResponsesToolMessage: &schemas.ResponsesToolMessage{ + CallID: schemas.Ptr("tooluse_remote_003"), + Output: &schemas.ResponsesToolMessageOutputStruct{ + ResponsesFunctionToolCallOutputBlocks: []schemas.ResponsesMessageContentBlock{ + { + Type: schemas.ResponsesInputMessageContentBlockTypeImage, + ResponsesInputMessageContentBlockImage: &schemas.ResponsesInputMessageContentBlockImage{ + ImageURL: schemas.Ptr("https://example.com/screenshot.png"), + }, + }, + }, + }, + }, + }, + } + + messages, _, err := bedrock.ConvertBifrostMessagesToBedrockMessages(input) + require.NoError(t, err) + require.Len(t, messages, 1) + + toolResult := messages[0].Content[0].ToolResult + require.NotNil(t, toolResult) + assert.Empty(t, toolResult.Content, "remote URL image should be dropped (Bedrock only supports base64)") + }) +} diff --git a/core/providers/bedrock/convert_tool_config_test.go b/core/providers/bedrock/convert_tool_config_test.go new file mode 100644 index 0000000000..fc417394e5 --- /dev/null +++ b/core/providers/bedrock/convert_tool_config_test.go @@ -0,0 +1,477 @@ +package bedrock + +import ( + "context" + "encoding/json" + "strings" + "testing" + + "github.com/maximhq/bifrost/core/schemas" +) + +// TestConvertToolConfig_DropsServerToolsOnBedrock locks in the bug fix from +// the user-reported repro: sending `web_search_20260209` via the OpenAI- +// compatible /v1/chat/completions endpoint to Bedrock was producing a +// malformed ToolConfig that Bedrock rejected with 400 "The provided request +// is not valid". 
The fix strips unsupported server tools before the +// conversion loop so the outbound request is valid. +func TestConvertToolConfig_DropsServerToolsOnBedrock(t *testing.T) { + params := &schemas.ChatParameters{ + Tools: []schemas.ChatTool{ + { + Type: schemas.ChatToolTypeFunction, + Function: &schemas.ChatToolFunction{ + Name: "get_weather", + Description: schemas.Ptr("Get weather by city"), + Parameters: &schemas.ToolFunctionParameters{ + Type: "object", + }, + }, + }, + { + // Server tool — Bedrock doesn't support web_search per Table 20. + // Should be stripped silently. + Type: schemas.ChatToolType("web_search_20260209"), + Name: "web_search", + }, + }, + } + + cfg := convertToolConfig("global.anthropic.claude-sonnet-4-6", params) + if cfg == nil { + t.Fatalf("expected ToolConfig, got nil (function tool should have survived)") + } + if len(cfg.Tools) != 1 { + t.Fatalf("expected exactly 1 tool (function), got %d: %+v", len(cfg.Tools), cfg.Tools) + } + if cfg.Tools[0].ToolSpec == nil || cfg.Tools[0].ToolSpec.Name != "get_weather" { + t.Errorf("expected function tool 'get_weather' to survive, got %+v", cfg.Tools[0]) + } +} + +// TestConvertToolConfig_ReturnsNilWhenAllDropped locks in the empty-slice +// guard. Bedrock's Converse API rejects `"toolConfig": {"tools": []}` with a +// 400; when every tool is unsupported and gets stripped, convertToolConfig +// must return nil so no ToolConfig ships at all. +func TestConvertToolConfig_ReturnsNilWhenAllDropped(t *testing.T) { + params := &schemas.ChatParameters{ + Tools: []schemas.ChatTool{ + { + Type: schemas.ChatToolType("web_search_20260209"), + Name: "web_search", + }, + { + Type: schemas.ChatToolType("web_fetch_20260309"), + Name: "web_fetch", + }, + { + Type: schemas.ChatToolType("code_execution_20250825"), + Name: "code_execution", + }, + }, + } + + cfg := convertToolConfig("global.anthropic.claude-sonnet-4-6", params) + if cfg != nil { + t.Fatalf("expected nil ToolConfig (all tools unsupported on Bedrock), got %+v", cfg) + } +} + +// TestConvertToolConfig_KeepsBedrockSupportedServerTools — locks in that +// Bedrock-supported server tools (bash, memory, text_editor, computer, +// tool_search) do NOT appear in Converse's typed toolConfig.tools slot — +// they must be tunneled via additionalModelRequestFields (exercised in +// TestCollectBedrockServerTools_*). If the only tool is a server tool, +// toolConfig is nil so we don't ship {"toolConfig": {"tools": []}}. +func TestConvertToolConfig_KeepsBedrockSupportedServerTools(t *testing.T) { + params := &schemas.ChatParameters{ + Tools: []schemas.ChatTool{ + { + Type: schemas.ChatToolType("bash_20250124"), + Name: "bash", + }, + }, + } + + cfg := convertToolConfig("global.anthropic.claude-sonnet-4-6", params) + if cfg != nil { + t.Fatalf("expected nil toolConfig (server tools flow via additionalModelRequestFields, not toolSpec), got %+v", cfg) + } +} + +// TestCollectBedrockServerTools_BashOnly — bash is Bedrock-supported per the +// B-header list; the helper must emit it as a native-JSON tool entry with no +// derived beta header (bash has no high-confidence 1:1 beta-header mapping; +// callers rely on extra-headers for that). 
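// The beta-header derivations the TestCollectBedrockServerTools_* cases below
// pin down, condensed (per the assertions, not an exhaustive mapping):
//   bash_20250124     → no beta header (no 1:1 mapping; extra-headers instead)
//   computer_YYYYMMDD → computer-use-YYYY-MM-DD (date suffix rewritten)
//   memory_20250818   → context-management-2025-06-27 (bundle activation)
// The computer rewrite is a plain suffix transform; a minimal sketch with a
// hypothetical helper name (assumes "strings" is imported; the real mapping
// lives inside collectBedrockServerTools):
func computerBetaHeaderSketch(toolType string) (string, bool) {
	const prefix = "computer_"
	if !strings.HasPrefix(toolType, prefix) || len(toolType) != len(prefix)+8 {
		return "", false
	}
	d := toolType[len(prefix):] // e.g. "20251124"
	return "computer-use-" + d[:4] + "-" + d[4:6] + "-" + d[6:], true
}
// computerBetaHeaderSketch("computer_20251124") returns
// ("computer-use-2025-11-24", true), matching
// TestCollectBedrockServerTools_ComputerDerivesBeta.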
+func TestCollectBedrockServerTools_BashOnly(t *testing.T) { + params := &schemas.ChatParameters{ + Tools: []schemas.ChatTool{ + { + Type: schemas.ChatToolType("bash_20250124"), + Name: "bash", + }, + }, + } + tools, betas := collectBedrockServerTools(params) + if len(tools) != 1 { + t.Fatalf("expected 1 server tool, got %d", len(tools)) + } + got := string(tools[0]) + if !strings.Contains(got, `"type":"bash_20250124"`) || !strings.Contains(got, `"name":"bash"`) { + t.Errorf("expected native Anthropic bash shape, got %s", got) + } + if len(betas) != 0 { + t.Errorf("expected no derived beta headers for bash (no 1:1 mapping), got %v", betas) + } +} + +// TestCollectBedrockServerTools_ComputerDerivesBeta — computer_YYYYMMDD must +// derive computer-use-YYYY-MM-DD as the beta header, gated through +// FilterBetaHeadersForProvider(Bedrock) which keeps computer-use-* headers. +func TestCollectBedrockServerTools_ComputerDerivesBeta(t *testing.T) { + params := &schemas.ChatParameters{ + Tools: []schemas.ChatTool{ + { + Type: schemas.ChatToolType("computer_20251124"), + Name: "computer", + DisplayWidthPx: schemas.Ptr(1280), + DisplayHeightPx: schemas.Ptr(800), + }, + }, + } + tools, betas := collectBedrockServerTools(params) + if len(tools) != 1 { + t.Fatalf("expected 1 server tool, got %d", len(tools)) + } + if !strings.Contains(string(tools[0]), `"display_width_px":1280`) { + t.Errorf("expected computer variant fields to flow through, got %s", string(tools[0])) + } + if len(betas) != 1 || betas[0] != "computer-use-2025-11-24" { + t.Errorf("expected [computer-use-2025-11-24], got %v", betas) + } +} + +// TestCollectBedrockServerTools_MemoryDerivesContextManagement — memory +// activates via the context-management-2025-06-27 bundle on Bedrock (cite: +// anthropic/types.go:179). +func TestCollectBedrockServerTools_MemoryDerivesContextManagement(t *testing.T) { + params := &schemas.ChatParameters{ + Tools: []schemas.ChatTool{ + { + Type: schemas.ChatToolType("memory_20250818"), + Name: "memory", + }, + }, + } + _, betas := collectBedrockServerTools(params) + if len(betas) != 1 || betas[0] != "context-management-2025-06-27" { + t.Errorf("expected [context-management-2025-06-27], got %v", betas) + } +} + +// TestCollectBedrockServerTools_StripsUnsupported — web_search isn't in +// Bedrock's ProviderFeatures (WebSearch=false), so ValidateChatToolsForProvider +// drops it and the helper must emit nothing. +func TestCollectBedrockServerTools_StripsUnsupported(t *testing.T) { + params := &schemas.ChatParameters{ + Tools: []schemas.ChatTool{ + { + Type: schemas.ChatToolType("web_search_20260209"), + Name: "web_search", + }, + }, + } + tools, betas := collectBedrockServerTools(params) + if len(tools) != 0 { + t.Errorf("expected no server tools (web_search unsupported on Bedrock), got %d", len(tools)) + } + if len(betas) != 0 { + t.Errorf("expected no betas when all tools filtered, got %v", betas) + } +} + +// TestCollectBedrockServerTools_FunctionToolsIgnored — function/custom tools +// go through convertToolConfig, not this helper. 
+func TestCollectBedrockServerTools_FunctionToolsIgnored(t *testing.T) { + params := &schemas.ChatParameters{ + Tools: []schemas.ChatTool{ + { + Type: schemas.ChatToolTypeFunction, + Function: &schemas.ChatToolFunction{ + Name: "get_weather", + Parameters: &schemas.ToolFunctionParameters{ + Type: "object", + }, + }, + }, + }, + } + tools, betas := collectBedrockServerTools(params) + if len(tools) != 0 || len(betas) != 0 { + t.Errorf("function tools should not flow through server-tool helper, got tools=%d betas=%v", len(tools), betas) + } +} + +// TestBuildBedrockServerToolChoice_PinnedServerTool — caller pins a kept +// server tool (computer) by name. Converse's typed toolConfig.toolChoice path +// can't carry this because toolConfig.tools doesn't include server tools; the +// existing reconciliation silently drops the pin. The tunneled path must +// emit {"type":"tool","name":"computer"} into additionalModelRequestFields. +func TestBuildBedrockServerToolChoice_PinnedServerTool(t *testing.T) { + computer := schemas.ChatTool{ + Type: schemas.ChatToolType("computer_20251124"), + Name: "computer", + DisplayWidthPx: schemas.Ptr(1280), + } + params := &schemas.ChatParameters{ + Tools: []schemas.ChatTool{computer}, + ToolChoice: &schemas.ChatToolChoice{ + ChatToolChoiceStruct: &schemas.ChatToolChoiceStruct{ + Type: schemas.ChatToolChoiceTypeFunction, + Function: &schemas.ChatToolChoiceFunction{Name: "computer"}, + }, + }, + } + choice, ok := buildBedrockServerToolChoice(params, []schemas.ChatTool{computer}) + if !ok { + t.Fatalf("expected tunneled tool_choice for pinned server tool, got (nil, false)") + } + got := string(choice) + if !strings.Contains(got, `"type":"tool"`) || !strings.Contains(got, `"name":"computer"`) { + t.Errorf("expected Anthropic-native {type:tool,name:computer}, got %s", got) + } +} + +// TestBuildBedrockServerToolChoice_PinnedFunctionTool_NotTunneled — function +// tool pins stay on Converse's typed path (toolConfig.toolChoice.tool). The +// helper must not double-emit. +func TestBuildBedrockServerToolChoice_PinnedFunctionTool_NotTunneled(t *testing.T) { + fn := schemas.ChatTool{ + Type: schemas.ChatToolTypeFunction, + Function: &schemas.ChatToolFunction{ + Name: "get_weather", + Parameters: &schemas.ToolFunctionParameters{Type: "object"}, + }, + } + params := &schemas.ChatParameters{ + Tools: []schemas.ChatTool{fn}, + ToolChoice: &schemas.ChatToolChoice{ + ChatToolChoiceStruct: &schemas.ChatToolChoiceStruct{ + Type: schemas.ChatToolChoiceTypeFunction, + Function: &schemas.ChatToolChoiceFunction{Name: "get_weather"}, + }, + }, + } + if _, ok := buildBedrockServerToolChoice(params, []schemas.ChatTool{fn}); ok { + t.Errorf("expected no tunneling for function-tool pin (typed Converse path handles it)") + } +} + +// TestBuildBedrockServerToolChoice_AnyWithOnlyServerTools — tool_choice:any +// with only server tools: convertToolConfig returns nil (bedrockTools empty), +// so the typed any-contract is lost. The tunneled path must emit +// {"type":"any"} to preserve the forcing semantics. 
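// Taken together, the TestBuildBedrockServerToolChoice_* cases below pin down
// a small decision table. A sketch under assumed names (the real helper takes
// *schemas.ChatParameters plus the filtered tool slice); marshaling a
// map[string]string cannot fail, so the error is safely discarded:
func serverToolChoiceSketch(pinnedName string, keptServer map[string]bool, hasFunctionTool, wantAny bool) (json.RawMessage, bool) {
	switch {
	case pinnedName != "" && keptServer[pinnedName]:
		// Pin on a kept server tool → tunnel the Anthropic-native shape.
		b, _ := json.Marshal(map[string]string{"type": "tool", "name": pinnedName})
		return b, true
	case wantAny && !hasFunctionTool && len(keptServer) > 0:
		// any/required with only server tools → the typed Converse path is
		// inactive, so tunnel {"type":"any"} to preserve the forcing contract.
		return json.RawMessage(`{"type":"any"}`), true
	default:
		// Function-tool pins and any-with-function-tools stay on Converse's
		// typed toolConfig.toolChoice path; stripped pins are never fabricated.
		return nil, false
	}
}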
+func TestBuildBedrockServerToolChoice_AnyWithOnlyServerTools(t *testing.T) { + bash := schemas.ChatTool{ + Type: schemas.ChatToolType("bash_20250124"), + Name: "bash", + } + anyStr := string(schemas.ChatToolChoiceTypeAny) + params := &schemas.ChatParameters{ + Tools: []schemas.ChatTool{bash}, + ToolChoice: &schemas.ChatToolChoice{ + ChatToolChoiceStr: &anyStr, + }, + } + choice, ok := buildBedrockServerToolChoice(params, []schemas.ChatTool{bash}) + if !ok { + t.Fatalf("expected tunneled any-contract when only server tools are present, got (nil, false)") + } + got := string(choice) + if !strings.Contains(got, `"type":"any"`) { + t.Errorf("expected {type:any}, got %s", got) + } +} + +// TestBuildBedrockServerToolChoice_AnyWithFunctionTool_NotTunneled — when at +// least one function/custom tool is present, Converse's typed +// toolConfig.toolChoice.any carries the any-contract. Don't double-emit. +func TestBuildBedrockServerToolChoice_AnyWithFunctionTool_NotTunneled(t *testing.T) { + fn := schemas.ChatTool{ + Type: schemas.ChatToolTypeFunction, + Function: &schemas.ChatToolFunction{ + Name: "get_weather", + Parameters: &schemas.ToolFunctionParameters{Type: "object"}, + }, + } + bash := schemas.ChatTool{ + Type: schemas.ChatToolType("bash_20250124"), + Name: "bash", + } + anyStr := string(schemas.ChatToolChoiceTypeAny) + params := &schemas.ChatParameters{ + Tools: []schemas.ChatTool{fn, bash}, + ToolChoice: &schemas.ChatToolChoice{ + ChatToolChoiceStr: &anyStr, + }, + } + if _, ok := buildBedrockServerToolChoice(params, []schemas.ChatTool{fn, bash}); ok { + t.Errorf("expected no tunneling when function/custom tool is present (typed Converse path handles any)") + } +} + +// TestBuildBedrockServerToolChoice_UnsupportedServerToolPin_NotTunneled — the +// caller pins web_search, which ValidateChatToolsForProvider strips on +// Bedrock. The pin name is absent from the filtered set; the helper must not +// fabricate a tunneled tool_choice for a tool that isn't in the request. +func TestBuildBedrockServerToolChoice_UnsupportedServerToolPin_NotTunneled(t *testing.T) { + // The caller's original request had web_search, but it's been stripped. + // We pass the filtered slice (empty for the server-tool axis) to mimic + // the convertChatParameters call path. + params := &schemas.ChatParameters{ + Tools: []schemas.ChatTool{{Type: schemas.ChatToolType("web_search_20260209"), Name: "web_search"}}, + ToolChoice: &schemas.ChatToolChoice{ + ChatToolChoiceStruct: &schemas.ChatToolChoiceStruct{ + Type: schemas.ChatToolChoiceTypeFunction, + Function: &schemas.ChatToolChoiceFunction{Name: "web_search"}, + }, + }, + } + // Filtered (post-ValidateChatToolsForProvider(Bedrock)) — web_search is dropped. 
+ filtered := []schemas.ChatTool{} + if _, ok := buildBedrockServerToolChoice(params, filtered); ok { + t.Errorf("expected no tunneling when pinned name was stripped by provider validation") + } +} + +// TestConvertChatParameters_PinnedServerToolE2E — end-to-end verification +// that convertChatParameters composes convertToolConfig + +// collectBedrockServerTools + buildBedrockServerToolChoice such that a +// request pinning a kept server tool produces: +// - AdditionalModelRequestFields.tools containing the server tool +// - AdditionalModelRequestFields.tool_choice with Anthropic-native shape +// - ToolConfig nil (no function tools → Converse's typed path is inactive) +func TestConvertChatParameters_PinnedServerToolE2E(t *testing.T) { + bifrostReq := &schemas.BifrostChatRequest{ + Model: "global.anthropic.claude-sonnet-4-6", + Params: &schemas.ChatParameters{ + Tools: []schemas.ChatTool{ + { + Type: schemas.ChatToolType("computer_20251124"), + Name: "computer", + DisplayWidthPx: schemas.Ptr(1280), + }, + }, + ToolChoice: &schemas.ChatToolChoice{ + ChatToolChoiceStruct: &schemas.ChatToolChoiceStruct{ + Type: schemas.ChatToolChoiceTypeFunction, + Function: &schemas.ChatToolChoiceFunction{Name: "computer"}, + }, + }, + }, + } + bedrockReq := &BedrockConverseRequest{} + if err := convertChatParameters(nil, bifrostReq, bedrockReq); err != nil { + t.Fatalf("convertChatParameters failed: %v", err) + } + if bedrockReq.ToolConfig != nil { + t.Errorf("expected nil ToolConfig (no function/custom tools), got %+v", bedrockReq.ToolConfig) + } + if bedrockReq.AdditionalModelRequestFields == nil { + t.Fatalf("expected AdditionalModelRequestFields to carry server-tool payload, got nil") + } + tools, ok := bedrockReq.AdditionalModelRequestFields.Get("tools") + if !ok { + t.Errorf("expected additionalModelRequestFields.tools to be set for server tool") + } else if toolsSlice, castOK := tools.([]json.RawMessage); !castOK || len(toolsSlice) != 1 { + t.Errorf("expected 1 server tool in additionalModelRequestFields.tools, got %+v", tools) + } + choice, ok := bedrockReq.AdditionalModelRequestFields.Get("tool_choice") + if !ok { + t.Fatalf("expected additionalModelRequestFields.tool_choice to carry pinned server-tool contract") + } + choiceRaw, castOK := choice.(json.RawMessage) + if !castOK { + t.Fatalf("expected tool_choice value to be json.RawMessage, got %T", choice) + } + got := string(choiceRaw) + if !strings.Contains(got, `"type":"tool"`) || !strings.Contains(got, `"name":"computer"`) { + t.Errorf("expected {type:tool,name:computer}, got %s", got) + } +} + +// TestConvertChatParameters_ResponseFormatWithPinnedServerTool_NoConflictingChoice +// locks in the fix for the "two conflicting tool-choice directives" hazard: +// when response_format forces the synthetic bf_so_* tool via +// ToolConfig.ToolChoice, the tunneled additionalModelRequestFields.tool_choice +// (which would pin a server tool) must be suppressed so Bedrock doesn't +// receive both pins in the same Converse call. Uses a Nova model since +// Anthropic models route response_format through native output_config.format +// (no synthetic tool), so the conflict only surfaces on non-Anthropic +// Bedrock targets. 
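// For reference, the Converse payload shape TestConvertChatParameters_PinnedServerToolE2E
// above asserts once a kept server tool is pinned, sketched as JSON (key order
// illustrative; field spellings taken from the assertions):
//
//	"additionalModelRequestFields": {
//	  "tools":          [{"type": "computer_20251124", "name": "computer", "display_width_px": 1280}],
//	  "anthropic_beta": ["computer-use-2025-11-24"],
//	  "tool_choice":    {"type": "tool", "name": "computer"}
//	}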
+func TestConvertChatParameters_ResponseFormatWithPinnedServerTool_NoConflictingChoice(t *testing.T) { + responseFormat := any(map[string]any{ + "type": "json_schema", + "json_schema": map[string]any{ + "name": "classification", + "schema": map[string]any{ + "type": "object", + "properties": map[string]any{ + "topic": map[string]any{"type": "string"}, + }, + "required": []any{"topic"}, + }, + }, + }) + + bifrostReq := &schemas.BifrostChatRequest{ + Model: "amazon.nova-pro-v1:0", + Params: &schemas.ChatParameters{ + ResponseFormat: &responseFormat, + Tools: []schemas.ChatTool{ + { + Type: schemas.ChatToolType("bash_20250124"), + Name: "bash", + }, + }, + ToolChoice: &schemas.ChatToolChoice{ + ChatToolChoiceStruct: &schemas.ChatToolChoiceStruct{ + Type: schemas.ChatToolChoiceTypeFunction, + Function: &schemas.ChatToolChoiceFunction{Name: "bash"}, + }, + }, + }, + } + + ctx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline) + bedrockReq := &BedrockConverseRequest{} + if err := convertChatParameters(ctx, bifrostReq, bedrockReq); err != nil { + t.Fatalf("convertChatParameters failed: %v", err) + } + + // Synthetic bf_so_* tool must be injected and pinned via Converse's typed path. + if bedrockReq.ToolConfig == nil { + t.Fatalf("expected ToolConfig with synthetic bf_so_* tool, got nil") + } + if bedrockReq.ToolConfig.ToolChoice == nil || bedrockReq.ToolConfig.ToolChoice.Tool == nil { + t.Fatalf("expected ToolConfig.ToolChoice.Tool to pin synthetic structured-output tool, got %+v", bedrockReq.ToolConfig.ToolChoice) + } + if !strings.HasPrefix(bedrockReq.ToolConfig.ToolChoice.Tool.Name, "bf_so_") { + t.Errorf("expected ToolConfig.ToolChoice.Tool.Name to start with bf_so_, got %q", bedrockReq.ToolConfig.ToolChoice.Tool.Name) + } + + // Server tool must still be tunneled so the model has it available. + if bedrockReq.AdditionalModelRequestFields == nil { + t.Fatalf("expected AdditionalModelRequestFields to carry tunneled server-tool payload, got nil") + } + if _, ok := bedrockReq.AdditionalModelRequestFields.Get("tools"); !ok { + t.Errorf("expected additionalModelRequestFields.tools to still carry bash server tool") + } + + // Guarded field: tunneled tool_choice MUST be absent because response_format + // forces the synthetic tool. Two tool-choice directives in the same request + // would let Bedrock pick one and silently violate the structured-output contract. + if _, ok := bedrockReq.AdditionalModelRequestFields.Get("tool_choice"); ok { + t.Errorf("expected NO additionalModelRequestFields.tool_choice when response_format pins bf_so_* (conflict hazard)") + } +} diff --git a/core/providers/bedrock/invoke.go b/core/providers/bedrock/invoke.go index c21520edc4..bbd090a214 100644 --- a/core/providers/bedrock/invoke.go +++ b/core/providers/bedrock/invoke.go @@ -644,24 +644,27 @@ func ToBedrockInvokeMessagesStreamResponse(ctx *schemas.BifrostContext, resp *sc } // For Anthropic models (and default): serialize as Anthropic Messages API SSE events, - // then wrap in InvokeModelRawChunk - rawBytes, err := toAnthropicInvokeStreamBytes(resp) + // then wrap in InvokeModelRawChunks. Some Bifrost events map to multiple Anthropic events + // (e.g., Completed → message_delta + message_stop). 
+ rawChunks, err := toAnthropicInvokeStreamBytes(resp) if err != nil { return "", nil, err } - if rawBytes == nil { + if len(rawChunks) == 0 { return "", nil, nil } bedrockEvent := &BedrockStreamEvent{ - InvokeModelRawChunk: rawBytes, + InvokeModelRawChunks: rawChunks, } return "", bedrockEvent, nil } // toAnthropicInvokeStreamBytes converts a Bifrost stream event into raw bytes representing -// the Anthropic Messages API streaming event JSON, suitable for wrapping in InvokeModelRawChunk. -func toAnthropicInvokeStreamBytes(resp *schemas.BifrostResponsesStreamResponse) ([]byte, error) { +// the Anthropic Messages API streaming event JSON, suitable for wrapping in InvokeModelRawChunks. +// Returns a slice of byte slices since some Bifrost events map to multiple Anthropic events +// (e.g., Completed → message_delta + message_stop). +func toAnthropicInvokeStreamBytes(resp *schemas.BifrostResponsesStreamResponse) ([][]byte, error) { var event interface{} switch resp.Type { @@ -830,13 +833,13 @@ func toAnthropicInvokeStreamBytes(resp *schemas.BifrostResponsesStreamResponse) return nil, nil case schemas.ResponsesStreamResponseTypeCompleted: - // Emit message_delta + message_stop + // Emit message_delta + message_stop as two separate events stopReason := "end_turn" if resp.Response != nil && resp.Response.IncompleteDetails != nil { stopReason = resp.Response.IncompleteDetails.Reason } - // Build combined payload: message_delta data + // Build message_delta event messageDelta := map[string]interface{}{ "type": "message_delta", "delta": map[string]interface{}{ @@ -849,7 +852,22 @@ func toAnthropicInvokeStreamBytes(resp *schemas.BifrostResponsesStreamResponse) "output_tokens": resp.Response.Usage.OutputTokens, } } - event = messageDelta + + // Build message_stop event + messageStop := map[string]interface{}{ + "type": "message_stop", + } + + // Marshal both events + deltaBytes, err := providerUtils.MarshalSorted(messageDelta) + if err != nil { + return nil, fmt.Errorf("failed to marshal message_delta event: %w", err) + } + stopBytes, err := providerUtils.MarshalSorted(messageStop) + if err != nil { + return nil, fmt.Errorf("failed to marshal message_stop event: %w", err) + } + return [][]byte{deltaBytes, stopBytes}, nil default: return nil, nil @@ -859,9 +877,9 @@ func toAnthropicInvokeStreamBytes(resp *schemas.BifrostResponsesStreamResponse) return nil, nil } - bytes, err := providerUtils.MarshalSorted(event) + eventBytes, err := providerUtils.MarshalSorted(event) if err != nil { return nil, fmt.Errorf("failed to marshal invoke stream event: %w", err) } - return bytes, nil + return [][]byte{eventBytes}, nil } diff --git a/core/providers/bedrock/responses.go b/core/providers/bedrock/responses.go index f8ba172fb5..1f53e12083 100644 --- a/core/providers/bedrock/responses.go +++ b/core/providers/bedrock/responses.go @@ -1302,11 +1302,11 @@ type BedrockInvokeStreamChunkEvent struct { func (event *BedrockStreamEvent) ToEncodedEvents() []BedrockEncodedEvent { var events []BedrockEncodedEvent - if event.InvokeModelRawChunk != nil { + for _, rawChunk := range event.InvokeModelRawChunks { events = append(events, BedrockEncodedEvent{ EventType: "chunk", Payload: BedrockInvokeStreamChunkEvent{ - Bytes: event.InvokeModelRawChunk, + Bytes: rawChunk, }, }) } @@ -1500,20 +1500,30 @@ func (request *BedrockConverseRequest) ToBifrostResponsesRequest(ctx *schemas.Bi if summaryValue, ok := schemas.SafeExtractStringPointer(request.ExtraParams["reasoning_summary"]); ok { summary = summaryValue } - // Check for native 
output_config.effort first + var ( + effortStr string + found bool + ) + // Check for native output_config.effort first. + // output_config may be preserved as OrderedMap by the merge path. if outputConfig, ok := request.AdditionalModelRequestFields.Get("output_config"); ok { - if outputConfigMap, ok := outputConfig.(map[string]interface{}); ok { - if effortStr, ok := schemas.SafeExtractString(outputConfigMap["effort"]); ok { - var maxTokens *int - if budgetTokens, ok := schemas.SafeExtractInt(reasoningConfigMap["budget_tokens"]); ok { - maxTokens = schemas.Ptr(budgetTokens) - } - bifrostReq.Params.Reasoning = &schemas.ResponsesParametersReasoning{ - Effort: schemas.Ptr(effortStr), - MaxTokens: maxTokens, - Summary: summary, - } + if outputConfigOrderedMap, ok := schemas.SafeExtractOrderedMap(outputConfig); ok && outputConfigOrderedMap != nil { + if effortValue, exists := outputConfigOrderedMap.Get("effort"); exists { + effortStr, found = schemas.SafeExtractString(effortValue) } + } else if outputConfigMap, ok := outputConfig.(map[string]interface{}); ok { + effortStr, found = schemas.SafeExtractString(outputConfigMap["effort"]) + } + } + if found { + var maxTokens *int + if budgetTokens, ok := schemas.SafeExtractInt(reasoningConfigMap["budget_tokens"]); ok { + maxTokens = schemas.Ptr(budgetTokens) + } + bifrostReq.Params.Reasoning = &schemas.ResponsesParametersReasoning{ + Effort: schemas.Ptr(effortStr), + MaxTokens: maxTokens, + Summary: summary, } } else if maxTokens, ok := schemas.SafeExtractInt(reasoningConfigMap["budget_tokens"]); ok { // Fallback: convert budget_tokens to effort @@ -1673,6 +1683,8 @@ func ToBedrockResponsesRequest(ctx *schemas.BifrostContext, bifrostReq *schemas. } } + var responsesStructuredOutputTool *BedrockTool + // Map basic parameters to inference config if bifrostReq.Params != nil { inferenceConfig := &BedrockInferenceConfig{} @@ -1770,9 +1782,7 @@ func ToBedrockResponsesRequest(ctx *schemas.BifrostContext, bifrostReq *schemas. bedrockReq.AdditionalModelRequestFields.Set("thinking", map[string]any{ "type": "adaptive", }) - bedrockReq.AdditionalModelRequestFields.Set("output_config", map[string]any{ - "effort": effort, - }) + setOutputConfigField(bedrockReq.AdditionalModelRequestFields, "effort", effort) } else { // Opus 4.5 and older Anthropic models: budget_tokens thinking modelDefaultMaxTokens := providerUtils.GetMaxOutputTokensOrDefault(bifrostReq.Model, DefaultCompletionMaxTokens) @@ -1829,19 +1839,17 @@ func ToBedrockResponsesRequest(ctx *schemas.BifrostContext, bifrostReq *schemas. 
} if bifrostReq.Params.Text != nil { if bifrostReq.Params.Text.Format != nil { - responseFormatTool := convertTextFormatToTool(ctx, bifrostReq.Params.Text) - // append to bedrockTools - if responseFormatTool != nil { - if bedrockReq.ToolConfig == nil { - bedrockReq.ToolConfig = &BedrockToolConfig{} - } - bedrockReq.ToolConfig.Tools = append(bedrockReq.ToolConfig.Tools, *responseFormatTool) - // Force the model to use this specific tool (same as ChatCompletion) - bedrockReq.ToolConfig.ToolChoice = &BedrockToolChoice{ - Tool: &BedrockToolChoiceTool{ - Name: responseFormatTool.ToolSpec.Name, - }, + responseFormatTool, anthropicOutputFormat := convertTextFormatToTool(ctx, bifrostReq.Model, bifrostReq.Params.Text) + if anthropicOutputFormat != nil { + if bedrockReq.AdditionalModelRequestFields == nil { + bedrockReq.AdditionalModelRequestFields = schemas.NewOrderedMap() } + setOutputConfigField(bedrockReq.AdditionalModelRequestFields, "format", anthropicOutputFormat) + } + // Defer synthetic tool injection until after normal tool/tool_choice conversion + // so the structured-output tool is not overwritten by the later pass. + if responseFormatTool != nil { + responsesStructuredOutputTool = responseFormatTool } } } @@ -1855,7 +1863,10 @@ func ToBedrockResponsesRequest(ctx *schemas.BifrostContext, bifrostReq *schemas. if requestFields, exists := bifrostReq.Params.ExtraParams["additionalModelRequestFieldPaths"]; exists { if orderedFields, ok := schemas.SafeExtractOrderedMap(requestFields); ok { delete(bedrockReq.ExtraParams, "additionalModelRequestFieldPaths") - bedrockReq.AdditionalModelRequestFields = orderedFields + bedrockReq.AdditionalModelRequestFields = mergeAdditionalModelRequestFields( + bedrockReq.AdditionalModelRequestFields, + orderedFields, + ) } } @@ -1959,6 +1970,20 @@ func ToBedrockResponsesRequest(ctx *schemas.BifrostContext, bifrostReq *schemas. } } + // If text.format was converted to a synthetic tool, inject it after the normal + // tool/tool_choice pass so it is not overwritten by the above conversion. + if responsesStructuredOutputTool != nil { + if bedrockReq.ToolConfig == nil { + bedrockReq.ToolConfig = &BedrockToolConfig{} + } + bedrockReq.ToolConfig.Tools = append([]BedrockTool{*responsesStructuredOutputTool}, bedrockReq.ToolConfig.Tools...) + bedrockReq.ToolConfig.ToolChoice = &BedrockToolChoice{ + Tool: &BedrockToolChoiceTool{ + Name: responsesStructuredOutputTool.ToolSpec.Name, + }, + } + } + // Ensure tool config is present when tool content exists (similar to Chat Completions) ensureResponsesToolConfigForConversation(bifrostReq, bedrockReq) @@ -2565,6 +2590,18 @@ func ConvertBifrostMessagesToBedrockMessages(bifrostMessages []schemas.Responses for _, block := range msg.ResponsesToolMessage.Output.ResponsesFunctionToolCallOutputBlocks { if block.Text != nil { resultContent = append(resultContent, tryParseJSONIntoContentBlock(*block.Text)) + } else if block.Type == schemas.ResponsesInputMessageContentBlockTypeImage && + block.ResponsesInputMessageContentBlockImage != nil && + block.ResponsesInputMessageContentBlockImage.ImageURL != nil { + imageSource, err := convertImageToBedrockSource(*block.ResponsesInputMessageContentBlockImage.ImageURL) + if err != nil { + // Bedrock only supports base64 data URIs for images. If conversion + // fails (e.g. remote URL), the image is dropped from the tool result + // which silently degrades the model's ability to see tool output. 
+ continue + } else { + resultContent = append(resultContent, BedrockContentBlock{Image: imageSource}) + } } } } diff --git a/core/providers/bedrock/types.go b/core/providers/bedrock/types.go index 388571cc38..98c46ae96f 100644 --- a/core/providers/bedrock/types.go +++ b/core/providers/bedrock/types.go @@ -603,7 +603,10 @@ type BedrockStreamEvent struct { AdditionalModelResponseFields interface{} `json:"additionalModelResponseFields,omitempty"` // For InvokeModelWithResponseStream (Legacy API) - InvokeModelRawChunk []byte `json:"invokeModelRawChunk,omitempty"` // Raw bytes for legacy invoke stream + // InvokeModelRawChunks holds one or more raw byte payloads for legacy invoke stream. + // Multiple chunks are needed when a single Bifrost event maps to multiple Anthropic SSE events + // (e.g., Completed → message_delta + message_stop). + InvokeModelRawChunks [][]byte `json:"invokeModelRawChunks,omitempty"` } // BedrockMessageStartEvent indicates the start of a message diff --git a/core/providers/bedrock/utils.go b/core/providers/bedrock/utils.go index 1a8885ca54..4eb48452a0 100644 --- a/core/providers/bedrock/utils.go +++ b/core/providers/bedrock/utils.go @@ -74,14 +74,60 @@ func convertChatParameters(ctx *schemas.BifrostContext, bifrostReq *schemas.Bifr bedrockReq.InferenceConfig = inferenceConfig } - // Check for response_format and convert to tool - responseFormatTool := convertResponseFormatToTool(ctx, bifrostReq.Params) + // Handle structured output conversion: + // - Anthropic models on Bedrock use native output_config.format + // - Other models keep the response_format->tool conversion. + responseFormatTool, anthropicOutputFormat := convertResponseFormatToTool(ctx, bifrostReq.Model, bifrostReq.Params) + if anthropicOutputFormat != nil { + if bedrockReq.AdditionalModelRequestFields == nil { + bedrockReq.AdditionalModelRequestFields = schemas.NewOrderedMap() + } + setOutputConfigField(bedrockReq.AdditionalModelRequestFields, "format", anthropicOutputFormat) + } - // Convert tool config - if toolConfig := convertToolConfig(bifrostReq.Model, bifrostReq.Params); toolConfig != nil { + // Filter provider-unsupported server tools once; both convertToolConfig and + // collectBedrockServerTools consume the same filtered set, and + // buildBedrockServerToolChoice resolves pinned names against it. + filteredTools, _ := anthropic.ValidateChatToolsForProvider(bifrostReq.Params.Tools, schemas.Bedrock) + + // Convert tool config (function/custom tools → Converse toolConfig.tools). + if toolConfig := convertToolConfigFromFiltered(bifrostReq.Model, bifrostReq.Params, filteredTools); toolConfig != nil { bedrockReq.ToolConfig = toolConfig } + // Tunnel Bedrock-supported Anthropic server tools through Converse's + // additionalModelRequestFields (model-specific passthrough) since Converse's + // typed toolSpec shape can't express server tools like bash_*, computer_*, + // memory_*, text_editor_*, tool_search_tool_*. Fields injected: + // - tools: array of server tools in Anthropic-native shape, which + // Bedrock merges into the underlying Messages request. + // - anthropic_beta: activation header(s) for the relevant server tool, in + // addition to whatever the existing anthropic-beta HTTP + // header path in bedrock.go:214/447 already forwards. + // - tool_choice: Anthropic-native pin for a kept server tool OR an + // any/required contract when only server tools are + // present.
Emitted only when Converse's typed + // toolConfig.toolChoice path can't express the intent + // (see buildBedrockServerToolChoice). + if serverTools, betaHeaders := collectBedrockServerToolsFromFiltered(filteredTools); len(serverTools) > 0 { + if bedrockReq.AdditionalModelRequestFields == nil { + bedrockReq.AdditionalModelRequestFields = schemas.NewOrderedMap() + } + bedrockReq.AdditionalModelRequestFields.Set("tools", serverTools) + if len(betaHeaders) > 0 { + bedrockReq.AdditionalModelRequestFields.Set("anthropic_beta", betaHeaders) + } + // Skip the tunneled tool_choice when response_format forces the synthetic + // bf_so_* tool at lines 263-275 below; otherwise Bedrock receives two + // conflicting tool-choice directives and the structured-output contract + // can silently break. + if responseFormatTool == nil { + if choice, ok := buildBedrockServerToolChoice(bifrostReq.Params, filteredTools); ok { + bedrockReq.AdditionalModelRequestFields.Set("tool_choice", choice) + } + } + } + // Convert reasoning config if bifrostReq.Params.Reasoning != nil { if bedrockReq.AdditionalModelRequestFields == nil { @@ -190,9 +236,7 @@ func convertChatParameters(ctx *schemas.BifrostContext, bifrostReq *schemas.Bifr bedrockReq.AdditionalModelRequestFields.Set("thinking", map[string]any{ "type": "adaptive", }) - bedrockReq.AdditionalModelRequestFields.Set("output_config", map[string]any{ - "effort": effort, - }) + setOutputConfigField(bedrockReq.AdditionalModelRequestFields, "effort", effort) } else { // Opus 4.5 and older models: budget_tokens thinking budgetTokens, err := providerUtils.GetBudgetTokensFromReasoningEffort(*bifrostReq.Params.Reasoning.Effort, anthropic.MinimumReasoningMaxTokens, maxTokens) @@ -270,7 +314,10 @@ func convertChatParameters(ctx *schemas.BifrostContext, bifrostReq *schemas.Bifr if requestFields, exists := bifrostReq.Params.ExtraParams["additionalModelRequestFieldPaths"]; exists { if orderedFields, ok := schemas.SafeExtractOrderedMap(requestFields); ok { delete(bedrockReq.ExtraParams, "additionalModelRequestFieldPaths") - bedrockReq.AdditionalModelRequestFields = orderedFields + bedrockReq.AdditionalModelRequestFields = mergeAdditionalModelRequestFields( + bedrockReq.AdditionalModelRequestFields, + orderedFields, + ) } } @@ -341,6 +388,103 @@ func convertChatParameters(ctx *schemas.BifrostContext, bifrostReq *schemas.Bifr return nil } +// setOutputConfigField upserts a single key in additionalModelRequestFields.output_config +// while preserving any existing output_config keys (e.g. keep "format" when adding "effort"). 
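// A minimal usage sketch of the upsert behavior (formatVal is a placeholder
// for an already-built Anthropic output-format value):
//
//	fields := schemas.NewOrderedMap()
//	setOutputConfigField(fields, "format", formatVal) // output_config == {"format": formatVal}
//	setOutputConfigField(fields, "effort", "high")    // output_config == {"format": formatVal, "effort": "high"}
//
// so the adaptive-thinking effort write above no longer clobbers a previously
// set structured-output format, as the old Set("output_config", ...) call did.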
+func setOutputConfigField(fields *schemas.OrderedMap, key string, value any) { + if fields == nil { + return + } + current := schemas.NewOrderedMap() + if existing, ok := fields.Get("output_config"); ok { + if om, ok := toOrderedMap(existing); ok && om != nil { + current = om + } + } + current.Set(key, value) + fields.Set("output_config", current) +} + +func mergeAdditionalModelRequestFields(existing, incoming *schemas.OrderedMap) *schemas.OrderedMap { + if existing == nil { + if incoming == nil { + return nil + } + return incoming.Clone() + } + if incoming == nil { + return existing + } + + merged := existing.Clone() + incoming.Range(func(key string, value interface{}) bool { + if key == "output_config" { + current := schemas.NewOrderedMap() + if existingValue, ok := merged.Get(key); ok { + if om, ok := toOrderedMap(existingValue); ok && om != nil { + current = om + } + } + if incomingMap, ok := toOrderedMap(value); ok && incomingMap != nil { + mergeOrderedMapInto(current, incomingMap) + merged.Set(key, current) + } else { + merged.Set(key, value) + } + return true + } + merged.Set(key, value) + return true + }) + return merged +} + +func toOrderedMap(v any) (*schemas.OrderedMap, bool) { + switch m := v.(type) { + case *schemas.OrderedMap: + if m == nil { + return nil, false + } + return m.Clone(), true + case schemas.OrderedMap: + return m.Clone(), true + case map[string]interface{}: + // Fallback for callers that still provide a plain map. Order cannot be + // reconstructed here, but keeping this path preserves compatibility. + return schemas.OrderedMapFromMap(m), true + default: + return nil, false + } +} + +// mergeOrderedMapInto deep-merges src into dst. Nested OrderedMap values are +// merged recursively; non-map values from src overwrite dst. Existing key order +// is preserved and newly introduced keys are appended in source order. 
+func mergeOrderedMapInto(dst, src *schemas.OrderedMap) { + if dst == nil || src == nil { + return + } + src.Range(func(key string, srcVal interface{}) bool { + if srcMap, ok := toOrderedMap(srcVal); ok && srcMap != nil { + if dstVal, exists := dst.Get(key); exists { + if dstMap, ok := toOrderedMap(dstVal); ok && dstMap != nil { + mergeOrderedMapInto(dstMap, srcMap) + dst.Set(key, dstMap) + return true + } + } + } + dst.Set(key, srcVal) + return true + }) +} + +func newAnthropicOutputFormatOrderedMap(schemaObj any) *schemas.OrderedMap { + return schemas.NewOrderedMapFromPairs( + schemas.KV("type", "json_schema"), + schemas.KV("schema", schemaObj), + ) +} + // ensureChatToolConfigForConversation ensures toolConfig is present when tool content exists func ensureChatToolConfigForConversation(bifrostReq *schemas.BifrostChatRequest, bedrockReq *BedrockConverseRequest) { if bedrockReq.ToolConfig != nil { @@ -825,44 +969,70 @@ func convertImageToBedrockSource(imageURL string) (*BedrockImageSource, error) { // convertResponseFormatToTool converts a response_format parameter to a Bedrock tool // Returns nil if no response_format is present or if it's not a json_schema type // Ref: https://aws.amazon.com/blogs/machine-learning/structured-data-response-with-amazon-bedrock-prompt-engineering-and-tool-use/ -func convertResponseFormatToTool(ctx *schemas.BifrostContext, params *schemas.ChatParameters) *BedrockTool { +func convertResponseFormatToTool( + ctx *schemas.BifrostContext, + model string, + params *schemas.ChatParameters, +) (*BedrockTool, any) { if params == nil || params.ResponseFormat == nil { - return nil + return nil, nil } - // ResponseFormat is stored as interface{}, need to parse it - responseFormatMap, ok := (*params.ResponseFormat).(map[string]interface{}) - if !ok { - return nil + responseFormatMap, ok := schemas.SafeExtractOrderedMap(*params.ResponseFormat) + if !ok || responseFormatMap == nil { + return nil, nil } // Check if type is "json_schema" - formatType, ok := responseFormatMap["type"].(string) + formatTypeRaw, ok := responseFormatMap.Get("type") + if !ok { + return nil, nil + } + formatType, ok := schemas.SafeExtractString(formatTypeRaw) if !ok || formatType != "json_schema" { - return nil + return nil, nil } // Extract json_schema object - jsonSchemaObj, ok := responseFormatMap["json_schema"].(map[string]interface{}) + jsonSchemaRaw, ok := responseFormatMap.Get("json_schema") if !ok { - return nil + return nil, nil } - - // Extract name and schema - toolName, ok := jsonSchemaObj["name"].(string) - if !ok || toolName == "" { - toolName = "json_response" + jsonSchemaObj, ok := schemas.SafeExtractOrderedMap(jsonSchemaRaw) + if !ok || jsonSchemaObj == nil { + return nil, nil } - schemaObj, ok := jsonSchemaObj["schema"].(map[string]interface{}) + schemaObj, ok := jsonSchemaObj.Get("schema") if !ok { - return nil + return nil, nil + } + + // Anthropic Bedrock supports native output_config.format. Keep this provider-specific + // conversion encapsulated here, and let caller just apply returned values. 
+ if schemas.IsAnthropicModel(model) { + return nil, newAnthropicOutputFormatOrderedMap(schemaObj) + } + + // Extract name and schema + toolNameRaw, hasName := jsonSchemaObj.Get("name") + toolName, ok := schemas.SafeExtractString(toolNameRaw) + if !hasName || !ok || toolName == "" { + toolName = "json_response" } // Extract description from schema if available description := "Returns structured JSON output" - if desc, ok := schemaObj["description"].(string); ok && desc != "" { - description = desc + if schemaMap, ok := schemas.SafeExtractOrderedMap(schemaObj); ok && schemaMap != nil { + if descRaw, hasDesc := schemaMap.Get("description"); hasDesc { + if desc, ok := schemas.SafeExtractString(descRaw); ok && desc != "" { + description = desc + } + } + } else if schemaMap, ok := schemaObj.(map[string]interface{}); ok { + if desc, ok := schemaMap["description"].(string); ok && desc != "" { + description = desc + } } // set bifrost context key structured output tool name @@ -872,7 +1042,7 @@ func convertResponseFormatToTool(ctx *schemas.BifrostContext, params *schemas.Ch // Create the Bedrock tool schemaObjBytes, err := providerUtils.MarshalSorted(schemaObj) if err != nil { - return nil + return nil, nil } return &BedrockTool{ ToolSpec: &BedrockToolSpec{ @@ -882,18 +1052,19 @@ func convertResponseFormatToTool(ctx *schemas.BifrostContext, params *schemas.Ch JSON: json.RawMessage(schemaObjBytes), }, }, - } + }, nil } -// convertTextFormatToTool converts a text config to a Bedrock tool for structured outpute -func convertTextFormatToTool(ctx *schemas.BifrostContext, textConfig *schemas.ResponsesTextConfig) *BedrockTool { +// convertTextFormatToTool converts a Responses text.format config to either a +// synthetic Bedrock tool or an Anthropic-native output_config.format value. 
+func convertTextFormatToTool(ctx *schemas.BifrostContext, model string, textConfig *schemas.ResponsesTextConfig) (*BedrockTool, any) { if textConfig == nil || textConfig.Format == nil { - return nil + return nil, nil } format := textConfig.Format if format.Type != "json_schema" { - return nil + return nil, nil } toolName := "json_response" @@ -902,23 +1073,24 @@ func convertTextFormatToTool(ctx *schemas.BifrostContext, textConfig *schemas.Re } description := "Returns structured JSON output" + if format.JSONSchema == nil || format.JSONSchema.Schema == nil { + return nil, nil // Schema is required for structured output + } if format.JSONSchema.Description != nil { description = *format.JSONSchema.Description } + schemaObj := *format.JSONSchema.Schema + + if schemas.IsAnthropicModel(model) { + return nil, newAnthropicOutputFormatOrderedMap(schemaObj) + } toolName = fmt.Sprintf("bf_so_%s", toolName) ctx.SetValue(schemas.BifrostContextKeyStructuredOutputToolName, toolName) - var schemaObj any - if format.JSONSchema != nil { - schemaObj = *format.JSONSchema - } else { - return nil // Schema is required for Bedrock tooling - } - schemaObjBytes2, err := providerUtils.MarshalSorted(schemaObj) if err != nil { - return nil + return nil, nil } return &BedrockTool{ ToolSpec: &BedrockToolSpec{ @@ -928,7 +1100,7 @@ func convertTextFormatToTool(ctx *schemas.BifrostContext, textConfig *schemas.Re JSON: json.RawMessage(schemaObjBytes2), }, }, - } + }, nil } // convertInferenceConfig converts Bifrost parameters to Bedrock inference config @@ -953,14 +1125,225 @@ func convertInferenceConfig(params *schemas.ChatParameters) *BedrockInferenceCon return &config } -// convertToolConfig converts Bifrost tools to Bedrock tool config +// collectBedrockServerTools partitions kept tools into the function/custom +// set (which convertToolConfig materializes into Converse's toolConfig.tools) +// and the kept-server-tool set (which cannot be expressed via Converse's +// typed toolSpec slot and must be tunneled via additionalModelRequestFields). +// +// Returns: +// - serverTools: each ChatTool serialized to its Anthropic-native JSON shape +// (e.g. `{"type":"computer_20251124","name":"computer","display_width_px":1280}`) +// ready to drop into additionalModelRequestFields.tools. Per the comment on +// ChatTool in core/schemas/chatcompletions.go:340-351, the default marshaler +// produces this shape directly — no custom codec needed. +// - betaHeaders: anthropic-beta header values derived from the server tool +// Types, filtered through FilterBetaHeadersForProvider(schemas.Bedrock) so +// only Bedrock-approved headers survive. Only high-confidence mappings are +// derived here (computer_* and memory_*); callers relying on other betas +// (e.g. text_editor-specific headers) should continue supplying them via +// extra-headers / ctx — they flow through bedrock.go's existing +// anthropic-beta HTTP header path. +// +// Unsupported server tools (e.g. web_search on Bedrock) are dropped upstream +// by ValidateChatToolsForProvider, so they never reach this helper. 
+func collectBedrockServerTools(params *schemas.ChatParameters) (serverTools []json.RawMessage, betaHeaders []string) { + if params == nil || len(params.Tools) == 0 { + return nil, nil + } + filtered, _ := anthropic.ValidateChatToolsForProvider(params.Tools, schemas.Bedrock) + return collectBedrockServerToolsFromFiltered(filtered) +} + +// collectBedrockServerToolsFromFiltered is the inner variant that accepts a +// pre-filtered tool set (already run through ValidateChatToolsForProvider). +// convertChatParameters filters once and passes the result to both this helper +// and convertToolConfigFromFiltered to avoid re-filtering twice per request. +func collectBedrockServerToolsFromFiltered(filtered []schemas.ChatTool) (serverTools []json.RawMessage, betaHeaders []string) { + if len(filtered) == 0 { + return nil, nil + } + seenBeta := make(map[string]struct{}) + for _, tool := range filtered { + if tool.Function != nil || tool.Custom != nil { + continue + } + bytes, err := providerUtils.MarshalSorted(tool) + if err != nil { + continue + } + serverTools = append(serverTools, json.RawMessage(bytes)) + for _, h := range deriveBedrockBetaHeadersForToolType(string(tool.Type)) { + if _, ok := seenBeta[h]; ok { + continue + } + seenBeta[h] = struct{}{} + betaHeaders = append(betaHeaders, h) + } + } + if len(betaHeaders) > 0 { + // Gate through the Bedrock-approved beta-header list. + betaHeaders = anthropic.FilterBetaHeadersForProvider(betaHeaders, schemas.Bedrock) + } + return serverTools, betaHeaders +} + +// buildBedrockServerToolChoice emits an Anthropic-native tool_choice value +// for tunneling through additionalModelRequestFields.tool_choice ONLY when +// Converse's typed toolConfig.toolChoice path cannot express the caller's +// intent: +// +// - Named pin of a kept server tool: convertToolConfig builds toolConfig.tools +// from function/custom tools only, and its reconciliation (around line +// 1274) drops any named pin that doesn't match an entry in that slice. +// Server-tool names never appear there, so a legitimate pin like +// tool_choice={type:"function", function:{name:"computer"}} gets silently +// nuked. We tunnel {"type":"tool","name":"computer"} instead so the +// forced-tool contract reaches Anthropic via Bedrock's merge. +// - any/required with only server tools: convertToolConfig returns nil +// entirely (empty-slice guard since bedrockTools is empty), so the typed +// "any" contract is lost. We tunnel {"type":"any"} to preserve it. +// +// Returns (nil, false) when the typed Converse path is adequate (auto/none, +// function-tool pin, any with function tools present, or a pin whose name +// doesn't match any kept server tool). +// +// Anthropic tool_choice shape ref: platform.claude.com/docs/en/docs/agents-and-tools/tool-use/define-tools +// ("Controlling Claude's output / Forcing tool use" — four options: +// auto, any, tool, none; forced tool shape is {"type":"tool","name":"..."}). +func buildBedrockServerToolChoice(params *schemas.ChatParameters, filtered []schemas.ChatTool) (json.RawMessage, bool) { + if params == nil || params.ToolChoice == nil { + return nil, false + } + + // Resolve effective type and optional pinned name from either the string + // or struct representation of ChatToolChoice. 
+ var ( + choiceType schemas.ChatToolChoiceType + pinnedName string + ) + if params.ToolChoice.ChatToolChoiceStr != nil { + choiceType = schemas.ChatToolChoiceType(*params.ToolChoice.ChatToolChoiceStr) + } else if params.ToolChoice.ChatToolChoiceStruct != nil { + s := params.ToolChoice.ChatToolChoiceStruct + choiceType = s.Type + if s.Function != nil { + pinnedName = s.Function.Name + } else if s.Custom != nil { + pinnedName = s.Custom.Name + } + } else { + return nil, false + } + + // Partition kept tools: server-tool name set, plus whether any + // function/custom tool is present. + serverToolNames := make(map[string]struct{}) + hasFunctionOrCustom := false + for _, tool := range filtered { + if tool.Function != nil || tool.Custom != nil { + hasFunctionOrCustom = true + continue + } + if tool.Name != "" { + serverToolNames[tool.Name] = struct{}{} + } + } + + switch choiceType { + case schemas.ChatToolChoiceTypeFunction, schemas.ChatToolChoiceTypeCustom, + schemas.ChatToolChoiceType("tool"): + // Only tunnel when the pinned name matches a kept server tool. + // Function/custom pins stay on the typed Converse path. + if pinnedName == "" { + return nil, false + } + if _, ok := serverToolNames[pinnedName]; !ok { + return nil, false + } + bytes, err := providerUtils.MarshalSorted(map[string]any{ + "type": "tool", + "name": pinnedName, + }) + if err != nil { + return nil, false + } + return json.RawMessage(bytes), true + + case schemas.ChatToolChoiceTypeAny, schemas.ChatToolChoiceTypeRequired: + // When function/custom tools are present, Converse's typed + // toolChoice.any handles the any contract — don't double-emit. + if hasFunctionOrCustom || len(serverToolNames) == 0 { + return nil, false + } + bytes, err := providerUtils.MarshalSorted(map[string]any{"type": "any"}) + if err != nil { + return nil, false + } + return json.RawMessage(bytes), true + + default: + // auto, none, allowed_tools, empty, unknown — no tunneling. + return nil, false + } +} + +// deriveBedrockBetaHeadersForToolType maps an Anthropic server-tool Type string +// to the anthropic-beta header(s) Bedrock requires for the feature to activate. +// Only high-confidence mappings are encoded here — both are anchored in +// core/providers/anthropic/types.go (cite: B-header comments around lines 178-183). +// Unknown prefixes return nil; callers can still inject betas via extra-headers. +func deriveBedrockBetaHeadersForToolType(toolType string) []string { + switch { + case strings.HasPrefix(toolType, "computer_"): + // computer_YYYYMMDD → computer-use-YYYY-MM-DD (Bedrock B-header). + rest := strings.TrimPrefix(toolType, "computer_") + if len(rest) == 8 { + return []string{"computer-use-" + rest[0:4] + "-" + rest[4:6] + "-" + rest[6:8]} + } + return nil + case strings.HasPrefix(toolType, "memory_"): + // Memory activates via the context-management bundle on Bedrock + // (see anthropic/types.go:179 — "context-management-2025-06-27 per + // B-header (bundles memory)"). + return []string{"context-management-2025-06-27"} + } + return nil +} + +// convertToolConfig converts Bifrost tools to Bedrock tool config. +// +// Responsibilities (split from collectBedrockServerTools): +// - Filters server tools the target provider doesn't support via +// ValidateChatToolsForProvider (e.g. web_search on Bedrock per cited +// docs — AWS user guide beta-header list, Anthropic overview feature +// table). Silently stripped. +// - Materializes function/custom tools into Converse's typed toolConfig.tools. 
+// Kept server tools (bash_*, computer_*, memory_*, text_editor_*, + // tool_search_tool_*) are NOT emitted here — they are handled separately + // by collectBedrockServerTools → additionalModelRequestFields.tools, since + // Converse's toolSpec slot has no shape for them. + // - Returns nil instead of an empty-slice ToolConfig, since Bedrock's + // Converse API rejects `"toolConfig": {"tools": []}` with a 400. func convertToolConfig(model string, params *schemas.ChatParameters) *BedrockToolConfig { - if len(params.Tools) == 0 { + if params == nil || len(params.Tools) == 0 { + return nil + } + // Strip unsupported server tools before the conversion loop. + filtered, _ := anthropic.ValidateChatToolsForProvider(params.Tools, schemas.Bedrock) + return convertToolConfigFromFiltered(model, params, filtered) +} + +// convertToolConfigFromFiltered is the inner variant that accepts a +// pre-filtered tool set. convertChatParameters uses this to avoid filtering +// twice (once here, once in collectBedrockServerTools). The public +// convertToolConfig entry point is a thin wrapper preserved for tests. +func convertToolConfigFromFiltered(model string, params *schemas.ChatParameters, filtered []schemas.ChatTool) *BedrockToolConfig { + if params == nil { return nil } var bedrockTools []BedrockTool - for _, tool := range params.Tools { + for _, tool := range filtered { if tool.Function != nil { // Serialize the parameters (or a default empty schema) to json.RawMessage var schemaObjectBytes []byte @@ -986,7 +1369,7 @@ bedrockTool := BedrockTool{ ToolSpec: &BedrockToolSpec{ Name: tool.Function.Name, Description: schemas.Ptr(description), InputSchema: BedrockToolInputSchema{ JSON: json.RawMessage(schemaObjectBytes), }, @@ -1004,6 +1387,15 @@ } } + // Empty-guard: Bedrock's Converse API rejects {"toolConfig": {"tools": []}} + // with a 400 "The provided request is not valid". If every incoming tool + // was filtered out above (e.g. only server tools the target provider + // doesn't support), omit ToolConfig entirely so the request is valid and + // the model simply answers without tool access. + if len(bedrockTools) == 0 { + return nil + } + toolConfig := &BedrockToolConfig{ Tools: bedrockTools, } @@ -1012,7 +1404,28 @@ if params.ToolChoice != nil { toolChoice := convertToolChoice(*params.ToolChoice) if toolChoice != nil { - toolConfig.ToolChoice = toolChoice + // Reconcile: if the choice forces a specific tool by name, + // verify that name still exists in the filtered tool set. + // Without this, a caller that pinned a server tool we just + // stripped (e.g. web_search on Bedrock) would ship a + // toolChoice.tool.name ∉ tools, and Bedrock's Converse API + // rejects that with a 400 ValidationException — defeating + // the silent-strip contract.
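	// End-to-end sketch of the contract: tools=[web_search, get_weather]
	// with a pin on "web_search" sent to Bedrock becomes tools=[get_weather]
	// with no toolChoice at all, rather than a 400 from a dangling pin.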
+ if toolChoice.Tool != nil && toolChoice.Tool.Name != "" { + found := false + for _, bt := range bedrockTools { + if bt.ToolSpec != nil && bt.ToolSpec.Name == toolChoice.Tool.Name { + found = true + break + } + } + if !found { + toolChoice = nil + } + } + if toolChoice != nil { + toolConfig.ToolChoice = toolChoice + } } } diff --git a/core/providers/cohere/cohere.go b/core/providers/cohere/cohere.go index 4386a55d11..fd50f6d6d5 100644 --- a/core/providers/cohere/cohere.go +++ b/core/providers/cohere/cohere.go @@ -518,6 +518,7 @@ func (provider *CohereProvider) ChatCompletionStream(ctx *schemas.BifrostContext // Start streaming in a goroutine go func() { + defer providerUtils.EnsureStreamFinalizerCalled(ctx) defer func() { if ctx.Err() == context.Canceled { providerUtils.HandleStreamCancellation(ctx, postHookRunner, responseChan, providerName, request.Model, schemas.ChatCompletionStreamRequest, provider.logger) @@ -806,6 +807,7 @@ func (provider *CohereProvider) ResponsesStream(ctx *schemas.BifrostContext, pos // Start streaming in a goroutine go func() { + defer providerUtils.EnsureStreamFinalizerCalled(ctx) defer func() { if ctx.Err() == context.Canceled { providerUtils.HandleStreamCancellation(ctx, postHookRunner, responseChan, providerName, request.Model, schemas.ResponsesStreamRequest, provider.logger) diff --git a/core/providers/elevenlabs/elevenlabs.go b/core/providers/elevenlabs/elevenlabs.go index 8b144b63b0..31d3a4761a 100644 --- a/core/providers/elevenlabs/elevenlabs.go +++ b/core/providers/elevenlabs/elevenlabs.go @@ -426,6 +426,7 @@ func (provider *ElevenlabsProvider) SpeechStream(ctx *schemas.BifrostContext, po // which immediately unblocks any in-progress read (including reads blocked inside a gzip decompression layer). stopCancellation := providerUtils.SetupStreamCancellation(ctx, resp.BodyStream(), provider.logger) defer stopCancellation() + defer providerUtils.EnsureStreamFinalizerCalled(ctx) // read binary audio chunks from the stream // 4KB buffer for reading chunks diff --git a/core/providers/gemini/chat.go b/core/providers/gemini/chat.go index da562f9348..9d56a3d12b 100644 --- a/core/providers/gemini/chat.go +++ b/core/providers/gemini/chat.go @@ -504,7 +504,13 @@ func isErrorFinishReason(reason FinishReason) bool { reason == FinishReasonProhibitedContent || reason == FinishReasonSPII || reason == FinishReasonImageSafety || - reason == FinishReasonUnexpectedToolCall + reason == FinishReasonUnexpectedToolCall || + reason == FinishReasonMissingThoughtSignature || + reason == FinishReasonMalformedResponse || + reason == FinishReasonImageProhibitedContent || + reason == FinishReasonImageRecitation || + reason == FinishReasonTooManyToolCalls || + reason == FinishReasonNoImage } // createErrorResponse creates a complete BifrostChatResponse for error cases diff --git a/core/providers/gemini/gemini.go b/core/providers/gemini/gemini.go index 1dd0842158..f4c4a32c01 100644 --- a/core/providers/gemini/gemini.go +++ b/core/providers/gemini/gemini.go @@ -481,6 +481,7 @@ func HandleGeminiChatCompletionStream( // Start streaming in a goroutine go func() { + defer providerUtils.EnsureStreamFinalizerCalled(ctx) defer func() { if ctx.Err() == context.Canceled { providerUtils.HandleStreamCancellation(ctx, postHookRunner, responseChan, providerName, model, schemas.ChatCompletionStreamRequest, logger) @@ -1017,6 +1018,7 @@ func HandleGeminiResponsesStream( // Start streaming in a goroutine go func() { + defer providerUtils.EnsureStreamFinalizerCalled(ctx) defer func() { if ctx.Err() == 
context.Canceled { providerUtils.HandleStreamCancellation(ctx, postHookRunner, responseChan, providerName, model, schemas.ResponsesStreamRequest, logger) @@ -1553,6 +1555,7 @@ func (provider *GeminiProvider) SpeechStream(ctx *schemas.BifrostContext, postHo // Start streaming in a goroutine go func() { + defer providerUtils.EnsureStreamFinalizerCalled(ctx) defer func() { if ctx.Err() == context.Canceled { providerUtils.HandleStreamCancellation(ctx, postHookRunner, responseChan, providerName, request.Model, schemas.SpeechStreamRequest, provider.logger) @@ -1871,6 +1874,7 @@ func (provider *GeminiProvider) TranscriptionStream(ctx *schemas.BifrostContext, // Start streaming in a goroutine go func() { + defer providerUtils.EnsureStreamFinalizerCalled(ctx) defer func() { if ctx.Err() == context.Canceled { providerUtils.HandleStreamCancellation(ctx, postHookRunner, responseChan, providerName, request.Model, schemas.TranscriptionStreamRequest, provider.logger) @@ -4557,6 +4561,7 @@ func (provider *GeminiProvider) PassthroughStream( ch := make(chan *schemas.BifrostStreamChunk, schemas.DefaultStreamBufferSize) go func() { + defer providerUtils.EnsureStreamFinalizerCalled(ctx) defer func() { if ctx.Err() == context.Canceled { providerUtils.HandleStreamCancellation(ctx, postHookRunner, ch, provider.GetProviderKey(), req.Model, schemas.PassthroughStreamRequest, provider.logger) diff --git a/core/providers/gemini/gemini_test.go b/core/providers/gemini/gemini_test.go index 558df9bda7..e1fb192f66 100644 --- a/core/providers/gemini/gemini_test.go +++ b/core/providers/gemini/gemini_test.go @@ -1,6 +1,7 @@ package gemini_test import ( + "context" "encoding/base64" "encoding/json" "os" @@ -1936,6 +1937,70 @@ func TestResponsesAPIParallelFunctionCalling(t *testing.T) { } }, }, + { + name: "ResponsesAPI_FunctionCallOutput_ContentBlocks", + input: &schemas.BifrostResponsesRequest{ + Provider: schemas.Gemini, + Model: "gemini-2.0-flash", + Input: []schemas.ResponsesMessage{ + { + Role: schemas.Ptr(schemas.ResponsesInputMessageRoleUser), + Type: schemas.Ptr(schemas.ResponsesMessageTypeMessage), + Content: &schemas.ResponsesMessageContent{ + ContentStr: schemas.Ptr("List browser tabs"), + }, + }, + { + Type: schemas.Ptr(schemas.ResponsesMessageTypeFunctionCall), + ResponsesToolMessage: &schemas.ResponsesToolMessage{ + CallID: schemas.Ptr("call_tabs"), + Name: schemas.Ptr("browser_tabs"), + Arguments: schemas.Ptr(`{"action":"list"}`), + }, + }, + { + Type: schemas.Ptr(schemas.ResponsesMessageTypeFunctionCallOutput), + ResponsesToolMessage: &schemas.ResponsesToolMessage{ + CallID: schemas.Ptr("call_tabs"), + Output: &schemas.ResponsesToolMessageOutputStruct{ + // Output as content blocks (Anthropic Responses API format) + ResponsesFunctionToolCallOutputBlocks: []schemas.ResponsesMessageContentBlock{ + { + Type: schemas.ResponsesInputMessageContentBlockTypeText, + Text: schemas.Ptr("### Open tabs\n- 0: (current) [Google] (https://google.com)\n- 1: [GitHub] (https://github.com)\n"), + }, + }, + }, + }, + }, + }, + }, + validate: func(t *testing.T, result *gemini.GeminiGenerationRequest) { + // Find the Content with function response + var toolResponseContent *gemini.Content + for i := range result.Contents { + content := &result.Contents[i] + if len(content.Parts) > 0 && content.Parts[0].FunctionResponse != nil { + toolResponseContent = content + break + } + } + + require.NotNil(t, toolResponseContent, "Should have a content with functionResponse") + require.Len(t, toolResponseContent.Parts, 1) + + part := 
toolResponseContent.Parts[0] + require.NotNil(t, part.FunctionResponse, "Part must have functionResponse") + assert.Equal(t, "call_tabs", part.FunctionResponse.ID) + assert.Equal(t, "browser_tabs", part.FunctionResponse.Name) + + // Verify the response data contains the tool output (not empty) + require.NotNil(t, part.FunctionResponse.Response, "FunctionResponse.Response must not be nil") + responseStr := string(part.FunctionResponse.Response) + assert.Contains(t, responseStr, "Open tabs", "Response should contain the tool output text") + assert.Contains(t, responseStr, "Google", "Response should contain tab content") + }, + }, } for _, tt := range tests { @@ -2858,3 +2923,66 @@ func TestThinkingBudgetEffortUsesModelRange(t *testing.T) { "flash effort budget must not exceed model maximum 24576") }) } + +// Regression: GenAI /generateContent path must not turn thinkingLevel into a derived +// thinkingBudget (which changes Gemini 3.x behavior). Inbound should set effort only; +// outbound for Gemini 3+ should emit thinkingLevel again. +func TestGenAIThinkingLevel_RoundTripPreservesLevelNotBudget(t *testing.T) { + level := "MiNiMaL" + geminiReq := &gemini.GeminiGenerationRequest{ + Model: "gemini-3-flash-preview", + GenerationConfig: gemini.GenerationConfig{ + ThinkingConfig: &gemini.GenerationConfigThinkingConfig{ + IncludeThoughts: true, + ThinkingLevel: &level, + }, + }, + } + + bifrostCtx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline) + bifrostReq := geminiReq.ToBifrostResponsesRequest(bifrostCtx) + require.NotNil(t, bifrostReq.Params) + require.NotNil(t, bifrostReq.Params.Reasoning) + require.NotNil(t, bifrostReq.Params.Reasoning.Effort) + assert.Equal(t, "minimal", *bifrostReq.Params.Reasoning.Effort) + assert.Nil(t, bifrostReq.Params.Reasoning.MaxTokens, "thinkingLevel must not populate reasoning max_tokens") + + roundTrip, err := gemini.ToGeminiResponsesRequest(bifrostReq) + require.NoError(t, err) + require.NotNil(t, roundTrip) + require.NotNil(t, roundTrip.GenerationConfig.ThinkingConfig) + tc := roundTrip.GenerationConfig.ThinkingConfig + require.NotNil(t, tc.ThinkingLevel) + assert.Equal(t, "minimal", *tc.ThinkingLevel) + assert.Nil(t, tc.ThinkingBudget, "round-trip must not synthesize thinkingBudget from level-only config") +} + +// Regression: MAX_TOKENS from Gemini must survive Gemini → Bifrost → Gemini on the GenAI path +// (StopReason used to be dropped, so clients saw STOP instead of MAX_TOKENS). 
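// The mapping chain exercised below (sketch):
// FinishReasonMaxTokens -> ConvertGeminiFinishReasonToBifrost -> "length"
// -> ConvertBifrostFinishReasonToGemini -> FinishReasonMaxTokens.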
+func TestGenAIFinishReasonMaxTokens_PersistsThroughBifrostRoundTrip(t *testing.T) { + geminiResp := &gemini.GenerateContentResponse{ + ModelVersion: "gemini-2.5-flash", + Candidates: []*gemini.Candidate{ + { + Index: 0, + FinishReason: gemini.FinishReasonMaxTokens, + Content: &gemini.Content{ + Role: "model", + Parts: []*gemini.Part{ + {Text: "partial essay..."}, + }, + }, + }, + }, + } + + bifrostResp := geminiResp.ToResponsesBifrostResponsesResponse() + require.NotNil(t, bifrostResp) + require.NotNil(t, bifrostResp.StopReason) + assert.Equal(t, "length", *bifrostResp.StopReason) + + out := gemini.ToGeminiResponsesResponse(bifrostResp) + require.NotNil(t, out) + require.Len(t, out.Candidates, 1) + assert.Equal(t, gemini.FinishReasonMaxTokens, out.Candidates[0].FinishReason) +} diff --git a/core/providers/gemini/responses.go b/core/providers/gemini/responses.go index c9a8af93ba..3be24a3145 100644 --- a/core/providers/gemini/responses.go +++ b/core/providers/gemini/responses.go @@ -160,6 +160,30 @@ func (response *GenerateContentResponse) ToResponsesBifrostResponsesResponse() * // Convert candidates to Responses output messages if len(response.Candidates) > 0 { + candidate := response.Candidates[0] + + // Persist finish reason as Bifrost canonical stop_reason + if candidate.FinishReason != "" && candidate.FinishReason != FinishReasonUnspecified { + stopReason := ConvertGeminiFinishReasonToBifrost(candidate.FinishReason) + bifrostResp.StopReason = &stopReason + + if isErrorFinishReason(candidate.FinishReason) { + failedStatus := "failed" + bifrostResp.Status = &failedStatus + + errMsg := candidate.FinishMessage + if errMsg == "" { + errMsg = string(candidate.FinishReason) + } + bifrostResp.Error = &schemas.ResponsesResponseError{ + Code: stopReason, + Message: errMsg, + } + + return bifrostResp + } + } + outputMessages := convertGeminiCandidatesToResponsesOutput(response.Candidates) if len(outputMessages) > 0 { bifrostResp.Output = outputMessages @@ -409,8 +433,10 @@ func ToGeminiResponsesResponse(bifrostResp *schemas.BifrostResponsesResponse) *G }, } - // Determine finish reason based on incomplete details - if bifrostResp.IncompleteDetails != nil { + // Determine finish reason: prefer StopReason (Bifrost canonical), fall back to IncompleteDetails + if bifrostResp.StopReason != nil { + candidate.FinishReason = ConvertBifrostFinishReasonToGemini(*bifrostResp.StopReason) + } else if bifrostResp.IncompleteDetails != nil { switch bifrostResp.IncompleteDetails.Reason { case "max_tokens": candidate.FinishReason = FinishReasonMaxTokens @@ -692,8 +718,12 @@ func ToGeminiResponsesStreamResponse(bifrostResp *schemas.BifrostResponsesStream streamResp.UsageMetadata = ConvertBifrostResponsesUsageToGeminiUsageMetadata(bifrostResp.Response.Usage) } - // Set finish reason - candidate.FinishReason = FinishReasonStop + // Derive finish reason from StopReason when present + if bifrostResp.Response.StopReason != nil { + candidate.FinishReason = ConvertBifrostFinishReasonToGemini(*bifrostResp.Response.StopReason) + } else { + candidate.FinishReason = FinishReasonStop + } // Attach grounding metadata if we buffered web search data if state.HasWebSearch && state.WebSearchCall != nil { @@ -3016,6 +3046,30 @@ func convertResponsesMessagesToGeminiContents(messages []schemas.ResponsesMessag } else { responseMap["output"] = output } + } else if msg.ResponsesToolMessage.Output != nil && msg.ResponsesToolMessage.Output.ResponsesFunctionToolCallOutputBlocks != nil { + // Handle structured output blocks (e.g. 
from Anthropic Responses API format + // where output is an array of content blocks like [{"type":"input_text","text":"..."}]) + var textParts []string + for _, block := range msg.ResponsesToolMessage.Output.ResponsesFunctionToolCallOutputBlocks { + if block.Text != nil && *block.Text != "" { + textParts = append(textParts, *block.Text) + } + } + if len(textParts) > 0 { + combined := strings.Join(textParts, "\n") + if json.Valid([]byte(combined)) { + responseMap["output"] = json.RawMessage(combined) + } else { + responseMap["output"] = combined + } + } else { + // Fallback for non-text blocks (e.g. images, files): marshal the raw blocks + // so responseMap["output"] is never left empty when blocks are present + rawBlocks, err := providerUtils.MarshalSorted(msg.ResponsesToolMessage.Output.ResponsesFunctionToolCallOutputBlocks) + if err == nil && len(rawBlocks) > 0 { + responseMap["output"] = json.RawMessage(rawBlocks) + } + } } else if msg.Content != nil && msg.Content.ContentStr != nil { // Fallback to Content.ContentStr for backward compatibility output := *msg.Content.ContentStr diff --git a/core/providers/gemini/types.go b/core/providers/gemini/types.go index 75cf9f504f..29935ca23b 100644 --- a/core/providers/gemini/types.go +++ b/core/providers/gemini/types.go @@ -82,6 +82,20 @@ const ( FinishReasonImageSafety FinishReason = "IMAGE_SAFETY" // The tool call generated by the model is invalid. FinishReasonUnexpectedToolCall FinishReason = "UNEXPECTED_TOOL_CALL" + // Image generation stopped because generated images contain prohibited content. + FinishReasonImageProhibitedContent FinishReason = "IMAGE_PROHIBITED_CONTENT" + // Image generation stopped due to other miscellaneous issues. + FinishReasonImageOther FinishReason = "IMAGE_OTHER" + // The model was expected to generate an image, but none was generated. + FinishReasonNoImage FinishReason = "NO_IMAGE" + // Image generation stopped due to recitation. + FinishReasonImageRecitation FinishReason = "IMAGE_RECITATION" + // Model called too many tools consecutively, thus the system exited execution. + FinishReasonTooManyToolCalls FinishReason = "TOO_MANY_TOOL_CALLS" + // Request has at least one thought signature missing. + FinishReasonMissingThoughtSignature FinishReason = "MISSING_THOUGHT_SIGNATURE" + // Finished due to malformed response. 
+ FinishReasonMalformedResponse FinishReason = "MALFORMED_RESPONSE" ) type GeminiGenerationRequest struct { diff --git a/core/providers/gemini/utils.go b/core/providers/gemini/utils.go index 321ec85329..ae4339db0e 100644 --- a/core/providers/gemini/utils.go +++ b/core/providers/gemini/utils.go @@ -85,7 +85,7 @@ func effortToThinkingLevel(effort string, model string) string { return "high" // Pro models don't support medium, use high } return "medium" - case "high": + case "high", "xhigh", "max": return "high" default: if isPro { @@ -197,8 +197,7 @@ func (r *GeminiGenerationRequest) convertGenerationConfigToResponsesParameters() level := *config.ThinkingConfig.ThinkingLevel var effort string - // Map Gemini thinking level to Bifrost effort - switch level { + switch strings.ToLower(level) { case "minimal": effort = "minimal" case "low": @@ -212,12 +211,6 @@ func (r *GeminiGenerationRequest) convertGenerationConfigToResponsesParameters() } params.Reasoning.Effort = schemas.Ptr(effort) - - // Also convert to budget for compatibility - if effort != "none" { - budget, _ := providerUtils.GetBudgetTokensFromReasoningEffort(effort, budgetRange.Min, budgetRange.Max) - params.Reasoning.MaxTokens = schemas.Ptr(budget) - } } } if config.CandidateCount > 0 { @@ -545,18 +538,33 @@ func convertFileDataToBytes(fileData string) ([]byte, string) { var ( // Maps Gemini finish reasons to Bifrost format geminiFinishReasonToBifrost = map[FinishReason]string{ - FinishReasonStop: "stop", - FinishReasonMaxTokens: "length", - FinishReasonSafety: "content_filter", - FinishReasonRecitation: "content_filter", - FinishReasonLanguage: "content_filter", - FinishReasonOther: "stop", - FinishReasonBlocklist: "content_filter", - FinishReasonProhibitedContent: "content_filter", - FinishReasonSPII: "content_filter", - FinishReasonMalformedFunctionCall: "stop", - FinishReasonImageSafety: "content_filter", - FinishReasonUnexpectedToolCall: "tool_calls", + FinishReasonStop: "stop", + FinishReasonMaxTokens: "length", + FinishReasonSafety: "content_filter", + FinishReasonRecitation: "content_filter", + FinishReasonLanguage: "content_filter", + FinishReasonOther: "stop", + FinishReasonBlocklist: "content_filter", + FinishReasonProhibitedContent: "content_filter", + FinishReasonSPII: "content_filter", + FinishReasonMalformedFunctionCall: "stop", + FinishReasonImageSafety: "content_filter", + FinishReasonImageProhibitedContent: "content_filter", + FinishReasonImageOther: "stop", + FinishReasonNoImage: "stop", + FinishReasonImageRecitation: "content_filter", + FinishReasonUnexpectedToolCall: "stop", + FinishReasonTooManyToolCalls: "stop", + FinishReasonMissingThoughtSignature: "stop", + FinishReasonMalformedResponse: "stop", + } + + // Maps Bifrost canonical finish reasons back to the most representative Gemini finish reason + bifrostToGeminiFinishReason = map[string]FinishReason{ + "stop": FinishReasonStop, + "length": FinishReasonMaxTokens, + "content_filter": FinishReasonSafety, + "tool_calls": FinishReasonStop, } ) @@ -568,6 +576,14 @@ func ConvertGeminiFinishReasonToBifrost(providerReason FinishReason) string { return string(providerReason) } +// ConvertBifrostFinishReasonToGemini converts Bifrost canonical finish reasons back to Gemini format. 
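// Examples: "length" -> FinishReasonMaxTokens; "content_filter" -> FinishReasonSafety
// (lossy by design: RECITATION, BLOCKLIST, SPII, etc. all flatten to "content_filter"
// inbound, so SAFETY stands in for the whole family outbound); unknown values fall
// back to FinishReasonStop.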
+func ConvertBifrostFinishReasonToGemini(bifrostReason string) FinishReason { + if geminiReason, ok := bifrostToGeminiFinishReason[bifrostReason]; ok { + return geminiReason + } + return FinishReasonStop +} + // ConvertGeminiUsageMetadataToChatUsage converts Gemini usage metadata to Bifrost chat LLM usage func ConvertGeminiUsageMetadataToChatUsage(metadata *GenerateContentResponseUsageMetadata) *schemas.BifrostLLMUsage { if metadata == nil { diff --git a/core/providers/huggingface/huggingface.go b/core/providers/huggingface/huggingface.go index 1fed844d31..4039f2d36b 100644 --- a/core/providers/huggingface/huggingface.go +++ b/core/providers/huggingface/huggingface.go @@ -1160,6 +1160,7 @@ func (provider *HuggingFaceProvider) ImageGenerationStream(ctx *schemas.BifrostC // Start streaming in a goroutine go func() { + defer providerUtils.EnsureStreamFinalizerCalled(ctx) defer providerUtils.ReleaseStreamingResponse(resp) defer close(responseChan) @@ -1579,6 +1580,7 @@ func (provider *HuggingFaceProvider) ImageEditStream(ctx *schemas.BifrostContext // Start streaming in a goroutine go func() { + defer providerUtils.EnsureStreamFinalizerCalled(ctx) defer providerUtils.ReleaseStreamingResponse(resp) defer close(responseChan) diff --git a/core/providers/mistral/mistral.go b/core/providers/mistral/mistral.go index 597bf6c239..ec699ddcc2 100644 --- a/core/providers/mistral/mistral.go +++ b/core/providers/mistral/mistral.go @@ -70,7 +70,7 @@ func NewMistralProvider(config *schemas.ProviderConfig, logger schemas.Logger) * // GetProviderKey returns the provider identifier for Mistral. func (provider *MistralProvider) GetProviderKey() schemas.ModelProvider { - return schemas.Mistral + return providerUtils.GetProviderName(schemas.Mistral, provider.customProviderConfig) } // listModelsByKey performs a list models request for a single key. @@ -158,13 +158,27 @@ func (provider *MistralProvider) TextCompletionStream(ctx *schemas.BifrostContex return nil, providerUtils.NewUnsupportedOperationError(schemas.TextCompletionStreamRequest, provider.GetProviderKey()) } +// normalizeChatRequestForConversion returns the request unchanged for the stock Mistral +// provider. For custom aliases (e.g. a provider registered as "custom-mistral" with +// BaseProviderType=Mistral), it returns a shallow copy with Provider set to schemas.Mistral +// so the shared OpenAI converter applies Mistral-specific compatibility (max_completion_tokens +// → max_tokens, tool_choice struct → "any"). The caller's request is never mutated. +func (provider *MistralProvider) normalizeChatRequestForConversion(request *schemas.BifrostChatRequest) *schemas.BifrostChatRequest { + if request == nil || provider.customProviderConfig == nil || request.Provider == schemas.Mistral { + return request + } + normalized := *request + normalized.Provider = schemas.Mistral + return &normalized +} + // ChatCompletion performs a chat completion request to the Mistral API. 
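// For a custom alias the request is first routed through
// normalizeChatRequestForConversion (sketch, hypothetical alias name):
//
//	in := &schemas.BifrostChatRequest{Provider: schemas.ModelProvider("custom-mistral"), ...}
//	out := provider.normalizeChatRequestForConversion(in)
//	// out.Provider == schemas.Mistral; in.Provider is still "custom-mistral".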
func (provider *MistralProvider) ChatCompletion(ctx *schemas.BifrostContext, key schemas.Key, request *schemas.BifrostChatRequest) (*schemas.BifrostChatResponse, *schemas.BifrostError) { return openai.HandleOpenAIChatCompletionRequest( ctx, provider.client, provider.networkConfig.BaseURL+providerUtils.GetPathFromContext(ctx, "/v1/chat/completions"), - request, + provider.normalizeChatRequestForConversion(request), key, provider.networkConfig.ExtraHeaders, providerUtils.ShouldSendBackRawRequest(ctx, provider.sendBackRawRequest), @@ -190,7 +204,7 @@ func (provider *MistralProvider) ChatCompletionStream(ctx *schemas.BifrostContex ctx, provider.client, provider.networkConfig.BaseURL+"/v1/chat/completions", - request, + provider.normalizeChatRequestForConversion(request), authHeader, provider.networkConfig.ExtraHeaders, providerUtils.ShouldSendBackRawRequest(ctx, provider.sendBackRawRequest), @@ -602,6 +616,7 @@ func (provider *MistralProvider) TranscriptionStream(ctx *schemas.BifrostContext // which immediately unblocks any in-progress read (including reads blocked inside a gzip decompression layer). stopCancellation := providerUtils.SetupStreamCancellation(ctx, resp.BodyStream(), provider.logger) defer stopCancellation() + defer providerUtils.EnsureStreamFinalizerCalled(ctx) sseReader := providerUtils.GetSSEEventReader(ctx, reader) chunkIndex := -1 diff --git a/core/providers/openai/chat.go b/core/providers/openai/chat.go index 2e2bfd43b8..48141f46d1 100644 --- a/core/providers/openai/chat.go +++ b/core/providers/openai/chat.go @@ -104,12 +104,17 @@ func (req *OpenAIChatRequest) filterOpenAISpecificParameters() { // Handle reasoning parameter: OpenAI uses effort-based reasoning // Priority: effort (native) > max_tokens (estimated) if req.ChatParameters.Reasoning != nil { + reasoningCopy := *req.ChatParameters.Reasoning + req.ChatParameters.Reasoning = &reasoningCopy if req.ChatParameters.Reasoning.Effort != nil { // Native field is provided, use it (and clear max_tokens) effort := *req.ChatParameters.Reasoning.Effort - // Convert "minimal" to "low" for non-OpenAI providers - if effort == "minimal" { + // Convert "minimal" to "low"; cap "xhigh"/"max" to "high" — OpenAI tops out at high. 
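	// Mapping sketch: minimal -> low; low/medium/high fall through the switch
	// unchanged; xhigh/max -> high. MaxTokens is cleared right after the switch.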
+ switch effort { + case "minimal": req.ChatParameters.Reasoning.Effort = schemas.Ptr("low") + case "xhigh", "max": + req.ChatParameters.Reasoning.Effort = schemas.Ptr("high") } // Clear max_tokens since OpenAI doesn't use it req.ChatParameters.Reasoning.MaxTokens = nil diff --git a/core/providers/openai/chat_test.go b/core/providers/openai/chat_test.go index f391f821cb..f7f0e15e95 100644 --- a/core/providers/openai/chat_test.go +++ b/core/providers/openai/chat_test.go @@ -2,11 +2,13 @@ package openai import ( "encoding/json" + "strings" "testing" "github.com/bytedance/sonic" providerUtils "github.com/maximhq/bifrost/core/providers/utils" "github.com/maximhq/bifrost/core/schemas" + "github.com/stretchr/testify/require" ) func TestToOpenAIChatRequest_ToolNormalization(t *testing.T) { @@ -78,6 +80,32 @@ func TestToOpenAIChatRequest_ToolNormalization(t *testing.T) { } } +func TestToOpenAIChatRequest_PreservesN(t *testing.T) { + req := &schemas.BifrostChatRequest{ + Provider: schemas.OpenAI, + Model: "gpt-4.1", + Input: []schemas.ChatMessage{ + { + Role: schemas.ChatMessageRoleUser, + Content: &schemas.ChatMessageContent{ + ContentStr: schemas.Ptr("hello"), + }, + }, + }, + Params: &schemas.ChatParameters{ + N: schemas.Ptr(2), + }, + } + + out := ToOpenAIChatRequest(schemas.NewBifrostContext(nil, schemas.NoDeadline), req) + if out == nil { + t.Fatal("expected request") + } + if out.N == nil || *out.N != 2 { + t.Fatalf("expected n=2, got %#v", out.N) + } +} + func TestToOpenAIChatRequest_PreservesPropertyOrder(t *testing.T) { params := &schemas.ToolFunctionParameters{ Type: "object", @@ -307,6 +335,68 @@ func TestToOpenAIChatRequest_FireworksPreservesReasoningAndCacheIsolation(t *tes } } +// TestToOpenAIChatRequest_AnnotationsNotInWirePayload verifies that MCPToolAnnotations +// (stored on ChatTool with json:"-") are never included in the JSON body sent to OpenAI. 
+func TestToOpenAIChatRequest_AnnotationsNotInWirePayload(t *testing.T) { + readOnly := true + + bifrostReq := &schemas.BifrostChatRequest{ + Provider: schemas.OpenAI, + Model: "gpt-4o", + Input: []schemas.ChatMessage{ + {Role: schemas.ChatMessageRoleUser, Content: &schemas.ChatMessageContent{ContentStr: schemas.Ptr("hello")}}, + }, + Params: &schemas.ChatParameters{ + Tools: []schemas.ChatTool{ + { + Type: schemas.ChatToolTypeFunction, + Function: &schemas.ChatToolFunction{ + Name: "read_file", + Description: schemas.Ptr("Read a file"), + Parameters: &schemas.ToolFunctionParameters{ + Type: "object", + Properties: schemas.NewOrderedMapFromPairs( + schemas.KV("path", map[string]interface{}{"type": "string"}), + ), + Required: []string{"path"}, + }, + }, + Annotations: &schemas.MCPToolAnnotations{ + Title: "File Reader", + ReadOnlyHint: &readOnly, + }, + }, + }, + }, + } + + ctx, cancel := schemas.NewBifrostContextWithCancel(nil) + defer cancel() + + result := ToOpenAIChatRequest(ctx, bifrostReq) + require.NotNil(t, result) + + wireBody, err := json.Marshal(result) + require.NoError(t, err) + s := string(wireBody) + + // Annotations must be absent from the wire payload + if strings.Contains(s, "annotations") { + t.Errorf("annotations field leaked into OpenAI wire payload: %s", s) + } + if strings.Contains(s, "readOnlyHint") { + t.Errorf("readOnlyHint leaked into OpenAI wire payload: %s", s) + } + if strings.Contains(s, "File Reader") { + t.Errorf("annotation title leaked into OpenAI wire payload: %s", s) + } + + // The function definition must still be intact + if !strings.Contains(s, "read_file") { + t.Errorf("function name missing from OpenAI wire payload: %s", s) + } +} + func TestApplyXAICompatibility(t *testing.T) { tests := []struct { name string diff --git a/core/providers/openai/openai.go b/core/providers/openai/openai.go index 209cd5b1b2..810ee583d7 100644 --- a/core/providers/openai/openai.go +++ b/core/providers/openai/openai.go @@ -531,6 +531,7 @@ func HandleOpenAITextCompletionStreaming( // Start streaming in a goroutine go func() { + defer providerUtils.EnsureStreamFinalizerCalled(ctx) defer func() { if ctx.Err() == context.Canceled { providerUtils.HandleStreamCancellation(ctx, postHookRunner, responseChan, providerName, request.Model, schemas.TextCompletionStreamRequest, logger) @@ -1090,6 +1091,7 @@ func HandleOpenAIChatCompletionStreaming( // Start streaming in a goroutine go func() { + defer providerUtils.EnsureStreamFinalizerCalled(ctx) defer func() { if ctx.Err() == context.Canceled { providerUtils.HandleStreamCancellation(ctx, postHookRunner, responseChan, providerName, request.Model, streamRequestType, logger) @@ -1319,7 +1321,7 @@ func HandleOpenAIChatCompletionStreaming( // Handle regular content chunks, including reasoning if choice.ChatStreamResponseChoice != nil && choice.ChatStreamResponseChoice.Delta != nil && - (choice.ChatStreamResponseChoice.Delta.Content != nil || + ((choice.ChatStreamResponseChoice.Delta.Content != nil && *choice.ChatStreamResponseChoice.Delta.Content != "") || choice.ChatStreamResponseChoice.Delta.Reasoning != nil || len(choice.ChatStreamResponseChoice.Delta.ReasoningDetails) > 0 || choice.ChatStreamResponseChoice.Delta.Audio != nil || @@ -1694,6 +1696,7 @@ func HandleOpenAIResponsesStreaming( // Start streaming in a goroutine go func() { + defer providerUtils.EnsureStreamFinalizerCalled(ctx) defer func() { if ctx.Err() == context.Canceled { providerUtils.HandleStreamCancellation(ctx, postHookRunner, responseChan, providerName, 
request.Model, schemas.ResponsesStreamRequest, logger) @@ -1808,9 +1811,17 @@ func HandleOpenAIResponsesStreaming( if response.Code != nil { bifrostErr.Error.Code = response.Code } + if response.Response != nil && response.Response.Error != nil { + if response.Response.Error.Message != "" && bifrostErr.Error.Message == "" { + bifrostErr.Error.Message = response.Response.Error.Message + } + if response.Response.Error.Code != "" && (bifrostErr.Error.Code == nil || *bifrostErr.Error.Code == "") { + bifrostErr.Error.Code = schemas.Ptr(response.Response.Error.Code) + } + } ctx.SetValue(schemas.BifrostContextKeyStreamEndIndicator, true) - providerUtils.ProcessAndSendBifrostError(ctx, postHookRunner, providerUtils.EnrichError(ctx, bifrostErr, jsonBody, nil, sendBackRawRequest, sendBackRawResponse), responseChan, logger) + providerUtils.ProcessAndSendBifrostError(ctx, postHookRunner, providerUtils.EnrichError(ctx, bifrostErr, jsonBody, []byte(jsonData), sendBackRawRequest, sendBackRawResponse), responseChan, logger) return } @@ -1832,7 +1843,7 @@ func HandleOpenAIResponsesStreaming( bifrostErr.Error.Code = &response.Response.Error.Code } ctx.SetValue(schemas.BifrostContextKeyStreamEndIndicator, true) - providerUtils.ProcessAndSendBifrostError(ctx, postHookRunner, providerUtils.EnrichError(ctx, bifrostErr, jsonBody, nil, sendBackRawRequest, sendBackRawResponse), responseChan, logger) + providerUtils.ProcessAndSendBifrostError(ctx, postHookRunner, providerUtils.EnrichError(ctx, bifrostErr, jsonBody, []byte(jsonData), sendBackRawRequest, sendBackRawResponse), responseChan, logger) return } @@ -1858,7 +1869,6 @@ func HandleOpenAIResponsesStreaming( providerUtils.ProcessAndSendResponse(ctx, postHookRunner, providerUtils.GetBifrostResponseForStreamResponse(nil, nil, &response, nil, nil, nil), responseChan) } } - }() return responseChan, nil @@ -2308,6 +2318,7 @@ func HandleOpenAISpeechStreamRequest( // Start streaming in a goroutine go func() { + defer providerUtils.EnsureStreamFinalizerCalled(ctx) defer func() { if ctx.Err() == context.Canceled { providerUtils.HandleStreamCancellation(ctx, postHookRunner, responseChan, providerName, request.Model, schemas.SpeechStreamRequest, logger) @@ -2425,7 +2436,6 @@ func HandleOpenAISpeechStreamRequest( providerUtils.ProcessAndSendResponse(ctx, postHookRunner, providerUtils.GetBifrostResponseForStreamResponse(nil, nil, nil, &response, nil, nil), responseChan) } - }() return responseChan, nil @@ -2583,7 +2593,7 @@ func HandleOpenAITranscriptionRequest( return nil, providerUtils.NewBifrostOperationError(schemas.ErrProviderResponseUnmarshal, err, providerName) } - //TODO: add HandleProviderResponse here + // TODO: add HandleProviderResponse here // Parse raw response for RawResponse field if sendBackRawResponse { @@ -2750,6 +2760,7 @@ func HandleOpenAITranscriptionStreamRequest( // Start streaming in a goroutine go func() { + defer providerUtils.EnsureStreamFinalizerCalled(ctx) defer func() { if ctx.Err() == context.Canceled { providerUtils.HandleStreamCancellation(ctx, postHookRunner, responseChan, providerName, request.Model, schemas.TranscriptionStreamRequest, logger) @@ -2887,7 +2898,6 @@ func HandleOpenAITranscriptionStreamRequest( providerUtils.ProcessAndSendResponse(ctx, postHookRunner, providerUtils.GetBifrostResponseForStreamResponse(nil, nil, nil, nil, response, nil), responseChan) } - }() return responseChan, nil @@ -2897,8 +2907,8 @@ func HandleOpenAITranscriptionStreamRequest( // It formats the request, sends it to OpenAI, and processes the response. 
// Returns a BifrostResponse containing the bifrost response or an error if the request fails. func (provider *OpenAIProvider) ImageGeneration(ctx *schemas.BifrostContext, key schemas.Key, - req *schemas.BifrostImageGenerationRequest) (*schemas.BifrostImageGenerationResponse, *schemas.BifrostError) { - + req *schemas.BifrostImageGenerationRequest, +) (*schemas.BifrostImageGenerationResponse, *schemas.BifrostError) { if err := providerUtils.CheckOperationAllowed(schemas.OpenAI, provider.customProviderConfig, schemas.ImageGenerationRequest); err != nil { return nil, err } @@ -2931,7 +2941,6 @@ func HandleOpenAIImageGenerationRequest( sendBackRawResponse bool, logger schemas.Logger, ) (*schemas.BifrostImageGenerationResponse, *schemas.BifrostError) { - // Create request req := fasthttp.AcquireRequest() resp := fasthttp.AcquireResponse() @@ -3052,7 +3061,6 @@ func (provider *OpenAIProvider) ImageGenerationStream( key schemas.Key, request *schemas.BifrostImageGenerationRequest, ) (chan *schemas.BifrostStreamChunk, *schemas.BifrostError) { - if request == nil { return nil, providerUtils.NewBifrostOperationError("invalid request: nil", nil, provider.GetProviderKey()) } @@ -3101,7 +3109,6 @@ func HandleOpenAIImageGenerationStreaming( postResponseConverter func(*schemas.BifrostImageGenerationStreamResponse) *schemas.BifrostImageGenerationStreamResponse, logger schemas.Logger, ) (chan *schemas.BifrostStreamChunk, *schemas.BifrostError) { - // Set headers headers := map[string]string{ "Content-Type": "application/json", @@ -3202,6 +3209,7 @@ func HandleOpenAIImageGenerationStreaming( // Start streaming in a goroutine go func() { + defer providerUtils.EnsureStreamFinalizerCalled(ctx) defer func() { if ctx.Err() == context.Canceled { providerUtils.HandleStreamCancellation(ctx, postHookRunner, responseChan, providerName, request.Model, schemas.ImageGenerationStreamRequest, logger) @@ -3463,7 +3471,6 @@ func HandleOpenAIImageGenerationStreaming( return } } - }() return responseChan, nil @@ -4377,7 +4384,6 @@ func HandleOpenAIImageEditStreamRequest( postResponseConverter func(*schemas.BifrostImageGenerationStreamResponse) *schemas.BifrostImageGenerationStreamResponse, logger schemas.Logger, ) (chan *schemas.BifrostStreamChunk, *schemas.BifrostError) { - reqBody := ToOpenAIImageEditRequest(request) if reqBody == nil { return nil, providerUtils.NewBifrostOperationError("image edit input is not provided", nil, providerName) @@ -4466,6 +4472,7 @@ func HandleOpenAIImageEditStreamRequest( // Start streaming in a goroutine go func() { + defer providerUtils.EnsureStreamFinalizerCalled(ctx) defer func() { if ctx.Err() == context.Canceled { providerUtils.HandleStreamCancellation(ctx, postHookRunner, responseChan, providerName, request.Model, schemas.ImageEditStreamRequest, logger) @@ -4723,7 +4730,6 @@ func HandleOpenAIImageEditStreamRequest( return } } - }() return responseChan, nil @@ -7210,6 +7216,7 @@ func (provider *OpenAIProvider) PassthroughStream( ch := make(chan *schemas.BifrostStreamChunk, schemas.DefaultStreamBufferSize) go func() { + defer providerUtils.EnsureStreamFinalizerCalled(ctx) defer func() { if ctx.Err() == context.Canceled { providerUtils.HandleStreamCancellation(ctx, postHookRunner, ch, provider.GetProviderKey(), req.Model, schemas.PassthroughStreamRequest, provider.logger) diff --git a/core/providers/openai/responses.go b/core/providers/openai/responses.go index 23f59c5155..e8efee4689 100644 --- a/core/providers/openai/responses.go +++ b/core/providers/openai/responses.go @@ -201,9 +201,12 
@@ func ToOpenAIResponsesRequest(bifrostReq *schemas.BifrostResponsesRequest) *Open if req.ResponsesParameters.Reasoning.Effort != nil { // Native field is provided, use it (and clear max_tokens) effort := *req.ResponsesParameters.Reasoning.Effort - // Convert "minimal" to "low" for non-OpenAI providers - if effort == "minimal" { + // Convert "minimal" to "low"; cap "xhigh"/"max" to "high" — OpenAI tops out at high. + switch effort { + case "minimal": req.ResponsesParameters.Reasoning.Effort = schemas.Ptr("low") + case "xhigh", "max": + req.ResponsesParameters.Reasoning.Effort = schemas.Ptr("high") } // Clear max_tokens since OpenAI doesn't use it req.ResponsesParameters.Reasoning.MaxTokens = nil @@ -220,6 +223,11 @@ func ToOpenAIResponsesRequest(bifrostReq *schemas.BifrostResponsesRequest) *Open req.ResponsesParameters.Reasoning.MaxTokens = nil } + // summary:"none" is Anthropic-specific (maps to display:"omitted"); strip it for OpenAI. + if req.ResponsesParameters.Reasoning.Summary != nil && *req.ResponsesParameters.Reasoning.Summary == "none" { + req.ResponsesParameters.Reasoning.Summary = nil + } + // Handle xAI-specific parameter filtering // Only grok-3-mini supports reasoning_effort if bifrostReq.Provider == schemas.XAI && diff --git a/core/providers/openai/responses_marshal_test.go b/core/providers/openai/responses_marshal_test.go index d9f8616a18..092e1eb813 100644 --- a/core/providers/openai/responses_marshal_test.go +++ b/core/providers/openai/responses_marshal_test.go @@ -523,3 +523,350 @@ func TestOpenAIResponsesRequest_MarshalJSON_RoundTrip(t *testing.T) { } }) } + +// Regression test for multi-turn Anthropic tool_result with array-form content. +// The OpenAI Responses API defines function_call_output.output as a string (see +// https://platform.openai.com/docs/api-reference/responses/create). When an +// Anthropic client sends a tool_result whose content is an array of text blocks, +// Bifrost's Anthropic→Responses translator populates +// ResponsesToolMessageOutputStruct.ResponsesFunctionToolCallOutputBlocks. +// Historically, that array was marshaled verbatim onto the wire, which some +// strict OpenAI-compat upstreams (e.g. Ollama Cloud) reject with an error like +// +// json: cannot unmarshal array into Go struct field ResponsesFunctionCallOutput.output of type string +// +// The outgoing OpenAI Responses request must emit `output` as a string for +// text-only tool outputs. 
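+//
+// Illustrative wire shapes (abbreviated; the block-type spellings are assumed
+// for the sketch, not quoted from the spec):
+//
+//	rejected by strict upstreams:
+//	  {"type":"function_call_output","call_id":"toolu_abc123","output":[{"type":"input_text","text":"line1"}]}
+//	accepted:
+//	  {"type":"function_call_output","call_id":"toolu_abc123","output":"line1"}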
+func TestOpenAIResponsesRequestInput_MarshalJSON_FunctionCallOutputFlattensTextBlocksToString(t *testing.T) {
+	outputText := "line1"
+	callID := "toolu_abc123"
+	functionName := "read_file"
+
+	input := &OpenAIResponsesRequestInput{
+		OpenAIResponsesRequestInputArray: []schemas.ResponsesMessage{
+			{
+				Role: schemas.Ptr(schemas.ResponsesInputMessageRoleUser),
+				Content: &schemas.ResponsesMessageContent{
+					ContentStr: schemas.Ptr("Read /tmp/test.txt and tell me what it contains."),
+				},
+			},
+			{
+				Type:   schemas.Ptr(schemas.ResponsesMessageTypeFunctionCall),
+				Status: schemas.Ptr("completed"),
+				ResponsesToolMessage: &schemas.ResponsesToolMessage{
+					CallID:    schemas.Ptr(callID),
+					Name:      schemas.Ptr(functionName),
+					Arguments: schemas.Ptr(`{"path":"/tmp/test.txt"}`),
+				},
+			},
+			{
+				Type:   schemas.Ptr(schemas.ResponsesMessageTypeFunctionCallOutput),
+				Status: schemas.Ptr("completed"),
+				ResponsesToolMessage: &schemas.ResponsesToolMessage{
+					CallID: schemas.Ptr(callID),
+					Output: &schemas.ResponsesToolMessageOutputStruct{
+						ResponsesFunctionToolCallOutputBlocks: []schemas.ResponsesMessageContentBlock{
+							{
+								Type: schemas.ResponsesInputMessageContentBlockTypeText,
+								Text: schemas.Ptr(outputText),
+							},
+						},
+					},
+				},
+			},
+		},
+	}
+
+	jsonBytes, err := input.MarshalJSON()
+	if err != nil {
+		t.Fatalf("Failed to marshal OpenAIResponsesRequestInput: %v", err)
+	}
+
+	var messages []map[string]interface{}
+	if err := sonic.Unmarshal(jsonBytes, &messages); err != nil {
+		t.Fatalf("Failed to unmarshal marshaled input as array: %v\nraw=%s", err, string(jsonBytes))
+	}
+
+	var fcoMsg map[string]interface{}
+	for _, m := range messages {
+		// typ, not t: a bare `t` here would shadow the *testing.T receiver.
+		if typ, ok := m["type"].(string); ok && typ == string(schemas.ResponsesMessageTypeFunctionCallOutput) {
+			fcoMsg = m
+			break
+		}
+	}
+	if fcoMsg == nil {
+		t.Fatalf("did not find function_call_output message in marshaled JSON: %s", string(jsonBytes))
+	}
+
+	outputVal, ok := fcoMsg["output"]
+	if !ok {
+		t.Fatalf("function_call_output message has no `output` field: %s", string(jsonBytes))
+	}
+
+	outputStr, isString := outputVal.(string)
+	if !isString {
+		t.Fatalf("function_call_output.output must be a string (OpenAI Responses API spec); got %T: %v\nraw=%s", outputVal, outputVal, string(jsonBytes))
+	}
+	if outputStr != outputText {
+		t.Fatalf("function_call_output.output mismatch: want %q, got %q", outputText, outputStr)
+	}
+}
+
+// Flattening must concatenate multiple text blocks with newline separators so
+// every character from the upstream tool response reaches the model.
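+// For example, blocks ["line1", "line2"] must come out as "line1\nline2",
+// with no separator added after the final block.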
+func TestOpenAIResponsesRequestInput_MarshalJSON_FunctionCallOutputConcatenatesMultipleTextBlocks(t *testing.T) { + callID := "toolu_multi" + input := &OpenAIResponsesRequestInput{ + OpenAIResponsesRequestInputArray: []schemas.ResponsesMessage{ + { + Type: schemas.Ptr(schemas.ResponsesMessageTypeFunctionCallOutput), + Status: schemas.Ptr("completed"), + ResponsesToolMessage: &schemas.ResponsesToolMessage{ + CallID: schemas.Ptr(callID), + Output: &schemas.ResponsesToolMessageOutputStruct{ + ResponsesFunctionToolCallOutputBlocks: []schemas.ResponsesMessageContentBlock{ + {Type: schemas.ResponsesInputMessageContentBlockTypeText, Text: schemas.Ptr("line1")}, + {Type: schemas.ResponsesInputMessageContentBlockTypeText, Text: schemas.Ptr("line2")}, + }, + }, + }, + }, + }, + } + + jsonBytes, err := input.MarshalJSON() + if err != nil { + t.Fatalf("Failed to marshal: %v", err) + } + var messages []map[string]interface{} + if err := sonic.Unmarshal(jsonBytes, &messages); err != nil { + t.Fatalf("Failed to unmarshal: %v\nraw=%s", err, string(jsonBytes)) + } + if len(messages) != 1 { + t.Fatalf("expected 1 message, got %d", len(messages)) + } + got, ok := messages[0]["output"].(string) + if !ok { + t.Fatalf("output must be string, got %T", messages[0]["output"]) + } + if want := "line1\nline2"; got != want { + t.Fatalf("flattened output mismatch: want %q, got %q", want, got) + } +} + +// When the tool result contains a non-text block (e.g. an image), flattening is +// unsafe — preserve the array form and let the upstream handle it. This keeps +// the fix scoped to the common text-only case without dropping rich content. +func TestOpenAIResponsesRequestInput_MarshalJSON_FunctionCallOutputPreservesNonTextBlocks(t *testing.T) { + callID := "toolu_with_image" + imageURL := "https://example.com/screenshot.png" + input := &OpenAIResponsesRequestInput{ + OpenAIResponsesRequestInputArray: []schemas.ResponsesMessage{ + { + Type: schemas.Ptr(schemas.ResponsesMessageTypeFunctionCallOutput), + Status: schemas.Ptr("completed"), + ResponsesToolMessage: &schemas.ResponsesToolMessage{ + CallID: schemas.Ptr(callID), + Output: &schemas.ResponsesToolMessageOutputStruct{ + ResponsesFunctionToolCallOutputBlocks: []schemas.ResponsesMessageContentBlock{ + {Type: schemas.ResponsesInputMessageContentBlockTypeText, Text: schemas.Ptr("here is the screenshot:")}, + { + Type: schemas.ResponsesInputMessageContentBlockTypeImage, + ResponsesInputMessageContentBlockImage: &schemas.ResponsesInputMessageContentBlockImage{ + ImageURL: &imageURL, + }, + }, + }, + }, + }, + }, + }, + } + jsonBytes, err := input.MarshalJSON() + if err != nil { + t.Fatalf("Failed to marshal: %v", err) + } + var messages []map[string]interface{} + if err := sonic.Unmarshal(jsonBytes, &messages); err != nil { + t.Fatalf("Failed to unmarshal: %v\nraw=%s", err, string(jsonBytes)) + } + if _, isString := messages[0]["output"].(string); isString { + t.Fatalf("non-text blocks must not be flattened to string; raw=%s", string(jsonBytes)) + } +} + +// TestOpenAIResponsesRequest_MarshalJSON_StripsAnthropicToolFlags ensures the +// Responses serializer drops the four Anthropic-native tool flags +// (defer_loading, allowed_callers, input_examples, eager_input_streaming) +// along with CacheControl before forwarding to OpenAI — mirroring the Chat +// path's behavior so Anthropic-flavored tools cannot 400 OpenAI via Responses. 
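+//
+// Sketch of the intended before/after for a single function tool (abbreviated,
+// illustrative values):
+//
+//	in:  {"type":"function","name":"lookup","cache_control":{"type":"ephemeral"},"defer_loading":true,...}
+//	out: {"type":"function","name":"lookup","description":"lookup something"}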
+func TestOpenAIResponsesRequest_MarshalJSON_StripsAnthropicToolFlags(t *testing.T) { + req := &OpenAIResponsesRequest{ + Model: "gpt-4o", + Input: OpenAIResponsesRequestInput{ + OpenAIResponsesRequestInputArray: []schemas.ResponsesMessage{ + { + Role: schemas.Ptr(schemas.ResponsesInputMessageRoleUser), + Content: &schemas.ResponsesMessageContent{ + ContentStr: schemas.Ptr("hello"), + }, + }, + }, + }, + ResponsesParameters: schemas.ResponsesParameters{ + Tools: []schemas.ResponsesTool{ + { + Type: schemas.ResponsesToolTypeFunction, + Name: schemas.Ptr("lookup"), + Description: schemas.Ptr("lookup something"), + CacheControl: &schemas.CacheControl{Type: "ephemeral"}, + DeferLoading: schemas.Ptr(true), + AllowedCallers: []string{"direct", "agent"}, + EagerInputStreaming: schemas.Ptr(false), + InputExamples: []schemas.ChatToolInputExample{ + {Input: json.RawMessage(`{"q":"hi"}`)}, + }, + ResponsesToolFunction: &schemas.ResponsesToolFunction{}, + }, + }, + }, + } + + jsonBytes, err := req.MarshalJSON() + if err != nil { + t.Fatalf("marshal failed: %v", err) + } + raw := string(jsonBytes) + + // None of the five Anthropic-only tool keys must survive on the wire. + for _, key := range []string{`"cache_control"`, `"defer_loading"`, `"allowed_callers"`, `"input_examples"`, `"eager_input_streaming"`} { + if strings.Contains(raw, key) { + t.Errorf("OpenAI Responses serializer must strip %s; raw=%s", key, raw) + } + } + // Function tool identity should be preserved. + if !strings.Contains(raw, `"name":"lookup"`) { + t.Errorf("tool identity lost after strip; raw=%s", raw) + } +} + +// TestOpenAIResponsesRequest_MarshalJSON_DropsAnthropicOnlyToolTypes verifies +// that Anthropic-only tool types (web_fetch, memory) are dropped entirely when +// serializing for OpenAI Responses. Per OpenAI's OpenAPI spec the Responses +// Tool discriminator union does not include web_fetch or memory, so forwarding +// them would trigger a 400 schema-validation error. Mirrors the Chat path's +// isAnthropicServerToolShape drop behavior. +func TestOpenAIResponsesRequest_MarshalJSON_DropsAnthropicOnlyToolTypes(t *testing.T) { + req := &OpenAIResponsesRequest{ + Model: "gpt-4o", + Input: OpenAIResponsesRequestInput{ + OpenAIResponsesRequestInputArray: []schemas.ResponsesMessage{ + { + Role: schemas.Ptr(schemas.ResponsesInputMessageRoleUser), + Content: &schemas.ResponsesMessageContent{ + ContentStr: schemas.Ptr("hello"), + }, + }, + }, + }, + ResponsesParameters: schemas.ResponsesParameters{ + Tools: []schemas.ResponsesTool{ + // Kept: function (OpenAI-native). + { + Type: schemas.ResponsesToolTypeFunction, + Name: schemas.Ptr("keeper_func"), + ResponsesToolFunction: &schemas.ResponsesToolFunction{}, + }, + // Dropped: web_fetch (Anthropic-only). + { + Type: schemas.ResponsesToolTypeWebFetch, + Name: schemas.Ptr("anthropic_webfetch"), + ResponsesToolWebFetch: &schemas.ResponsesToolWebFetch{}, + }, + // Kept: web_search (both support). + { + Type: schemas.ResponsesToolTypeWebSearch, + ResponsesToolWebSearch: &schemas.ResponsesToolWebSearch{}, + }, + // Dropped: memory (Anthropic-only). + { + Type: schemas.ResponsesToolTypeMemory, + Name: schemas.Ptr("anthropic_memory"), + }, + // Kept: tool_search (both support per OpenAI OpenAPI spec). + { + Type: schemas.ResponsesToolTypeToolSearch, + }, + }, + }, + } + + jsonBytes, err := req.MarshalJSON() + if err != nil { + t.Fatalf("marshal failed: %v", err) + } + raw := string(jsonBytes) + + // Dropped types must not appear on the wire. 
+ for _, dropped := range []string{`"web_fetch"`, `"memory"`, `"anthropic_webfetch"`, `"anthropic_memory"`} { + if strings.Contains(raw, dropped) { + t.Errorf("Anthropic-only tool must be dropped; found %s in raw=%s", dropped, raw) + } + } + // Kept types must still appear. + for _, kept := range []string{`"function"`, `"web_search"`, `"tool_search"`, `"keeper_func"`} { + if !strings.Contains(raw, kept) { + t.Errorf("supported tool %s should be preserved; raw=%s", kept, raw) + } + } + + // Confirm the tools array is present and has exactly 3 entries (2 dropped of 5). + var decoded struct { + Tools []map[string]interface{} `json:"tools"` + } + if err := json.Unmarshal(jsonBytes, &decoded); err != nil { + t.Fatalf("decode failed: %v", err) + } + if len(decoded.Tools) != 3 { + t.Errorf("expected 3 tools after drop (function, web_search, tool_search), got %d; tools=%+v", len(decoded.Tools), decoded.Tools) + } +} + +// TestOpenAIResponsesRequest_MarshalJSON_KeepsAllWhenAllSupported verifies the +// no-reshape fast path: if every tool is OpenAI-compatible with no +// Anthropic-only flags, the tools slice passes through unchanged (no copy, +// no drop). +func TestOpenAIResponsesRequest_MarshalJSON_KeepsAllWhenAllSupported(t *testing.T) { + req := &OpenAIResponsesRequest{ + Model: "gpt-4o", + Input: OpenAIResponsesRequestInput{ + OpenAIResponsesRequestInputArray: []schemas.ResponsesMessage{ + { + Role: schemas.Ptr(schemas.ResponsesInputMessageRoleUser), + Content: &schemas.ResponsesMessageContent{ContentStr: schemas.Ptr("hi")}, + }, + }, + }, + ResponsesParameters: schemas.ResponsesParameters{ + Tools: []schemas.ResponsesTool{ + {Type: schemas.ResponsesToolTypeFunction, Name: schemas.Ptr("f"), ResponsesToolFunction: &schemas.ResponsesToolFunction{}}, + {Type: schemas.ResponsesToolTypeWebSearch, ResponsesToolWebSearch: &schemas.ResponsesToolWebSearch{}}, + {Type: schemas.ResponsesToolTypeCodeInterpreter, ResponsesToolCodeInterpreter: &schemas.ResponsesToolCodeInterpreter{}}, + }, + }, + } + + jsonBytes, err := req.MarshalJSON() + if err != nil { + t.Fatalf("marshal failed: %v", err) + } + var decoded struct { + Tools []map[string]interface{} `json:"tools"` + } + if err := json.Unmarshal(jsonBytes, &decoded); err != nil { + t.Fatalf("decode failed: %v", err) + } + if len(decoded.Tools) != 3 { + t.Errorf("expected 3 tools preserved, got %d", len(decoded.Tools)) + } +} diff --git a/core/providers/openai/types.go b/core/providers/openai/types.go index 89de4e1e66..a559fbac06 100644 --- a/core/providers/openai/types.go +++ b/core/providers/openai/types.go @@ -4,10 +4,11 @@ import ( "encoding/json" "errors" "fmt" + "strings" "github.com/bytedance/sonic" - "github.com/maximhq/bifrost/core/schemas" providerUtils "github.com/maximhq/bifrost/core/providers/utils" + "github.com/maximhq/bifrost/core/schemas" ) const MinMaxCompletionTokens = 16 @@ -184,27 +185,42 @@ func (req *OpenAIChatRequest) MarshalJSON() ([]byte, error) { processedMessages = req.Messages } - // Process tools if needed + // Process tools if needed. + // On outbound to OpenAI we need to: + // (a) Strip CacheControl (Anthropic-only, existing behavior). + // (b) Drop Anthropic server tools entirely (Function == nil && Custom == nil); + // OpenAI won't accept web_search_20260209 etc. + // (c) Strip Anthropic-native per-tool flags (DeferLoading, AllowedCallers, + // InputExamples, EagerInputStreaming) when they're set on function tools. 
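+	// Worked example (hypothetical input): given the tools
+	//   [function(get_weather, cache_control), web_search_20260209, custom(my_tool)]
+	// the wire carries
+	//   [function(get_weather), custom(my_tool)]
+	// since the server tool is dropped and cache_control is stripped on a copy.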
var processedTools []schemas.ChatTool if len(req.Tools) > 0 { - needsToolCopy := false + needsToolChange := false for _, tool := range req.Tools { - if tool.CacheControl != nil { - needsToolCopy = true + if tool.CacheControl != nil || isAnthropicServerToolShape(tool) || hasAnthropicOnlyToolFlags(tool) { + needsToolChange = true break } } - if needsToolCopy { - processedTools = make([]schemas.ChatTool, len(req.Tools)) - for i, tool := range req.Tools { - if tool.CacheControl != nil { - toolCopy := tool - toolCopy.CacheControl = nil - processedTools[i] = toolCopy - } else { - processedTools[i] = tool + if needsToolChange { + processedTools = make([]schemas.ChatTool, 0, len(req.Tools)) + for _, tool := range req.Tools { + // Drop Anthropic server tools (no function/custom payload). + // OpenAI would reject the request if we forwarded them. + if isAnthropicServerToolShape(tool) { + continue } + if tool.CacheControl == nil && !hasAnthropicOnlyToolFlags(tool) { + processedTools = append(processedTools, tool) + continue + } + toolCopy := tool + toolCopy.CacheControl = nil + toolCopy.DeferLoading = nil + toolCopy.AllowedCallers = nil + toolCopy.InputExamples = nil + toolCopy.EagerInputStreaming = nil + processedTools = append(processedTools, toolCopy) } } else { processedTools = req.Tools @@ -427,8 +443,23 @@ func (r *OpenAIResponsesRequestInput) MarshalJSON() ([]byte, error) { } } - // Strip CacheControl and FileType from tool message output blocks if needed - if msg.ResponsesToolMessage.Output != nil && msg.ResponsesToolMessage.Output.ResponsesFunctionToolCallOutputBlocks != nil { + // Collapse text-only tool output blocks into a single string. + // OpenAI's Responses API defines function_call_output.output as + // a string; Anthropic's multi-turn tool_result content arrives + // as an array of content blocks and has to be flattened here. + // Strict upstream implementations (e.g. Ollama Cloud) return a + // 400 otherwise. + if msg.ResponsesToolMessage.Output != nil && + msg.ResponsesToolMessage.Output.ResponsesFunctionToolCallOutputBlocks != nil && + isFunctionCallOutputBlocksFlattenable(msg.ResponsesToolMessage.Output.ResponsesFunctionToolCallOutputBlocks) { + flattened := flattenFunctionCallOutputBlocks(msg.ResponsesToolMessage.Output.ResponsesFunctionToolCallOutputBlocks) + outputCopy := *msg.ResponsesToolMessage.Output + outputCopy.ResponsesToolCallOutputStr = &flattened + outputCopy.ResponsesFunctionToolCallOutputBlocks = nil + toolMsgCopy.Output = &outputCopy + toolMsgModified = true + } else if msg.ResponsesToolMessage.Output != nil && msg.ResponsesToolMessage.Output.ResponsesFunctionToolCallOutputBlocks != nil { + // Strip CacheControl and FileType from tool message output blocks if needed hasToolModification := false for _, block := range msg.ResponsesToolMessage.Output.ResponsesFunctionToolCallOutputBlocks { if block.CacheControl != nil || block.Citations != nil || (block.ResponsesInputMessageContentBlockFile != nil && block.ResponsesInputMessageContentBlockFile.FileType != nil) { @@ -473,6 +504,52 @@ func (r *OpenAIResponsesRequestInput) MarshalJSON() ([]byte, error) { } // Helper function to check if a chat message has any CacheControl fields or FileType in file blocks +// isAnthropicServerToolShape reports whether the tool carries the Anthropic +// server-tool shape (Function and Custom both nil). On outbound to OpenAI, +// these must be dropped — OpenAI doesn't accept tool types like +// web_search_20260209, computer_20251124, mcp_toolset, etc. 
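+//
+// Examples (assuming normalized ChatTool shapes):
+//
+//	{Type: "function", Function: &ChatToolFunction{...}} -> false
+//	{Type: "custom", Custom: &ChatToolCustom{...}}       -> false
+//	{Type: "web_search_20260209", Name: "web_search"}    -> true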
+func isAnthropicServerToolShape(t schemas.ChatTool) bool { + return t.Function == nil && t.Custom == nil +} + +// hasAnthropicOnlyToolFlags reports whether the tool carries any of the +// Anthropic-native flags that OpenAI would reject (DeferLoading, +// AllowedCallers, InputExamples, EagerInputStreaming). Strip these when +// forwarding to OpenAI. +func hasAnthropicOnlyToolFlags(t schemas.ChatTool) bool { + return t.DeferLoading != nil || + len(t.AllowedCallers) > 0 || + len(t.InputExamples) > 0 || + t.EagerInputStreaming != nil +} + +// hasAnthropicOnlyResponsesToolFlags is the ResponsesTool-typed parallel of +// hasAnthropicOnlyToolFlags. The four flags were promoted onto ResponsesTool +// in core/schemas/responses.go for the Anthropic-via-Responses path; the +// OpenAI Responses serializer must strip them so they don't leak to OpenAI +// and trigger a 400 on unknown fields. +func hasAnthropicOnlyResponsesToolFlags(t schemas.ResponsesTool) bool { + return t.DeferLoading != nil || + len(t.AllowedCallers) > 0 || + len(t.InputExamples) > 0 || + t.EagerInputStreaming != nil +} + +// isAnthropicOnlyResponsesToolType reports whether the tool type exists only +// in Anthropic's taxonomy and is not part of OpenAI's Responses API Tool union +// (per OpenAI's OpenAPI spec component.schemas.Tool, which enumerates function, +// file_search, computer[_use_preview], web_search[_preview], mcp, +// code_interpreter, image_generation, local_shell, custom, tool_search, and +// related shell/namespace/apply_patch variants). Forwarding web_fetch or +// memory to OpenAI guarantees a 400 on schema discriminator validation, so +// these get dropped in the Responses→OpenAI serializer — mirroring the Chat +// path's isAnthropicServerToolShape drop behavior for schema parity across +// both endpoints. +func isAnthropicOnlyResponsesToolType(t schemas.ResponsesTool) bool { + return t.Type == schemas.ResponsesToolTypeWebFetch || + t.Type == schemas.ResponsesToolTypeMemory +} + func hasFieldsToStripInChatMessage(msg OpenAIMessage) bool { if msg.Content != nil && msg.Content.ContentBlocks != nil { for _, block := range msg.Content.ContentBlocks { @@ -527,6 +604,12 @@ func hasFieldsToStripInResponsesMessage(msg schemas.ResponsesMessage) bool { } // Check output blocks if msg.ResponsesToolMessage.Output != nil && msg.ResponsesToolMessage.Output.ResponsesFunctionToolCallOutputBlocks != nil { + // Text-only block arrays must be flattened to a string — OpenAI's + // Responses API defines function_call_output.output as a string + // and strict upstreams reject the array form. + if isFunctionCallOutputBlocksFlattenable(msg.ResponsesToolMessage.Output.ResponsesFunctionToolCallOutputBlocks) { + return true + } for _, block := range msg.ResponsesToolMessage.Output.ResponsesFunctionToolCallOutputBlocks { if block.CacheControl != nil { return true @@ -540,6 +623,41 @@ func hasFieldsToStripInResponsesMessage(msg schemas.ResponsesMessage) bool { return false } +// isFunctionCallOutputBlocksFlattenable reports whether a function_call_output +// content block slice contains only text blocks and can therefore be collapsed +// into a single string for the OpenAI Responses API wire format. 
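+//
+// For instance (hypothetical block slices):
+//
+//	[text("a"), text("b")]  -> true  (flattens to "a\nb")
+//	[text("a"), image(url)] -> false (array form preserved)
+//	[]                      -> false (nothing to flatten)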
+func isFunctionCallOutputBlocksFlattenable(blocks []schemas.ResponsesMessageContentBlock) bool {
+	if len(blocks) == 0 {
+		return false
+	}
+	for _, block := range blocks {
+		if block.Type != schemas.ResponsesInputMessageContentBlockTypeText &&
+			block.Type != schemas.ResponsesOutputMessageContentTypeText {
+			return false
+		}
+		if block.Text == nil {
+			return false
+		}
+	}
+	return true
+}
+
+// flattenFunctionCallOutputBlocks concatenates the text of every block in the
+// slice. Callers must first verify flattenability via
+// isFunctionCallOutputBlocksFlattenable.
+func flattenFunctionCallOutputBlocks(blocks []schemas.ResponsesMessageContentBlock) string {
+	var b strings.Builder
+	for i, block := range blocks {
+		if i > 0 {
+			b.WriteByte('\n')
+		}
+		if block.Text != nil {
+			b.WriteString(*block.Text)
+		}
+	}
+	return b.String()
+}
+
 // filterSupportedAnnotations filters out unsupported (non-OpenAI native) citation types
 // OpenAI supports: file_citation, url_citation, container_file_citation, file_path
 func filterSupportedAnnotations(annotations []schemas.ResponsesOutputMessageContentTextAnnotation) []schemas.ResponsesOutputMessageContentTextAnnotation {
@@ -604,27 +722,45 @@ func (resp *OpenAIResponsesRequest) MarshalJSON() ([]byte, error) {
 		return nil, err
 	}
 
-	// Process tools if needed
+	// Process tools if needed.
+	// Mirrors the Chat path (see OpenAIChatRequest.MarshalJSON) so the same
+	// Anthropic-flavored tool payload doesn't leak via the Responses serializer:
+	// (a) Drop Anthropic-only tool TYPES entirely (web_fetch, memory) since
+	//     OpenAI's Responses Tool union doesn't include them — forwarding
+	//     would 400 on the discriminator.
+	// (b) Strip CacheControl (Anthropic-only schema field).
+	// (c) Strip the four Anthropic-native per-tool flags (DeferLoading,
+	//     AllowedCallers, InputExamples, EagerInputStreaming).
 	var processedTools []schemas.ResponsesTool
 	if len(resp.Tools) > 0 {
-		needsToolCopy := false
+		needsReshape := false
 		for _, tool := range resp.Tools {
-			if tool.CacheControl != nil {
-				needsToolCopy = true
+			if isAnthropicOnlyResponsesToolType(tool) ||
+				tool.CacheControl != nil ||
+				hasAnthropicOnlyResponsesToolFlags(tool) {
+				needsReshape = true
 				break
 			}
 		}
-		if needsToolCopy {
-			processedTools = make([]schemas.ResponsesTool, len(resp.Tools))
-			for i, tool := range resp.Tools {
-				if tool.CacheControl != nil {
-					toolCopy := tool
-					toolCopy.CacheControl = nil
-					processedTools[i] = toolCopy
-				} else {
-					processedTools[i] = tool
+		if needsReshape {
+			processedTools = make([]schemas.ResponsesTool, 0, len(resp.Tools))
+			for _, tool := range resp.Tools {
+				if isAnthropicOnlyResponsesToolType(tool) {
+					// Drop — OpenAI Responses has no web_fetch or memory.
+ continue + } + if tool.CacheControl == nil && !hasAnthropicOnlyResponsesToolFlags(tool) { + processedTools = append(processedTools, tool) + continue } + toolCopy := tool + toolCopy.CacheControl = nil + toolCopy.DeferLoading = nil + toolCopy.AllowedCallers = nil + toolCopy.InputExamples = nil + toolCopy.EagerInputStreaming = nil + processedTools = append(processedTools, toolCopy) } } else { processedTools = resp.Tools diff --git a/core/providers/perplexity/chat.go b/core/providers/perplexity/chat.go index dafe1c615b..f2bb5cb1b4 100644 --- a/core/providers/perplexity/chat.go +++ b/core/providers/perplexity/chat.go @@ -38,10 +38,14 @@ func ToPerplexityChatCompletionRequest(bifrostReq *schemas.BifrostChatRequest) * // Handle reasoning effort mapping if bifrostReq.Params.Reasoning != nil && bifrostReq.Params.Reasoning.Effort != nil { - if *bifrostReq.Params.Reasoning.Effort == "minimal" { + effort := *bifrostReq.Params.Reasoning.Effort + switch effort { + case "minimal": perplexityReq.ReasoningEffort = schemas.Ptr("low") - } else { - perplexityReq.ReasoningEffort = bifrostReq.Params.Reasoning.Effort + case "xhigh", "max": + perplexityReq.ReasoningEffort = schemas.Ptr("high") + default: + perplexityReq.ReasoningEffort = &effort } } diff --git a/core/providers/replicate/replicate.go b/core/providers/replicate/replicate.go index 373dbb5bbf..cea010c6b9 100644 --- a/core/providers/replicate/replicate.go +++ b/core/providers/replicate/replicate.go @@ -587,6 +587,7 @@ func (provider *ReplicateProvider) TextCompletionStream(ctx *schemas.BifrostCont // Start streaming in a goroutine go func() { + defer providerUtils.EnsureStreamFinalizerCalled(ctx) defer func() { if ctx.Err() == context.Canceled { providerUtils.HandleStreamCancellation(ctx, postHookRunner, responseChan, provider.GetProviderKey(), request.Model, schemas.TextCompletionStreamRequest, provider.logger) @@ -962,6 +963,7 @@ func (provider *ReplicateProvider) ChatCompletionStream(ctx *schemas.BifrostCont // Start streaming in a goroutine go func() { + defer providerUtils.EnsureStreamFinalizerCalled(ctx) defer func() { if ctx.Err() == context.Canceled { providerUtils.HandleStreamCancellation(ctx, postHookRunner, responseChan, provider.GetProviderKey(), request.Model, schemas.ChatCompletionStreamRequest, provider.logger) @@ -1389,6 +1391,10 @@ func (provider *ReplicateProvider) ResponsesStream(ctx *schemas.BifrostContext, // Start streaming in a goroutine go func() { + // Registered first so the post-hook span finalizer runs on every exit + // path — including the empty-reader early return below, which would + // otherwise skip any finalizer declared later in this goroutine. 
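+		// Deferred functions run LIFO, so registering this finalizer first
+		// also means it runs last, after the cancellation cleanup declared
+		// below has finished.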
+ defer providerUtils.EnsureStreamFinalizerCalled(ctx) defer func() { if ctx.Err() == context.Canceled { providerUtils.HandleStreamCancellation(ctx, postHookRunner, responseChan, provider.GetProviderKey(), request.Model, schemas.ResponsesStreamRequest, provider.logger) @@ -2014,6 +2020,7 @@ func (provider *ReplicateProvider) ImageGenerationStream(ctx *schemas.BifrostCon // Start streaming in a goroutine go func() { + defer providerUtils.EnsureStreamFinalizerCalled(ctx) defer func() { if ctx.Err() == context.Canceled { providerUtils.HandleStreamCancellation(ctx, postHookRunner, responseChan, providerName, request.Model, schemas.ImageGenerationStreamRequest, provider.logger) @@ -2450,6 +2457,7 @@ func (provider *ReplicateProvider) ImageEditStream(ctx *schemas.BifrostContext, // Start streaming in a goroutine go func() { + defer providerUtils.EnsureStreamFinalizerCalled(ctx) defer func() { if ctx.Err() == context.Canceled { providerUtils.HandleStreamCancellation(ctx, postHookRunner, responseChan, providerName, request.Model, schemas.ImageEditStreamRequest, provider.logger) diff --git a/core/providers/utils/stream.go b/core/providers/utils/stream.go index 6b3ea417b4..1cdd011602 100644 --- a/core/providers/utils/stream.go +++ b/core/providers/utils/stream.go @@ -1,6 +1,8 @@ package utils import ( + "context" + schemas "github.com/maximhq/bifrost/core/schemas" ) @@ -20,7 +22,12 @@ import ( // // If the source channel is closed immediately (empty stream), it returns a // nil channel with nil error. drainDone is already closed. +// +// The ctx argument cancels the background forwarding goroutine if the consumer +// abandons the returned wrapped channel. On ctx.Done the goroutine drains the +// source stream so the upstream provider's blocked send can exit cleanly. func CheckFirstStreamChunkForError( + ctx context.Context, stream chan *schemas.BifrostStreamChunk, ) (chan *schemas.BifrostStreamChunk, <-chan struct{}, *schemas.BifrostError) { firstChunk, ok := <-stream @@ -53,7 +60,15 @@ func CheckFirstStreamChunkForError( defer close(done) defer close(wrapped) for chunk := range stream { - wrapped <- chunk + select { + case wrapped <- chunk: + case <-ctx.Done(): + // Consumer abandoned the wrapped channel. Drain the source so the + // provider's blocked send unblocks and its goroutine can exit. 
+ for range stream { + } + return + } } }() return wrapped, done, nil diff --git a/core/providers/utils/stream_test.go b/core/providers/utils/stream_test.go index 45c88853fa..7e843fa4fd 100644 --- a/core/providers/utils/stream_test.go +++ b/core/providers/utils/stream_test.go @@ -1,7 +1,9 @@ package utils import ( + "context" "testing" + "time" schemas "github.com/maximhq/bifrost/core/schemas" ) @@ -18,7 +20,7 @@ func TestCheckFirstStreamChunk_ErrorInFirstChunk(t *testing.T) { } close(stream) - _, drainDone, err := CheckFirstStreamChunkForError(stream) + _, drainDone, err := CheckFirstStreamChunkForError(context.Background(), stream) if err == nil { t.Fatal("expected error, got nil") } @@ -47,7 +49,7 @@ func TestCheckFirstStreamChunk_ValidFirstChunk(t *testing.T) { stream <- chunk2 close(stream) - wrapped, _, err := CheckFirstStreamChunkForError(stream) + wrapped, _, err := CheckFirstStreamChunkForError(context.Background(), stream) if err != nil { t.Fatalf("unexpected error: %v", err) } @@ -75,7 +77,7 @@ func TestCheckFirstStreamChunk_EmptyStream(t *testing.T) { stream := make(chan *schemas.BifrostStreamChunk) close(stream) - wrapped, drainDone, err := CheckFirstStreamChunkForError(stream) + wrapped, drainDone, err := CheckFirstStreamChunkForError(context.Background(), stream) if err != nil { t.Fatalf("unexpected error: %v", err) } @@ -110,7 +112,7 @@ func TestCheckFirstStreamChunk_ErrorInSecondChunk(t *testing.T) { close(stream) // Should NOT return error — only first chunk matters for retry - wrapped, _, err := CheckFirstStreamChunkForError(stream) + wrapped, _, err := CheckFirstStreamChunkForError(context.Background(), stream) if err != nil { t.Fatalf("unexpected error: %v", err) } @@ -149,7 +151,7 @@ func TestCheckFirstStreamChunk_ErrorDrainsSource(t *testing.T) { } close(stream) - _, drainDone, err := CheckFirstStreamChunkForError(stream) + _, drainDone, err := CheckFirstStreamChunkForError(context.Background(), stream) if err == nil { t.Fatal("expected error, got nil") } @@ -176,7 +178,7 @@ func TestCheckFirstStreamChunk_ErrorWithEmptyMessage(t *testing.T) { } close(stream) - wrapped, _, err := CheckFirstStreamChunkForError(stream) + wrapped, _, err := CheckFirstStreamChunkForError(context.Background(), stream) if err != nil { t.Fatalf("unexpected error for empty message: %v", err) } @@ -184,6 +186,49 @@ func TestCheckFirstStreamChunk_ErrorWithEmptyMessage(t *testing.T) { <-wrapped } +func TestCheckFirstStreamChunk_CtxCancelUnblocksWrapper(t *testing.T) { + // Source with cap=1 so wrapped also has cap=1. wrapped is left full by + // the re-injected first chunk, which makes the forwarder goroutine block + // on its next send — the exact leak condition this test guards against. + src := make(chan *schemas.BifrostStreamChunk, 1) + src <- &schemas.BifrostStreamChunk{ + BifrostChatResponse: &schemas.BifrostChatResponse{ID: "1"}, + } + + ctx, cancel := context.WithCancel(context.Background()) + + wrapped, drainDone, err := CheckFirstStreamChunkForError(ctx, src) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if wrapped == nil { + t.Fatal("expected wrapped channel, got nil") + } + + // Push a second chunk; forwarder will read it from src and then block + // trying to send into the full wrapped channel (we intentionally never + // read from wrapped). + src <- &schemas.BifrostStreamChunk{ + BifrostChatResponse: &schemas.BifrostChatResponse{ID: "2"}, + } + + // Cancel — forwarder must stop trying to send to wrapped and drain src. 
+ cancel() + + // Simulate the upstream producer still emitting, then closing. The + // drain loop should consume these and terminate. + src <- &schemas.BifrostStreamChunk{ + BifrostChatResponse: &schemas.BifrostChatResponse{ID: "3"}, + } + close(src) + + select { + case <-drainDone: + case <-time.After(time.Second): + t.Fatal("drainDone did not close after ctx cancel; forwarder goroutine leaked") + } +} + func TestCheckFirstStreamChunk_CodeOnlyError(t *testing.T) { // Error with code but no message should be treated as an error stream := make(chan *schemas.BifrostStreamChunk, 2) @@ -196,7 +241,7 @@ func TestCheckFirstStreamChunk_CodeOnlyError(t *testing.T) { } close(stream) - _, drainDone, err := CheckFirstStreamChunkForError(stream) + _, drainDone, err := CheckFirstStreamChunkForError(context.Background(), stream) if err == nil { t.Fatal("expected error for code-only error, got nil") } diff --git a/core/providers/utils/utils.go b/core/providers/utils/utils.go index c1b7259375..efd1ea992b 100644 --- a/core/providers/utils/utils.go +++ b/core/providers/utils/utils.go @@ -1893,6 +1893,41 @@ func ProcessAndSendBifrostError( } } +// EnsureStreamFinalizerCalled invokes the post-hook span finalizer registered +// on ctx, if any. Designed to be deferred as the last line of defence in a +// provider's streaming goroutine (next to SetupStreamCancellation's cleanup): +// +// defer providerUtils.EnsureStreamFinalizerCalled(ctx) +// +// On a normal stream end the finalizer is already invoked when the final chunk +// is processed (via completeDeferredSpan). The registration wraps the closure +// in sync.Once, so this safety-net call is a noop in that case. It only does +// real work when the streaming goroutine exits without reaching the final-chunk +// path — e.g. a panic mid-stream — which would otherwise leak the plugin +// pipeline back-reference held by the finalizer closure. +// +// Panics inside the finalizer are recovered and logged so they never mask an +// in-flight panic that triggered the defer. +func EnsureStreamFinalizerCalled(ctx context.Context) { + // Install the recover first so any panic — including one triggered by + // accessing ctx itself — is caught. This matters because this helper is + // called from `defer`, so a panic here would mask the in-flight panic + // that invoked the defer. + defer func() { + if r := recover(); r != nil { + getLogger().Debug("recovered panic in deferred stream finalizer: %v", r) + } + }() + if ctx == nil { + return + } + finalizer, ok := ctx.Value(schemas.BifrostContextKeyPostHookSpanFinalizer).(func(context.Context)) + if !ok || finalizer == nil { + return + } + finalizer(ctx) +} + // SetupStreamCancellation spawns a goroutine that closes the body stream when // the context is cancelled or deadline exceeded, unblocking any blocked Read/Scan operations. // Returns a cleanup function that MUST be called when streaming is done to @@ -2557,9 +2592,8 @@ func GetReasoningEffortFromBudgetTokens( } } -// GetBudgetTokensFromReasoningEffort converts OpenAI reasoning effort -// into a reasoning token budget. -// effort ∈ {"none", "minimal", "low", "medium", "high"} +// GetBudgetTokensFromReasoningEffort converts reasoning effort into a reasoning token budget. 
+// effort ∈ {"none", "minimal", "low", "medium", "high", "xhigh", "max"} func GetBudgetTokensFromReasoningEffort( effort string, minBudgetTokens int, @@ -2589,6 +2623,10 @@ func GetBudgetTokensFromReasoningEffort( ratio = 0.425 case "high": ratio = 0.80 + case "xhigh": + ratio = 0.92 + case "max": + ratio = 1.0 default: // Unknown effort → safe default ratio = 0.425 diff --git a/core/providers/vertex/vertex.go b/core/providers/vertex/vertex.go index 37e7af90f2..1f5682d692 100644 --- a/core/providers/vertex/vertex.go +++ b/core/providers/vertex/vertex.go @@ -3466,6 +3466,7 @@ func (provider *VertexProvider) PassthroughStream( ch := make(chan *schemas.BifrostStreamChunk, schemas.DefaultStreamBufferSize) go func() { + defer providerUtils.EnsureStreamFinalizerCalled(ctx) defer func() { if ctx.Err() == context.Canceled { providerUtils.HandleStreamCancellation(ctx, postHookRunner, ch, provider.GetProviderKey(), req.Model, schemas.PassthroughStreamRequest, provider.logger) diff --git a/core/providers/vertex/vertex_test.go b/core/providers/vertex/vertex_test.go index 7203bf3080..03baf347fa 100644 --- a/core/providers/vertex/vertex_test.go +++ b/core/providers/vertex/vertex_test.go @@ -68,8 +68,10 @@ func TestVertex(t *testing.T) { PromptCaching: true, ListModels: false, CountTokens: true, - StructuredOutputs: true, // Structured outputs with nullable enum support - InterleavedThinking: true, + StructuredOutputs: true, // Structured outputs with nullable enum support + InterleavedThinking: true, + EagerInputStreaming: true, // fine-grained-tool-streaming-2025-05-14 (GA on Vertex) + ServerToolsViaOpenAIEndpoint: true, // web_search only on Vertex per Table 20 (web_fetch/code_execution skip) }, } diff --git a/core/providers/vllm/vllm.go b/core/providers/vllm/vllm.go index 5cafe918af..a6b7b1c48c 100644 --- a/core/providers/vllm/vllm.go +++ b/core/providers/vllm/vllm.go @@ -524,6 +524,7 @@ func (provider *VLLMProvider) TranscriptionStream(ctx *schemas.BifrostContext, p // Start streaming in a goroutine go func() { + defer providerUtils.EnsureStreamFinalizerCalled(ctx) defer func() { if ctx.Err() == context.Canceled { providerUtils.HandleStreamCancellation(ctx, postHookRunner, responseChan, providerName, request.Model, schemas.TranscriptionStreamRequest, logger) diff --git a/core/schemas/chatcompletions.go b/core/schemas/chatcompletions.go index 51971243f0..be70991155 100644 --- a/core/schemas/chatcompletions.go +++ b/core/schemas/chatcompletions.go @@ -2,6 +2,7 @@ package schemas import ( "bytes" + "encoding/json" "fmt" "time" ) @@ -186,6 +187,7 @@ type ChatParameters struct { MaxCompletionTokens *int `json:"max_completion_tokens,omitempty"` // Maximum number of tokens to generate Metadata *map[string]any `json:"metadata,omitempty"` // Metadata to be returned with the response Modalities []string `json:"modalities,omitempty"` // Modalities to be returned with the response + N *int `json:"n,omitempty"` // Number of chat completions to generate when supported ParallelToolCalls *bool `json:"parallel_tool_calls,omitempty"` Prediction *ChatPrediction `json:"prediction,omitempty"` // Predicted output content (OpenAI only) PresencePenalty *float64 `json:"presence_penalty,omitempty"` // Penalizes repeated tokens @@ -208,6 +210,19 @@ type ChatParameters struct { Verbosity *string `json:"verbosity,omitempty"` // "low" | "medium" | "high" WebSearchOptions *ChatWebSearchOptions `json:"web_search_options,omitempty"` // Web search options (OpenAI only) + // Anthropic-native knobs promoted to the neutral layer. 
These pass through + // typed to Anthropic-family providers (honored/stripped per ProviderFeatures + // in core/providers/anthropic/types.go). Non-Anthropic providers (OpenAI + // etc.) silently ignore them. + TopK *int `json:"top_k,omitempty"` // Anthropic top_k sampling + Speed *string `json:"speed,omitempty"` // "fast" (Anthropic fast-mode-2026-02-01 beta, Opus 4.6 only) + InferenceGeo *string `json:"inference_geo,omitempty"` // Anthropic inference_geo (Claude API only) + MCPServers []ChatMCPServer `json:"mcp_servers,omitempty"` // Anthropic MCP connector (mcp-client-2025-11-20) + Container *ChatContainer `json:"container,omitempty"` // Anthropic container (string id, or object with skills[] — beta skills-2025-10-02) + CacheControl *CacheControl `json:"cache_control,omitempty"` // Top-level request cache control (Anthropic family) + TaskBudget *ChatTaskBudget `json:"task_budget,omitempty"` // Anthropic output_config.task_budget (task-budgets-2026-03-13 beta) + ContextManagement json.RawMessage `json:"context_management,omitempty"` // Anthropic context_management — complex union, passed as raw JSON to the provider layer + // Dynamic parameters that can be provider-specific, they are directly // added to the request as is. ExtraParams map[string]interface{} `json:"-"` @@ -269,6 +284,7 @@ type ChatReasoning struct { Enabled *bool `json:"enabled,omitempty"` // Explicitly enable or disable reasoning (required by OpenRouter to disable reasoning for some models) Effort *string `json:"effort,omitempty"` // "none" | "minimal" | "low" | "medium" | "high" (any value other than "none" will enable reasoning) MaxTokens *int `json:"max_tokens,omitempty"` // Maximum number of tokens to generate for the reasoning output (required for anthropic) + Display *string `json:"display,omitempty"` // Anthropic thinking.display: "summarized" | "omitted" (requires model support for adaptive thinking) } // ChatPrediction represents predicted output content for the model to reference (OpenAI only). @@ -313,12 +329,179 @@ const ( ChatToolTypeCustom ChatToolType = "custom" ) +type MCPToolAnnotations struct { + Title string `json:"title,omitempty"` // Human-readable title for the tool + ReadOnlyHint *bool `json:"readOnlyHint,omitempty"` // If true, the tool does not modify its environment + DestructiveHint *bool `json:"destructiveHint,omitempty"` // If true, the tool may perform destructive updates + IdempotentHint *bool `json:"idempotentHint,omitempty"` // If true, repeated calls with same args have no additional effect + OpenWorldHint *bool `json:"openWorldHint,omitempty"` // If true, the tool interacts with external entities +} + // ChatTool represents a tool definition. +// +// Three shapes coexist under this type: +// 1. OpenAI function tool: Type="function", Function non-nil. +// 2. Custom tool: Type="custom", Custom non-nil. +// 3. Anthropic server tool: Type=server-tool version string (e.g. +// "web_search_20260209", "computer_20251124", "mcp_toolset"), Function/Custom +// nil, Name populated at top level, and the variant-specific fields +// (MaxUses, DisplayWidthPx, etc.) populated inline. +// +// JSON shape for (3) matches Anthropic's native tool format directly +// (e.g. {"type":"web_search_20260209","name":"web_search","max_uses":5}). +// +// Custom MarshalJSON/UnmarshalJSON enforce the union invariant: +// - On marshal, fields that don't match Type are cleared on a copy so the +// wire format always carries exactly one variant. Mixed caller state +// (e.g. 
Type="web_search_20260209" with Function also set) gets +// canonicalized instead of being forwarded ambiguously to providers. +// - On unmarshal, tolerantly accept whatever JSON shape comes in, then +// normalize the decoded struct so downstream code sees a canonical shape. type ChatTool struct { - Type ChatToolType `json:"type"` - Function *ChatToolFunction `json:"function,omitempty"` // Function definition - Custom *ChatToolCustom `json:"custom,omitempty"` // Custom tool definition - CacheControl *CacheControl `json:"cache_control,omitempty"` // Cache control for the tool + Type ChatToolType `json:"type"` + Function *ChatToolFunction `json:"function,omitempty"` // Function definition (shape 1) + Custom *ChatToolCustom `json:"custom,omitempty"` // Custom tool definition (shape 2) + CacheControl *CacheControl `json:"cache_control,omitempty"` // Cache control for the tool + Annotations *MCPToolAnnotations `json:"-"` // MCP tool annotations (Bifrost-internal, never forwarded to providers) + + // Anthropic-native tool flags promoted to the neutral layer. All optional; + // ignored by providers that don't support them. Gating per ProviderFeatures + // in core/providers/anthropic/types.go. + DeferLoading *bool `json:"defer_loading,omitempty"` // Anthropic advanced-tool-use: defer loading of tool definition + AllowedCallers []string `json:"allowed_callers,omitempty"` // Anthropic advanced-tool-use: which callers can invoke this tool ("direct", "code_execution_20250825", "code_execution_20260120") + InputExamples []ChatToolInputExample `json:"input_examples,omitempty"` // Anthropic tool-examples-2025-10-29: example inputs for the tool + EagerInputStreaming *bool `json:"eager_input_streaming,omitempty"` // Anthropic fine-grained-tool-streaming-2025-05-14: stream input_json_delta before full args are determined (custom tools only) + + // Anthropic server-tool fields (shape 3). All optional; only populated when + // Type is a server-tool version string. Function tools carry their name + // inside Function.Name — use omitempty here so Name doesn't double-emit. + Name string `json:"name,omitempty"` + + // web_search_* and web_fetch_*: + MaxUses *int `json:"max_uses,omitempty"` + AllowedDomains []string `json:"allowed_domains,omitempty"` + BlockedDomains []string `json:"blocked_domains,omitempty"` + UserLocation *ChatToolUserLocation `json:"user_location,omitempty"` + + // web_fetch_* only: + MaxContentTokens *int `json:"max_content_tokens,omitempty"` + Citations *ChatToolCitationsConfig `json:"citations,omitempty"` + UseCache *bool `json:"use_cache,omitempty"` // web_fetch_20260309+ only + + // computer_*: + DisplayWidthPx *int `json:"display_width_px,omitempty"` + DisplayHeightPx *int `json:"display_height_px,omitempty"` + DisplayNumber *int `json:"display_number,omitempty"` + EnableZoom *bool `json:"enable_zoom,omitempty"` // computer_20251124 only + + // text_editor_20250728+: + MaxCharacters *int `json:"max_characters,omitempty"` + + // mcp_toolset: + MCPServerName string `json:"mcp_server_name,omitempty"` + DefaultConfig *ChatMCPToolsetConfig `json:"default_config,omitempty"` + Configs map[string]*ChatMCPToolsetConfig `json:"configs,omitempty"` +} + +// normalizeShape clears fields that don't belong to the ChatTool's active +// variant, encoding the three-way union invariant: +// +// 1. Type="function": keep Function; nil Custom, server-tool Name, and +// variant metadata (function tools carry their name inside Function.Name). +// 2. 
Type="custom": keep Custom and top-level Name; nil Function and +// server-tool variant metadata. +// 3. Any other Type: server-tool variant — keep Name and variant fields; +// nil Function and Custom. +// +// Called by both Marshal (strict wire format) and Unmarshal (canonicalize +// after tolerant decode of potentially mixed input). +func (t *ChatTool) normalizeShape() { + switch t.Type { + case ChatToolTypeFunction: + t.Custom = nil + t.Name = "" + t.clearServerToolVariantFields() + case ChatToolTypeCustom: + t.Function = nil + t.clearServerToolVariantFields() + default: + t.Function = nil + t.Custom = nil + } +} + +func (t *ChatTool) clearServerToolVariantFields() { + t.MaxUses = nil + t.AllowedDomains = nil + t.BlockedDomains = nil + t.UserLocation = nil + t.MaxContentTokens = nil + t.Citations = nil + t.UseCache = nil + t.DisplayWidthPx = nil + t.DisplayHeightPx = nil + t.DisplayNumber = nil + t.EnableZoom = nil + t.MaxCharacters = nil + t.MCPServerName = "" + t.DefaultConfig = nil + t.Configs = nil +} + +// MarshalJSON enforces the ChatTool union invariant: exactly one variant's +// fields are emitted on the wire, matching Type. A mix-state tool +// (e.g. Type="web_search_20260209" with Function also populated) would +// otherwise serialize both, and downstream provider converters — which +// dispatch on the top-level Type/Name shape — could misinterpret or +// silently forward the stray fields. +func (t ChatTool) MarshalJSON() ([]byte, error) { + normalized := t + normalized.normalizeShape() + type Alias ChatTool + return MarshalSorted((*Alias)(&normalized)) +} + +// UnmarshalJSON tolerantly decodes whatever JSON shape arrives, then +// canonicalizes the struct via normalizeShape so downstream code sees a +// single-variant result even if the input mixed multiple variants. +// Resets the receiver before decoding so omitted optional fields from a +// prior payload don't survive the new decode; mirrors ChatContainer.UnmarshalJSON. +func (t *ChatTool) UnmarshalJSON(data []byte) error { + trimmed := bytes.TrimSpace(data) + if len(trimmed) == 0 || bytes.Equal(trimmed, []byte("null")) { + *t = ChatTool{} + return nil + } + + type Alias ChatTool + var temp Alias + if err := Unmarshal(data, &temp); err != nil { + return err + } + *t = ChatTool(temp) + t.normalizeShape() + return nil +} + +// ChatToolUserLocation is the neutral user_location for web_search tools. +type ChatToolUserLocation struct { + Type *string `json:"type,omitempty"` // "approximate" + City *string `json:"city,omitempty"` + Region *string `json:"region,omitempty"` + Country *string `json:"country,omitempty"` + Timezone *string `json:"timezone,omitempty"` +} + +// ChatToolCitationsConfig is the request-side citations config on web_fetch +// ({"enabled": true/false}). Distinct from response-side text citations. +type ChatToolCitationsConfig struct { + Enabled *bool `json:"enabled,omitempty"` +} + +// ChatMCPToolsetConfig configures an MCP toolset entry (mcp_toolset tool). +type ChatMCPToolsetConfig struct { + Enabled *bool `json:"enabled,omitempty"` + DeferLoading *bool `json:"defer_loading,omitempty"` } // ChatToolFunction represents a function definition. 
@@ -946,6 +1129,103 @@ type CacheControl struct { Scope *string `json:"scope,omitempty"` // "user" | "global" } +// --------------------------------------------------------------------------- +// Neutral mirror types for Anthropic-native knobs promoted onto ChatParameters +// --------------------------------------------------------------------------- +// These live in schemas/ (not provider-specific) so ChatParameters stays +// import-free of provider packages. The anthropic provider reads them in +// ToAnthropicChatRequest and maps them to AnthropicMessageRequest fields. + +// ChatContainerSkill describes one skill attached to a container. +// Origin: Anthropic container.skills[] (beta skills-2025-10-02). +type ChatContainerSkill struct { + SkillID string `json:"skill_id"` + Type string `json:"type"` // "anthropic" | "custom" + Version *string `json:"version,omitempty"` // Optional version pin +} + +// ChatContainerObject is the object form of ChatContainer. +// Both fields are optional — ID alone is a bare container reference; +// adding Skills makes it beta-gated. +type ChatContainerObject struct { + ID *string `json:"id,omitempty"` + Skills []ChatContainerSkill `json:"skills,omitempty"` +} + +// ChatContainer is the union "container" field on a chat request. +// Anthropic's API accepts either a plain string (container id) or an object +// with id + skills[]. Mirrors AnthropicContainer in the provider package. +type ChatContainer struct { + ContainerStr *string + ContainerObject *ChatContainerObject +} + +// MarshalJSON emits the raw string or the object form directly. +func (c ChatContainer) MarshalJSON() ([]byte, error) { + if c.ContainerStr != nil && c.ContainerObject != nil { + return nil, fmt.Errorf("both ContainerStr and ContainerObject are set; only one should be non-nil") + } + if c.ContainerStr != nil { + return MarshalSorted(*c.ContainerStr) + } + if c.ContainerObject != nil { + return MarshalSorted(c.ContainerObject) + } + return MarshalSorted(nil) +} + +// UnmarshalJSON accepts either a plain string or the object form. +// Uses the build-tag-aware package-level Unmarshal (sonic on native, stdlib +// json on wasm/tinygo) and clears the inactive union arm on each success so +// repeated decodes into the same value don't leave both arms populated. +// JSON null clears both arms. Follows the ChatToolChoice.UnmarshalJSON pattern. +func (c *ChatContainer) UnmarshalJSON(data []byte) error { + trimmed := bytes.TrimSpace(data) + if len(trimmed) == 0 || bytes.Equal(trimmed, []byte("null")) { + c.ContainerStr = nil + c.ContainerObject = nil + return nil + } + + var s string + if err := Unmarshal(data, &s); err == nil { + c.ContainerStr = &s + c.ContainerObject = nil + return nil + } + var obj ChatContainerObject + if err := Unmarshal(data, &obj); err == nil { + c.ContainerStr = nil + c.ContainerObject = &obj + return nil + } + return fmt.Errorf("container field is neither a string nor an object") +} + +// ChatTaskBudget advises the model of a full-loop token budget. +// Origin: Anthropic output_config.task_budget (beta task-budgets-2026-03-13). +type ChatTaskBudget struct { + Type string `json:"type"` // Always "tokens" + Total int `json:"total"` // Total advisory budget + Remaining *int `json:"remaining,omitempty"` // Optional client-side counter +} + +// ChatToolInputExample is one example input for a tool, shown to the model. +// Origin: Anthropic tool.input_examples (beta tool-examples-2025-10-29). 
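+//
+// e.g. {"input":{"q":"hello"},"description":"basic"}. Input stays raw JSON
+// (json.RawMessage) because example inputs mirror arbitrary tool arguments.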
+type ChatToolInputExample struct { + Input json.RawMessage `json:"input"` + Description *string `json:"description,omitempty"` +} + +// ChatMCPServer is an MCP server definition attached to a chat request. +// Origin: Anthropic mcp_servers[] (mcp-client-2025-11-20 format). +type ChatMCPServer struct { + Type string `json:"type"` // "url" + URL string `json:"url"` + Name string `json:"name"` + AuthorizationToken *string `json:"authorization_token,omitempty"` +} + // ChatInputImage represents image data in a message. type ChatInputImage struct { URL string `json:"url"` diff --git a/core/schemas/images.go b/core/schemas/images.go index d16df42a10..1944f96d3f 100644 --- a/core/schemas/images.go +++ b/core/schemas/images.go @@ -69,8 +69,24 @@ type BifrostImageGenerationResponse struct { // - Size on ImageGenerationResponseParameters (from request params if not in response) // - Quality (low, medium, high, auto) only func (r *BifrostImageGenerationResponse) BackfillParams(req *BifrostRequest) { + if r == nil || req == nil { + return + } numInputImages, size, quality := getNumInputImagesSizeAndQualityFromRequest(req) + // Backfill Model from whichever inner request carries it. Some provider APIs + // (notably OpenAI /v1/images/*) omit model in the response body. + if r.Model == "" { + switch { + case req.ImageGenerationRequest != nil: + r.Model = req.ImageGenerationRequest.Model + case req.ImageEditRequest != nil: + r.Model = req.ImageEditRequest.Model + case req.ImageVariationRequest != nil: + r.Model = req.ImageVariationRequest.Model + } + } + // Backfill NumInputImages if numInputImages > 0 { if r.Usage == nil { diff --git a/core/schemas/responses.go b/core/schemas/responses.go index adc1d5d07f..04a9be8bd2 100644 --- a/core/schemas/responses.go +++ b/core/schemas/responses.go @@ -1406,6 +1406,14 @@ type ResponsesTool struct { // Not in OpenAI's schemas, but sent by a few providers (Anthropic, Bedrock are some of them) CacheControl *CacheControl `json:"cache_control,omitempty"` + // Anthropic-native tool flags promoted to the neutral layer. All optional; + // ignored by providers that don't support them. Gated per ProviderFeatures + // in core/providers/anthropic/types.go. + DeferLoading *bool `json:"defer_loading,omitempty"` // Anthropic advanced-tool-use: defer loading of tool definition + AllowedCallers []string `json:"allowed_callers,omitempty"` // Anthropic advanced-tool-use: which callers can invoke this tool + InputExamples []ChatToolInputExample `json:"input_examples,omitempty"` // Anthropic tool-examples-2025-10-29: example inputs for the tool + EagerInputStreaming *bool `json:"eager_input_streaming,omitempty"` // Anthropic fine-grained-tool-streaming-2025-05-14 + *ResponsesToolFunction *ResponsesToolFileSearch *ResponsesToolComputerUsePreview @@ -1463,6 +1471,38 @@ func (t ResponsesTool) MarshalJSON() ([]byte, error) { return nil, err } } + // Anthropic-native tool flags promoted to the neutral layer. Must be + // emitted here (before the type-specific merge) so the wire format carries + // them to providers that gate features on these keys. Without this block + // MarshalJSON silently drops the fields despite their json tags. 
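+	// (The json tags only matter to the default encoder; this method
+	// assembles its wire bytes by hand via sjson, so every promoted key has
+	// to be set explicitly.)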
+ if t.DeferLoading != nil { + if data, err = sjson.SetBytes(data, "defer_loading", *t.DeferLoading); err != nil { + return nil, err + } + } + if len(t.AllowedCallers) > 0 { + callersBytes, callersErr := MarshalSorted(t.AllowedCallers) + if callersErr != nil { + return nil, callersErr + } + if data, err = sjson.SetRawBytes(data, "allowed_callers", callersBytes); err != nil { + return nil, err + } + } + if len(t.InputExamples) > 0 { + examplesBytes, examplesErr := MarshalSorted(t.InputExamples) + if examplesErr != nil { + return nil, examplesErr + } + if data, err = sjson.SetRawBytes(data, "input_examples", examplesBytes); err != nil { + return nil, err + } + } + if t.EagerInputStreaming != nil { + if data, err = sjson.SetBytes(data, "eager_input_streaming", *t.EagerInputStreaming); err != nil { + return nil, err + } + } // Marshal the type-specific embedded struct and merge its fields var typeBytes []byte @@ -1566,6 +1606,32 @@ func (t *ResponsesTool) UnmarshalJSON(data []byte) error { } t.CacheControl = &cc } + // Anthropic-native tool flags. Mirror the emit side in MarshalJSON above — + // without these reads, a round-trip silently drops the fields. + if v, ok := raw["defer_loading"].(bool); ok { + t.DeferLoading = Ptr(v) + } + if v, ok := raw["allowed_callers"]; ok { + bytes, err := MarshalSorted(v) + if err != nil { + return err + } + if err := Unmarshal(bytes, &t.AllowedCallers); err != nil { + return err + } + } + if v, ok := raw["input_examples"]; ok { + bytes, err := MarshalSorted(v) + if err != nil { + return err + } + if err := Unmarshal(bytes, &t.InputExamples); err != nil { + return err + } + } + if v, ok := raw["eager_input_streaming"].(bool); ok { + t.EagerInputStreaming = Ptr(v) + } // Based on type, unmarshal into the appropriate embedded struct switch t.Type { diff --git a/core/schemas/serialization_test.go b/core/schemas/serialization_test.go index 66b720b680..94b9a27dae 100644 --- a/core/schemas/serialization_test.go +++ b/core/schemas/serialization_test.go @@ -781,6 +781,204 @@ func TestResponsesTool_MarshalJSON_RoundTrip(t *testing.T) { } } +// TestResponsesTool_RoundTrip_AnthropicFields ensures the Anthropic-native tool +// flags promoted onto ResponsesTool (defer_loading, allowed_callers, +// input_examples, eager_input_streaming) survive a full Marshal→Unmarshal→ +// Marshal cycle. Before MarshalJSON/UnmarshalJSON were taught to handle these +// keys, all four were silently dropped at the JSON boundary. +func TestResponsesTool_RoundTrip_AnthropicFields(t *testing.T) { + original := ResponsesTool{ + Type: ResponsesToolTypeFunction, + Name: Ptr("lookup"), + Description: Ptr("lookup something"), + DeferLoading: Ptr(true), + AllowedCallers: []string{"direct", "agent"}, + EagerInputStreaming: Ptr(false), + InputExamples: []ChatToolInputExample{ + {Input: json.RawMessage(`{"q":"hello"}`), Description: Ptr("basic")}, + {Input: json.RawMessage(`{"q":"world"}`)}, + }, + ResponsesToolFunction: &ResponsesToolFunction{ + Parameters: &ToolFunctionParameters{}, + }, + } + + data, err := Marshal(original) + require.NoError(t, err) + + // All four keys must appear in the wire bytes. 
+ for _, key := range []string{`"defer_loading"`, `"allowed_callers"`, `"input_examples"`, `"eager_input_streaming"`} { + assert.Contains(t, string(data), key, + "%s must be emitted by MarshalJSON — otherwise it is silently dropped", key) + } + + var decoded ResponsesTool + require.NoError(t, Unmarshal(data, &decoded)) + + require.NotNil(t, decoded.DeferLoading) + assert.True(t, *decoded.DeferLoading) + assert.Equal(t, []string{"direct", "agent"}, decoded.AllowedCallers) + require.NotNil(t, decoded.EagerInputStreaming) + assert.False(t, *decoded.EagerInputStreaming) + require.Len(t, decoded.InputExamples, 2) + assert.JSONEq(t, `{"q":"hello"}`, string(decoded.InputExamples[0].Input)) + require.NotNil(t, decoded.InputExamples[0].Description) + assert.Equal(t, "basic", *decoded.InputExamples[0].Description) + assert.JSONEq(t, `{"q":"world"}`, string(decoded.InputExamples[1].Input)) + + // Second-round marshal must be byte-stable. + data2, err := Marshal(decoded) + require.NoError(t, err) + assert.Equal(t, string(data), string(data2), "round-trip must be stable") +} + +// TestChatTool_MarshalJSON_EnforcesUnion verifies that the custom codec +// canonicalizes mixed-state ChatTools on the wire, regardless of what the +// caller populated in memory. Exactly one variant's fields survive marshal — +// matching Type — so downstream provider converters can't misinterpret or +// forward stray fields from a different shape. +func TestChatTool_MarshalJSON_EnforcesUnion(t *testing.T) { + t.Run("function_type_clears_custom_and_server_tool_fields", func(t *testing.T) { + tool := ChatTool{ + Type: ChatToolTypeFunction, + Function: &ChatToolFunction{Name: "get_weather"}, + // Mixed state: server-tool + custom fields also populated. + Custom: &ChatToolCustom{}, + Name: "leaked_name", + MaxUses: Ptr(5), + DisplayWidthPx: Ptr(1280), + MCPServerName: "leaked_server", + } + data, err := Marshal(tool) + require.NoError(t, err) + raw := string(data) + + assert.Contains(t, raw, `"type":"function"`) + assert.Contains(t, raw, `"get_weather"`) + for _, leak := range []string{`"custom"`, `"leaked_name"`, `"max_uses"`, `"display_width_px"`, `"mcp_server_name"`} { + assert.NotContains(t, raw, leak, "function-type wire must not carry %s", leak) + } + }) + + t.Run("custom_type_clears_function_and_server_tool_fields", func(t *testing.T) { + tool := ChatTool{ + Type: ChatToolTypeCustom, + Custom: &ChatToolCustom{Format: &ChatToolCustomFormat{Type: "text"}}, + Name: "my_custom", + // Leaks + Function: &ChatToolFunction{Name: "should_be_stripped"}, + MaxUses: Ptr(5), + } + data, err := Marshal(tool) + require.NoError(t, err) + raw := string(data) + + assert.Contains(t, raw, `"type":"custom"`) + assert.Contains(t, raw, `"my_custom"`) // custom tool retains top-level Name + assert.Contains(t, raw, `"format"`) // custom's format field + assert.NotContains(t, raw, `"function"`) + assert.NotContains(t, raw, `"should_be_stripped"`) + assert.NotContains(t, raw, `"max_uses"`) + }) + + t.Run("server_tool_type_clears_function_and_custom", func(t *testing.T) { + tool := ChatTool{ + Type: "web_search_20260209", + Name: "web_search", + MaxUses: Ptr(5), + AllowedCallers: []string{"direct"}, + // Leaks + Function: &ChatToolFunction{Name: "should_be_stripped"}, + Custom: &ChatToolCustom{}, + } + data, err := Marshal(tool) + require.NoError(t, err) + raw := string(data) + + assert.Contains(t, raw, `"type":"web_search_20260209"`) + assert.Contains(t, raw, `"web_search"`) + assert.Contains(t, raw, `"max_uses":5`) + assert.Contains(t, raw, 
`"allowed_callers":["direct"]`) + assert.NotContains(t, raw, `"function"`) + assert.NotContains(t, raw, `"custom"`) + assert.NotContains(t, raw, `"should_be_stripped"`) + }) +} + +// TestChatTool_UnmarshalJSON_NormalizesMixedInput verifies that tolerant +// decode of a mixed-shape payload produces a canonical single-variant struct +// so downstream provider conversion code doesn't have to defend against +// the untrusted shape. +func TestChatTool_UnmarshalJSON_NormalizesMixedInput(t *testing.T) { + t.Run("function_type_mixed_with_server_fields_normalizes", func(t *testing.T) { + // Caller sends a function tool but also includes server-tool metadata. + raw := []byte(`{ + "type":"function", + "function":{"name":"get_weather"}, + "name":"stray_server_name", + "max_uses":5, + "display_width_px":1280 + }`) + var tool ChatTool + require.NoError(t, Unmarshal(raw, &tool)) + + assert.Equal(t, ChatToolTypeFunction, tool.Type) + require.NotNil(t, tool.Function) + assert.Equal(t, "get_weather", tool.Function.Name) + assert.Empty(t, tool.Name, "function-type must nil top-level Name (lives in Function.Name)") + assert.Nil(t, tool.MaxUses) + assert.Nil(t, tool.DisplayWidthPx) + }) + + t.Run("server_tool_type_mixed_with_function_normalizes", func(t *testing.T) { + // Caller sends a server-tool but also includes function. + raw := []byte(`{ + "type":"web_search_20260209", + "name":"web_search", + "max_uses":5, + "function":{"name":"stray"} + }`) + var tool ChatTool + require.NoError(t, Unmarshal(raw, &tool)) + + assert.Equal(t, ChatToolType("web_search_20260209"), tool.Type) + assert.Equal(t, "web_search", tool.Name) + require.NotNil(t, tool.MaxUses) + assert.Equal(t, 5, *tool.MaxUses) + assert.Nil(t, tool.Function, "server-tool must nil Function") + assert.Nil(t, tool.Custom, "server-tool must nil Custom") + }) +} + +// TestChatTool_RoundTrip_SurvivesMixedInput verifies that a mixed-input +// payload, once canonicalized by Unmarshal and re-emitted by Marshal, drops +// the stray fields and produces a deterministic single-variant wire format. +func TestChatTool_RoundTrip_SurvivesMixedInput(t *testing.T) { + raw := []byte(`{ + "type":"web_search_20260209", + "name":"web_search", + "max_uses":5, + "function":{"name":"stray"}, + "custom":{"format":{"type":"text"}} + }`) + var tool ChatTool + require.NoError(t, Unmarshal(raw, &tool)) + + out, err := Marshal(tool) + require.NoError(t, err) + outStr := string(out) + assert.NotContains(t, outStr, `"function"`) + assert.NotContains(t, outStr, `"custom"`) + assert.Contains(t, outStr, `"web_search_20260209"`) + + // Second pass must be byte-stable (critical for prompt caching keys). + var tool2 ChatTool + require.NoError(t, Unmarshal(out, &tool2)) + out2, err := Marshal(tool2) + require.NoError(t, err) + assert.Equal(t, string(out), string(out2), "round-trip must be stable") +} + func TestToolFunctionParameters_ExplicitEmptyObjectPreserved(t *testing.T) { var params ToolFunctionParameters err := Unmarshal([]byte(`{}`), ¶ms) @@ -1069,3 +1267,90 @@ func TestResponsesTool_UnmarshalJSON_NormalizesVersionedToolTypes(t *testing.T) }) } } + +// TestSonic_ChatTool_AnnotationsNeverSerialized verifies that MCPToolAnnotations +// (json:"-") are never included in the JSON payload sent to providers. 
+func TestSonic_ChatTool_AnnotationsNeverSerialized(t *testing.T) { + readOnly := true + destructive := false + + tool := ChatTool{ + Type: ChatToolTypeFunction, + Function: &ChatToolFunction{ + Name: "read_file", + Description: Ptr("Reads a file from the filesystem"), + Parameters: &ToolFunctionParameters{ + Type: "object", + Properties: NewOrderedMapFromPairs(KV("path", map[string]interface{}{"type": "string"})), + Required: []string{"path"}, + }, + }, + Annotations: &MCPToolAnnotations{ + Title: "File Reader", + ReadOnlyHint: &readOnly, + DestructiveHint: &destructive, + IdempotentHint: Ptr(true), + }, + } + + output, err := Marshal(tool) + require.NoError(t, err) + + s := string(output) + + // Annotations must be absent — json:"-" must suppress the entire field + assert.NotContains(t, s, "annotations", "annotations field must not appear in provider payload") + assert.NotContains(t, s, "readOnlyHint", "readOnlyHint must not appear in provider payload") + assert.NotContains(t, s, "destructiveHint", "destructiveHint must not appear in provider payload") + assert.NotContains(t, s, "idempotentHint", "idempotentHint must not appear in provider payload") + assert.NotContains(t, s, "File Reader", "annotation title must not appear in provider payload") + + // The function definition itself must still be present + assert.Contains(t, s, "read_file", "function name must be in payload") + assert.Contains(t, s, "path", "parameter must be in payload") +} + +// TestSonic_ChatTool_DeepCopy_AnnotationsPreserved verifies that DeepCopyChatTool +// correctly copies Annotations so they survive any clone-based flows. +func TestSonic_ChatTool_DeepCopy_AnnotationsPreserved(t *testing.T) { + readOnly := true + idempotent := false + + original := ChatTool{ + Type: ChatToolTypeFunction, + Function: &ChatToolFunction{ + Name: "query_db", + }, + Annotations: &MCPToolAnnotations{ + Title: "DB Query", + ReadOnlyHint: &readOnly, + IdempotentHint: &idempotent, + }, + } + + copied := DeepCopyChatTool(original) + + require.NotNil(t, copied.Annotations) + assert.Equal(t, "DB Query", copied.Annotations.Title) + assert.Equal(t, true, *copied.Annotations.ReadOnlyHint) + assert.Equal(t, false, *copied.Annotations.IdempotentHint) + assert.Nil(t, copied.Annotations.DestructiveHint) + assert.Nil(t, copied.Annotations.OpenWorldHint) + + // Verify it's a true deep copy — mutations don't bleed back + *original.Annotations.ReadOnlyHint = false + assert.True(t, *copied.Annotations.ReadOnlyHint, "copy must not share pointer with original") +} + +// TestSonic_ChatTool_DeepCopy_NilAnnotationsStaysNil verifies that a tool +// without annotations deep-copies cleanly with Annotations remaining nil. 
+func TestSonic_ChatTool_DeepCopy_NilAnnotationsStaysNil(t *testing.T) { + original := ChatTool{ + Type: ChatToolTypeFunction, + Function: &ChatToolFunction{Name: "plain_tool"}, + } + + copied := DeepCopyChatTool(original) + + assert.Nil(t, copied.Annotations, "Annotations should stay nil when original has none") +} diff --git a/core/schemas/utils.go b/core/schemas/utils.go index 7c59020f40..5e61c84ade 100644 --- a/core/schemas/utils.go +++ b/core/schemas/utils.go @@ -879,6 +879,30 @@ func DeepCopyChatTool(original ChatTool) ChatTool { } } + // Deep copy Annotations if present + if original.Annotations != nil { + copyAnnotations := &MCPToolAnnotations{ + Title: original.Annotations.Title, + } + if original.Annotations.ReadOnlyHint != nil { + v := *original.Annotations.ReadOnlyHint + copyAnnotations.ReadOnlyHint = &v + } + if original.Annotations.DestructiveHint != nil { + v := *original.Annotations.DestructiveHint + copyAnnotations.DestructiveHint = &v + } + if original.Annotations.IdempotentHint != nil { + v := *original.Annotations.IdempotentHint + copyAnnotations.IdempotentHint = &v + } + if original.Annotations.OpenWorldHint != nil { + v := *original.Annotations.OpenWorldHint + copyAnnotations.OpenWorldHint = &v + } + copyTool.Annotations = copyAnnotations + } + // Deep copy Custom if present if original.Custom != nil { copyTool.Custom = &ChatToolCustom{} diff --git a/core/schemas/videos.go b/core/schemas/videos.go index 9e133c7d52..b1d134889c 100644 --- a/core/schemas/videos.go +++ b/core/schemas/videos.go @@ -156,6 +156,9 @@ func (r *BifrostVideoGenerationResponse) BackfillParams(req *BifrostRequest) { if seconds != nil { r.Seconds = seconds } + if r.Model == "" && req.VideoGenerationRequest != nil { + r.Model = req.VideoGenerationRequest.Model + } } // --- Video Remix --- diff --git a/core/version b/core/version index fd4ca57b8d..3e06e400e9 100644 --- a/core/version +++ b/core/version @@ -1 +1 @@ -1.4.19 +1.4.22 \ No newline at end of file diff --git a/docs/architecture/framework/model-catalog.mdx b/docs/architecture/framework/model-catalog.mdx index 7f37162d15..26b138e070 100644 --- a/docs/architecture/framework/model-catalog.mdx +++ b/docs/architecture/framework/model-catalog.mdx @@ -321,7 +321,7 @@ You can reload the pricing configuration at runtime if you need to change the pr newConfig := &modelcatalog.Config{ PricingSyncInterval: 12 * time.Hour, } -err := modelCatalog.ReloadPricing(ctx, newConfig) +err := modelCatalog.UpdateSyncConfig(ctx, newConfig) ``` ## Error Handling and Fallbacks diff --git a/docs/changelogs/v1.4.23.mdx b/docs/changelogs/v1.4.23.mdx new file mode 100644 index 0000000000..2a5f8d2049 --- /dev/null +++ b/docs/changelogs/v1.4.23.mdx @@ -0,0 +1,110 @@ +--- +title: "v1.4.23" +description: "v1.4.23 changelog - 2026-04-18" +--- + + + ```bash + npx -y @maximhq/bifrost --transport-version v1.4.23 + ``` + + + ```bash + docker pull maximhq/bifrost:v1.4.23 + docker run -p 8080:8080 maximhq/bifrost:v1.4.23 + ``` + + + + +## ✨ Features + +- **Claude Opus 4.7 Support** — Added compatibility for Anthropic's Claude Opus 4.7 model, including adaptive thinking, task-budgets beta header, `display` parameter handling, and "xhigh" effort mapping +- **Anthropic Structured Outputs** — Added `response_format` and structured output support for Anthropic models across chat completions and Responses API, including JSON-schema and JSON-object formats with order-preserving merge of additional model request fields (thanks 
[@emirhanmutlu-natuvion](https://github.com/emirhanmutlu-natuvion)!) +- **MCP Tool Annotations** — Preserve MCP tool annotations (`title`, `readOnly`, `destructive`, `idempotent`, `openWorld`) in bidirectional conversion between MCP tools and Bifrost chat tools so agents can reason about tool behavior +- **Anthropic Server Tools** — Expanded Anthropic chat schema and Responses converters to surface server-side tools (web search, code execution, computer use containers) end-to-end + +## 🐞 Fixed + +- **Provider Queue Shutdown Panic** — Eliminated `send on closed channel` panics in provider queue shutdown by leaving queue channels open and exiting workers via the `done` signal; stale producers transparently re-route to new queues during `UpdateProvider`, with rollback on failed updates +- **OpenAI Tool Result Output** — Flatten array-form `tool_result` output into a newline-joined string before marshaling for the Responses API so strict upstreams (Ollama Cloud, openai-go typed models) no longer reject it with HTTP 400; non-text blocks (images, files) are preserved (thanks [@martingiguere](https://github.com/martingiguere)!) +- **vLLM Token Usage** — Treat `delta.content=""` the same as `nil` in streaming so the synthesis chunk retains its `finish_reason`, restoring token usage attribution in logs and UI +- **Config Schema Validator** — Corrected JSON-path lookups for concurrency and SCIM blocks in the schema validation script, and reformatted `transports/config.schema.json` for readability +- **CI Egress Hardening** — Switched `step-security/harden-runner` from `audit` to `block` across all GitHub Actions workflows with explicit `allowed-endpoints` per job +- **Gemini Tool Outputs** — Handle content block tool outputs in Responses API path for `function_call_output` messages (thanks [@tom-diacono](https://github.com/tom-diacono)!) +- **Bedrock Streaming** — Emit `message_stop` event for Anthropic invoke stream and case-insensitive `anthropic-beta` header merging (thanks [@tefimov](https://github.com/tefimov)!) +- **Bedrock Tool Images** — Preserve image content blocks in tool results when converting Anthropic Messages to Bedrock Converse API (thanks [@Edward-Upton](https://github.com/Edward-Upton)!) +- **Gemini Thinking Level** — Preserved `thinkingLevel` parameters across round-trip conversions and corrected finish reason mapping +- **Anthropic WebSearch** — Removed the Claude Code user agent restriction so WebSearch tool arguments flow for all clients +- **Responses Streaming Errors** — Capture errors mid-stream in the Responses API so transport clients see failures instead of silent termination +- **Anthropic Request Fallbacks** — Dropped fallback fields from outgoing Anthropic requests to avoid schema validation errors +- **Async Context Propagation** — Preserve context values in async requests so downstream handlers retain request-scoped data +- **Custom Providers** — Allow custom providers without a list-models endpoint to accept any model rather than restricting on virtual key registration +- **OTEL Plugin** — Default `insecure` to `true` in config.json and include fallbacks in emitted OTEL metrics +- **Payload Marshalling** — Removed unnecessary marshalling of payload in the transport path +- **Helm mcpClientConfig** — Fixed templating for `mcpClientConfig` (thanks [@crust3780](https://github.com/crust3780)!) 
+- **Helm Chart** — Refreshed the helm chart with validation fixes and removed the prerelease tag + + + +- fix: OpenAI provider - flatten array-form tool_result output for Responses API (thanks [@martingiguere](https://github.com/martingiguere)!) +- fix: Gemini provider - handle content block tool outputs in Responses API path (thanks [@tom-diacono](https://github.com/tom-diacono)!) +- fix: case-insensitive `anthropic-beta` merge in `MergeBetaHeaders` +- fix: Bedrock provider - emit message_stop event for Anthropic invoke stream (thanks [@tefimov](https://github.com/tefimov)!) +- fix: Bedrock provider - preserve image content in tool results for Converse API (thanks [@Edward-Upton](https://github.com/Edward-Upton)!) +- fix: gemini preserves thinkingLevel parameters during round-trip and finish reason mapping +- fix: WebSearch tool argument handling for all clients by removing the Claude Code user agent restriction +- fix: capture responses streaming API errors +- fix: delete fallbacks from outgoing Anthropic requests +- feat: claude-opus-4-7 compatibility +- fix: token usage for vllm + + + +- chore: upgraded core to v1.4.20 +- fix: preserve context values in async requests +- fix: capture responses streaming API errors +- fix: otel plugin fixes +- fix: allow custom providers without a list models endpoint to register any model + + + +- chore: upgraded core to v1.4.20 and framework to v1.2.39 +- fix: allow custom providers without a list models endpoint to pass in any model rather than restrict it on vk + + + +- chore: upgraded core to v1.4.20 and framework to v1.2.39 + + + +- chore: upgraded core to v1.4.20 and framework to v1.2.39 + + + +- chore: upgraded core to v1.4.20 and framework to v1.2.39 +- fix: capture responses streaming API errors + + + +- chore: upgraded core to v1.4.20 and framework to v1.2.39 + + + +- chore: upgraded core to v1.4.20 and framework to v1.2.39 + + + +- chore: upgraded core to v1.4.20 and framework to v1.2.39 +- fix: sets default for `insecure` to `true` for config.json +- fix: includes fallbacks in otel metrics + + + +- chore: upgraded core to v1.4.20 and framework to v1.2.39 + + + +- chore: upgraded core to v1.4.20 and framework to v1.2.39 + + diff --git a/docs/changelogs/v1.5.0-prerelease2.mdx b/docs/changelogs/v1.5.0-prerelease2.mdx index 2a860a273b..6b7964f7ee 100644 --- a/docs/changelogs/v1.5.0-prerelease2.mdx +++ b/docs/changelogs/v1.5.0-prerelease2.mdx @@ -49,24 +49,8 @@ description: "v1.5.0-prerelease2 changelog - 2026-04-08" - **Data Race Fix** — Fix race in data reading from fasthttp request for integrations - **Model Listing** — Unify /api/models and /api/models/details listing behavior - -**v1.5.0 contains multiple breaking changes.** See the [v1.5.0 Migration Guide](/migration-guides/v1.5.0) for full before/after examples and a migration checklist. - - -## Breaking Changes in This Release - -This prerelease introduces 3 additional breaking changes on top of those in prerelease1. See the **[v1.5.0 Migration Guide](/migration-guides/v1.5.0)** for full before/after examples, automatic migration details, and a step-by-step checklist. 
- -| # | Breaking Change | Affected | -|---|---|---| -| [9](/migration-guides/v1.5.0#breaking-change-9-provider-deployments-removed-migrate-to-aliases) | Provider `deployments` removed — migrate Azure, Bedrock, Vertex, and Replicate deployment maps to the unified top-level `aliases` field | `config.json`, REST API, Go SDK | -| [10](/migration-guides/v1.5.0#breaking-change-10-go-sdk-extrafields-model-fields-renamed) | Go SDK: `ExtraFields.ModelRequested` replaced by `OriginalModelRequested` + `ResolvedModelUsed` | Go SDK | -| [11](/migration-guides/v1.5.0#breaking-change-11-go-sdk-streamaccumulatorresult-field-renamed) | Go SDK: `StreamAccumulatorResult.Model` replaced by `RequestedModel` + `ResolvedModel` | Go SDK | - ---- -- feat: add model alias support — map model names to provider-specific identifiers per key - feat: add Fireworks AI as a first-class provider (thanks [@ivanetchart](https://github.com/ivanetchart)!) - feat: add realtime provider interfaces, schemas, and engine hooks - feat: add session log storage and realtime request normalization @@ -89,7 +73,6 @@ This prerelease introduces 3 additional breaking changes on top of those in prer -- feat: add model alias storage and encryption in key config - feat: add per-user OAuth consent flow with identity selection and MCP authentication - feat: add access profiles for fine-grained permission control - feat: add user level OAuth for MCP gateway diff --git a/docs/changelogs/v1.5.0-prerelease3.mdx b/docs/changelogs/v1.5.0-prerelease3.mdx new file mode 100644 index 0000000000..b85f049bfc --- /dev/null +++ b/docs/changelogs/v1.5.0-prerelease3.mdx @@ -0,0 +1,94 @@ +--- +title: "v1.5.0-prerelease3" +description: "v1.5.0-prerelease3 changelog - 2026-04-13" +--- + + + ```bash + npx -y @maximhq/bifrost --transport-version v1.5.0-prerelease3 + ``` + + + ```bash + docker pull maximhq/bifrost:v1.5.0-prerelease3 + docker run -p 8080:8080 maximhq/bifrost:v1.5.0-prerelease3 + ``` + + + + +## ✨ Features + +- **OAuth MCP** — Add next-step hints to the OAuth MCP client creation response +- **Azure Passthrough** — Added Azure passthrough support +- **272k Token Tier** — Added 272k token tier pricing support +- **Flex and Priority Tiers** — Added flex and priority tier pricing support + +## 🐞 Fixed + +- **Streaming Post-Hook Race** — Fix race condition where fasthttp RequestCtx could be recycled before transport post-hooks complete in streaming goroutines; request/response snapshots are now captured eagerly before the handler returns +- **Async User Values** — Propagate user values through all async inference handlers and job submissions +- **Trace Completer Safety** — Refactor trace completer to accept transport logs as a parameter instead of reading from a potentially recycled context +- **Async Log Store Exceptions** — Fix exception handling in async log store jobs +- **Model Alias Tracking** — Split ModelRequested into OriginalModelRequested and ResolvedModelUsed for accurate model alias resolution tracking +- **MCP Tool Discovery** — Add discovered tools and tool name mapping columns to MCP clients + + + +- refactor: split ModelRequested into OriginalModelRequested and ResolvedModelUsed for model alias tracking +- refactor: simplify Azure passthrough by removing redundant config nil checks +- refactor: simplify Mistral error parsing signature +- fix: carry ProviderResponseHeaders through text completion response conversion + + + +- feat: add MCP client discovered tools and tool name mapping migration +- fix: exception handling in async log store
jobs +- refactor: model catalog Init API to use SetShouldSyncGate method +- refactor: rename DefaultPricingSyncInterval to DefaultSyncInterval + + + +- chore: upgraded core to v1.5.2 and framework to v1.3.2 + + + +- chore: upgraded core to v1.5.2 and framework to v1.3.2 + + + +- chore: upgraded core to v1.5.2 and framework to v1.3.2 + + + +- chore: upgraded core to v1.5.2 and framework to v1.3.2 + + + +- chore: upgraded core to v1.5.2 and framework to v1.3.2 + + + +- chore: upgraded core to v1.5.2 and framework to v1.3.2 + + + +- chore: upgraded core to v1.5.2 and framework to v1.3.2 + + + +- chore: upgraded core to v1.5.2 and framework to v1.3.2 + + + +- chore: upgraded core to v1.5.2 and framework to v1.3.2 + + + +- chore: upgraded core to v1.5.2 and framework to v1.3.2 + + + +- chore: upgraded core to v1.5.2 and framework to v1.3.2 + + diff --git a/docs/contributing/code-conventions.mdx b/docs/contributing/code-conventions.mdx index 7f8a51c6e6..c8a73b2774 100644 --- a/docs/contributing/code-conventions.mdx +++ b/docs/contributing/code-conventions.mdx @@ -321,7 +321,7 @@ make test-all ### TypeScript/React - Use ESLint configuration from project -- Run Prettier for formatting +- Run `npm run format` for formatting - Ensure TypeScript compilation succeeds ## Key Principles diff --git a/docs/contributing/setting-up-repo.mdx b/docs/contributing/setting-up-repo.mdx index 0051ddf9d1..d4ea991fb0 100644 --- a/docs/contributing/setting-up-repo.mdx +++ b/docs/contributing/setting-up-repo.mdx @@ -84,7 +84,7 @@ This command will: 1. Install UI dependencies automatically 2. Install Air for hot reloading 3. Set up the Go workspace with local modules -4. Start the Next.js development server (port 3000) +4. Start the Vite development server (port 3000) 5. Start the API server with UI proxy (port 8080) **Access the application at:** http://localhost:8080 @@ -350,7 +350,7 @@ make setup-workspace **UI dependency issues:** ```bash # Clean and reinstall UI dependencies -rm -rf ui/node_modules ui/.next +rm -rf ui/node_modules make install-ui ``` diff --git a/docs/deployment-guides/helm.mdx b/docs/deployment-guides/helm.mdx index cc09723b8a..3be4c5aa5c 100644 --- a/docs/deployment-guides/helm.mdx +++ b/docs/deployment-guides/helm.mdx @@ -1,740 +1,103 @@ --- -title: "Helm" -description: "Deploy Bifrost on Kubernetes using Helm charts with flexible configuration options" -icon: "helicopter-symbol" +title: "Quick Start" +description: "Deploy Bifrost on Kubernetes using the official Helm chart — quickstart for OSS and Enterprise" +icon: "server" --- -Deploy Bifrost on Kubernetes using the official Helm chart. This is the recommended way to deploy Bifrost on Kubernetes with production-ready defaults and flexible configuration. - -**Latest Chart Version:** 1.5.0 | [View on Artifact Hub](https://artifacthub.io/packages/helm/bifrost/bifrost) +**Latest Chart Version:** 2.1.0 | [View on Artifact Hub](https://artifacthub.io/packages/helm/bifrost/bifrost) + + + + ## Prerequisites - Kubernetes cluster (v1.19+) - `kubectl` configured - Helm 3.2.0+ installed -- (Optional) Persistent Volume provisioner -- (Optional) Ingress controller +- Persistent Volume provisioner (required for SQLite; optional for Postgres-only) If you use PostgreSQL for Bifrost storage, ensure the database is UTF8 encoded. See [PostgreSQL UTF8 Requirement](../quickstart/gateway/setting-up#postgresql-utf8-requirement). 
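+A minimal way to verify this, assuming you have `psql` access to the instance (the host and user below are placeholders):
+
+```bash
+# List every database with its encoding; the Bifrost database must report UTF8
+psql -h <postgres-host> -U <postgres-user> -c \
+  "SELECT datname, pg_encoding_to_char(encoding) FROM pg_database;"
+```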
-## Quick Start - -### Add Helm Repository +## Step 1 — Add the Helm Repository ```bash helm repo add bifrost https://maximhq.github.io/bifrost/helm-charts helm repo update ``` -### Install Bifrost - -```bash -helm install bifrost bifrost/bifrost --set image.tag=1.3.45 -``` +## Step 2 — Install -The `image.tag` parameter is required. Check [Docker Hub](https://hub.docker.com/r/maximhq/bifrost/tags) for available versions. +The Helm chart ships ready-made values files under `helm-charts/bifrost/values-examples/`. +For example: `sqlite-only.yaml`, `production-ha.yaml`, `external-postgres.yaml`, and `secrets-from-k8s.yaml`. +See the full list here: https://github.com/maximhq/bifrost/tree/main/helm-charts/bifrost/values-examples -This deploys Bifrost with: -- SQLite storage (10Gi PVC) -- Single replica -- ClusterIP service - -### Access Bifrost - -```bash -kubectl port-forward svc/bifrost 8080:8080 -curl http://localhost:8080/metrics -``` - -## Deployment Patterns - - - - -### Development Setup - -Simple setup for local testing and development. - -```bash -helm install bifrost bifrost/bifrost \ - --set image.tag=1.3.45 \ - --set bifrost.providers.openai.keys[0].value="sk-your-key" \ - --set bifrost.providers.openai.keys[0].weight=1 -``` - -**Features:** -- SQLite storage -- Single replica -- No auto-scaling -- ClusterIP service - -**Access:** -```bash -kubectl port-forward svc/bifrost 8080:8080 -``` - - - - - -### Production Setup - -High-availability setup with PostgreSQL and auto-scaling. - -```yaml -# production.yaml -image: - tag: "1.3.45" # Required: specify the Bifrost version - -replicaCount: 3 - -storage: - mode: postgres - -postgresql: - enabled: true - auth: - password: "your-secure-password" - primary: - persistence: - size: 50Gi - resources: - requests: - cpu: 500m - memory: 1Gi - limits: - cpu: 2000m - memory: 2Gi - -autoscaling: - enabled: true - minReplicas: 3 - maxReplicas: 10 - targetCPUUtilizationPercentage: 70 - targetMemoryUtilizationPercentage: 80 - -ingress: - enabled: true - className: nginx - annotations: - cert-manager.io/cluster-issuer: letsencrypt-prod - hosts: - - host: bifrost.yourdomain.com - paths: - - path: / - pathType: Prefix - tls: - - secretName: bifrost-tls - hosts: - - bifrost.yourdomain.com - -resources: - requests: - cpu: 500m - memory: 1Gi - limits: - cpu: 2000m - memory: 2Gi - -bifrost: - encryptionKey: "your-32-byte-encryption-key" - logLevel: info - - client: - dropExcessRequests: true - enableLogging: true - - providers: - openai: - keys: - - value: "sk-..." - weight: 1 - - plugins: - telemetry: - enabled: true - logging: - enabled: true - governance: - enabled: true -``` - -**Install:** -```bash -helm install bifrost bifrost/bifrost -f production.yaml -``` - -**Features:** -- 3 initial replicas (scales 3-10) -- PostgreSQL database -- Ingress with TLS -- Monitoring enabled - - - - - -### AI Workloads with Semantic Caching - -Optimized for high-volume AI inference with caching. - -```yaml -# ai-workload.yaml -image: - tag: "1.3.45" # Required: specify the Bifrost version - -storage: - mode: postgres - -postgresql: - enabled: true - auth: - password: "secure-password" - primary: - persistence: - size: 50Gi - -vectorStore: - enabled: true - type: weaviate - weaviate: - enabled: true - persistence: - size: 50Gi - resources: - requests: - cpu: 500m - memory: 1Gi - limits: - cpu: 2000m - memory: 2Gi - -bifrost: - encryptionKey: "your-encryption-key" - - providers: - openai: - keys: - - value: "sk-..." 
- weight: 1 - - plugins: - semanticCache: - enabled: true - config: - provider: "openai" - embedding_model: "text-embedding-3-small" - dimension: 1536 - threshold: 0.8 - ttl: "5m" - cache_by_model: true - cache_by_provider: true -``` - -**Install:** -```bash -helm install bifrost bifrost/bifrost -f ai-workload.yaml -``` - -**Features:** -- PostgreSQL for config/logs -- Weaviate for vector storage -- Semantic caching enabled -- Optimized for AI workloads - - - - - -### Multi-Provider Setup - -Support multiple LLM providers with load balancing. - -```yaml -# multi-provider.yaml -image: - tag: "1.3.45" # Required: specify the Bifrost version - -bifrost: - encryptionKey: "your-encryption-key" - - client: - enableLogging: true - allowDirectKeys: false - - providers: - openai: - keys: - - value: "sk-..." - weight: 2 - anthropic: - keys: - - value: "sk-ant-..." - weight: 1 - gemini: - keys: - - value: "..." - weight: 1 - cohere: - keys: - - value: "..." - weight: 1 - - plugins: - telemetry: - enabled: true - logging: - enabled: true -``` - -**Install:** -```bash -helm install bifrost bifrost/bifrost -f multi-provider.yaml -``` - -**Features:** -- Multiple provider support -- Weighted load balancing -- Request/response logging -- Telemetry enabled - - - - - -### External Database - -Use existing PostgreSQL instance. - -```yaml -# external-db.yaml -image: - tag: "1.3.45" # Required: specify the Bifrost version - -storage: - mode: postgres - -postgresql: - enabled: false - external: - enabled: true - host: "postgres.example.com" - port: 5432 - user: "bifrost" - password: "your-password" - database: "bifrost" - sslMode: "require" - -bifrost: - encryptionKey: "your-encryption-key" - - providers: - openai: - keys: - - value: "sk-..." - weight: 1 -``` - -**Install:** -```bash -helm install bifrost bifrost/bifrost -f external-db.yaml -``` - -**Features:** -- Uses external PostgreSQL -- No embedded database -- SSL connection support - - - - - -### Using Kubernetes Secrets - -Store all sensitive values in Kubernetes secrets instead of values files. - -**Prerequisites:** Create Kubernetes secrets first: - -```bash -# PostgreSQL password -kubectl create secret generic postgres-credentials \ - --from-literal=password='your-postgres-password' - -# Encryption key -kubectl create secret generic bifrost-encryption \ - --from-literal=key='your-encryption-key' - -# Provider API keys -kubectl create secret generic provider-api-keys \ - --from-literal=openai-api-key='sk-...' \ - --from-literal=anthropic-api-key='sk-ant-...' 
- -# Qdrant API key (if using) -kubectl create secret generic qdrant-credentials \ - --from-literal=api-key='your-qdrant-api-key' -``` - -```yaml -# secrets-config.yaml -image: - tag: "1.3.45" - -storage: - mode: postgres - -# External PostgreSQL with secret reference -postgresql: - enabled: false - external: - enabled: true - host: "postgres.example.com" - port: 5432 - user: "bifrost" - database: "bifrost" - sslMode: "require" - existingSecret: "postgres-credentials" - passwordKey: "password" - -# Vector store with secret reference -vectorStore: - enabled: true - type: qdrant - qdrant: - external: - enabled: true - host: "qdrant.example.com" - port: 6334 - existingSecret: "qdrant-credentials" - apiKeyKey: "api-key" - -bifrost: - # Encryption key from secret - encryptionKeySecret: - name: "bifrost-encryption" - key: "key" - - # Provider configs using env var references - providers: - openai: - keys: - - value: "env.OPENAI_API_KEY" - weight: 1 - anthropic: - keys: - - value: "env.ANTHROPIC_API_KEY" - weight: 1 - - # Inject provider secrets as env vars - providerSecrets: - openai: - existingSecret: "provider-api-keys" - key: "openai-api-key" - envVar: "OPENAI_API_KEY" - anthropic: - existingSecret: "provider-api-keys" - key: "anthropic-api-key" - envVar: "ANTHROPIC_API_KEY" -``` - -**Install:** -```bash -helm install bifrost bifrost/bifrost -f secrets-config.yaml -``` - -**Features:** -- No sensitive values in values files -- Secrets managed by Kubernetes -- Works with external secret managers (Vault, AWS Secrets Manager via External Secrets Operator) - - - - -## Configuration - -### Key Parameters - -| Parameter | Description | Default | -|-----------|-------------|---------| -| `image.tag` | **Required.** Bifrost image version (e.g., 1.3.45) | `""` | -| `replicaCount` | Number of replicas | `1` | -| `storage.mode` | Storage backend (sqlite/postgres) | `sqlite` | -| `storage.persistence.size` | PVC size for SQLite | `10Gi` | -| `postgresql.enabled` | Deploy PostgreSQL | `false` | -| `vectorStore.enabled` | Enable vector store | `false` | -| `vectorStore.type` | Vector store type (weaviate/redis/qdrant). 
Use `redis` for Redis or Valkey-compatible services | `none` | -| `bifrost.encryptionKey` | Encryption key | `""` | -| `ingress.enabled` | Enable ingress | `false` | -| `autoscaling.enabled` | Enable HPA | `false` | - -### Secret Reference Parameters - -Use existing Kubernetes secrets instead of plain-text values: - -| Parameter | Description | Default | -|-----------|-------------|---------| -| `bifrost.encryptionKeySecret.name` | Secret name for encryption key | `""` | -| `bifrost.encryptionKeySecret.key` | Key within the secret | `""` | -| `postgresql.external.existingSecret` | Secret name for PostgreSQL password | `""` | -| `postgresql.external.passwordKey` | Key within the secret | `"password"` | -| `vectorStore.redis.external.existingSecret` | Secret name for Redis password | `""` | -| `vectorStore.redis.external.passwordKey` | Key within the secret | `"password"` | -| `vectorStore.weaviate.external.existingSecret` | Secret name for Weaviate API key | `""` | -| `vectorStore.weaviate.external.apiKeyKey` | Key within the secret | `"api-key"` | -| `vectorStore.qdrant.external.existingSecret` | Secret name for Qdrant API key | `""` | -| `vectorStore.qdrant.external.apiKeyKey` | Key within the secret | `"api-key"` | -| `bifrost.plugins.maxim.secretRef.name` | Secret name for Maxim API key | `""` | -| `bifrost.plugins.maxim.secretRef.key` | Key within the secret | `"api-key"` | -| `bifrost.providerSecrets..existingSecret` | Secret name for provider API key | `""` | -| `bifrost.providerSecrets..key` | Key within the secret | `"api-key"` | -| `bifrost.providerSecrets..envVar` | Environment variable name to inject | `""` | - -### Provider Configuration - -Add provider keys via values file: - -```yaml -bifrost: - providers: - openai: - keys: - - value: "sk-..." - weight: 1 - anthropic: - keys: - - value: "sk-ant-..." - weight: 1 -``` - -Or via command line: - -```bash -helm install bifrost bifrost/bifrost \ - --set image.tag=1.3.45 \ - --set bifrost.providers.openai.keys[0].value="sk-..." \ - --set bifrost.providers.openai.keys[0].weight=1 -``` - -#### Using Environment Variables for Provider Keys - -Bifrost supports `env.VAR_NAME` syntax to reference environment variables. Combined with `providerSecrets`, you can keep API keys in Kubernetes secrets: - -```yaml -bifrost: - providers: - openai: - keys: - - value: "env.OPENAI_API_KEY" # References environment variable - weight: 1 - - # Inject secrets as environment variables - providerSecrets: - openai: - existingSecret: "my-openai-secret" - key: "api-key" - envVar: "OPENAI_API_KEY" -``` - -This pattern: -1. Creates a Kubernetes secret with the API key -2. Injects the secret as an environment variable (`OPENAI_API_KEY`) -3. 
Bifrost resolves `env.OPENAI_API_KEY` at runtime - -### Plugin Configuration - -Enable and configure plugins: - -```yaml -bifrost: - plugins: - telemetry: - enabled: true - config: {} - - logging: - enabled: true - config: {} - - governance: - enabled: true - config: - is_vk_mandatory: false - - semanticCache: - enabled: true - config: - provider: "openai" - embedding_model: "text-embedding-3-small" - dimension: 1536 - threshold: 0.8 - ttl: "5m" - cache_by_model: true - cache_by_provider: true -``` - -## Operations - -### Upgrade - -```bash -# Update repository -helm repo update - -# Upgrade with same values -helm upgrade bifrost bifrost/bifrost --reuse-values - -# Upgrade with new values -helm upgrade bifrost bifrost/bifrost -f your-values.yaml -``` - -### Rollback - -```bash -# View release history -helm history bifrost - -# Rollback to previous version -helm rollback bifrost - -# Rollback to specific revision -helm rollback bifrost 2 -``` - -### Uninstall - -```bash -# Uninstall release -helm uninstall bifrost - -# Delete PVCs (if you want to remove data) -kubectl delete pvc -l app.kubernetes.io/instance=bifrost -``` - -### Scale - -```bash -# Scale manually -kubectl scale deployment bifrost --replicas=5 - -# Or update via Helm -helm upgrade bifrost bifrost/bifrost \ - --set replicaCount=5 \ - --reuse-values -``` - -## Monitoring - -### Prometheus Metrics - -Bifrost exposes Prometheus metrics at `/metrics`. - -Enable ServiceMonitor for automatic scraping: - -```yaml -serviceMonitor: - enabled: true - interval: 30s - scrapeTimeout: 10s -``` - -### Health Checks - -Check pod health: - -```bash -# View pod status -kubectl get pods -l app.kubernetes.io/name=bifrost - -# Check logs -kubectl logs -l app.kubernetes.io/name=bifrost --tail=100 - -# Describe pod -kubectl describe pod -l app.kubernetes.io/name=bifrost -``` - -### Metrics Endpoints - -```bash -# Port forward -kubectl port-forward svc/bifrost 8080:8080 - -# Check metrics -curl http://localhost:8080/metrics - -# Check health -curl http://localhost:8080/health -``` - -## Troubleshooting + + -### Pod Not Starting +Fastest way to get running. Bifrost deploys as a StatefulSet with a 10Gi PVC for SQLite. 
```bash -# Check events -kubectl describe pod -l app.kubernetes.io/name=bifrost - -# Check logs -kubectl logs -l app.kubernetes.io/name=bifrost +kubectl create secret generic bifrost-encryption-key \ + --from-literal=encryption-key="$(openssl rand -base64 32)" -# Common issues: -# - Image pull errors: Check repository access -# - PVC binding: Check PVC status -# - Config errors: Validate ConfigMap +helm install bifrost bifrost/bifrost \ + --set image.tag=v1.4.11 \ + --set bifrost.encryptionKeySecret.name="bifrost-encryption-key" \ + --set bifrost.encryptionKeySecret.key="encryption-key" ``` -### Database Connection Issues + + + +Add your first provider key at install time: ```bash -# For embedded PostgreSQL -kubectl exec -it deployment/bifrost-postgresql -- psql -U bifrost +kubectl create secret generic bifrost-encryption-key \ + --from-literal=encryption-key="$(openssl rand -base64 32)" -# Check connectivity from pod -kubectl exec -it deployment/bifrost -- nc -zv bifrost-postgresql 5432 +kubectl create secret generic provider-keys \ + --from-literal=openai-api-key='sk-your-key' -# Check secret -kubectl get secret bifrost-config -o yaml +helm install bifrost bifrost/bifrost \ + --set image.tag=v1.4.11 \ + --set bifrost.encryptionKeySecret.name="bifrost-encryption-key" \ + --set bifrost.encryptionKeySecret.key="encryption-key" \ + --set 'bifrost.providers.openai.keys[0].name=primary' \ + --set 'bifrost.providers.openai.keys[0].value=env.OPENAI_API_KEY' \ + --set 'bifrost.providers.openai.keys[0].weight=1' \ + --set bifrost.providerSecrets.openai.existingSecret="provider-keys" \ + --set bifrost.providerSecrets.openai.key="openai-api-key" \ + --set bifrost.providerSecrets.openai.envVar="OPENAI_API_KEY" ``` -### High Memory Usage - -```bash -# Check resource usage -kubectl top pods -l app.kubernetes.io/name=bifrost - -# Increase limits -helm upgrade bifrost bifrost/bifrost \ - --set resources.limits.memory=4Gi \ - --reuse-values -``` + + -### Ingress Not Working +High-availability setup — 3 replicas, PostgreSQL, autoscaling, ingress. ```bash -# Check ingress status -kubectl describe ingress bifrost +# 1. Create secrets +kubectl create secret generic bifrost-encryption-key \ + --from-literal=encryption-key="$(openssl rand -base64 32)" -# Check ingress controller logs -kubectl logs -n ingress-nginx -l app.kubernetes.io/name=ingress-nginx +kubectl create secret generic postgres-credentials \ + --from-literal=password="$(openssl rand -base64 32)" -# Verify DNS -nslookup bifrost.yourdomain.com +kubectl create secret generic provider-keys \ + --from-literal=openai-api-key='sk-...' 
``` -## Advanced Configuration - -### Custom Values File - -Create `my-values.yaml`: - ```yaml +# production.yaml image: - tag: "1.3.45" # Required: specify the Bifrost version + tag: "v1.4.11" replicaCount: 3 @@ -744,105 +107,157 @@ storage: postgresql: enabled: true auth: - password: "secure-password" + username: bifrost + database: bifrost + existingSecret: "postgres-credentials" + secretKeys: + adminPasswordKey: "password" + primary: + persistence: + size: 50Gi + resources: + requests: + cpu: 500m + memory: 1Gi + limits: + cpu: 2000m + memory: 2Gi autoscaling: enabled: true minReplicas: 3 maxReplicas: 10 + targetCPUUtilizationPercentage: 70 + targetMemoryUtilizationPercentage: 80 ingress: enabled: true className: nginx + annotations: + cert-manager.io/cluster-issuer: letsencrypt-prod hosts: - - host: bifrost.example.com + - host: bifrost.yourdomain.com paths: - path: / pathType: Prefix + tls: + - secretName: bifrost-tls + hosts: + - bifrost.yourdomain.com + +resources: + requests: + cpu: 500m + memory: 1Gi + limits: + cpu: 2000m + memory: 2Gi bifrost: - encryptionKey: "your-32-byte-key" + encryptionKeySecret: + name: "bifrost-encryption-key" + key: "encryption-key" + + client: + initialPoolSize: 500 + dropExcessRequests: true + enableLogging: true + providers: openai: keys: - - value: "sk-..." + - name: "openai-primary" + value: "env.OPENAI_API_KEY" weight: 1 -``` -Install: + providerSecrets: + openai: + existingSecret: "provider-keys" + key: "openai-api-key" + envVar: "OPENAI_API_KEY" + + plugins: + telemetry: + enabled: true + logging: + enabled: true + governance: + enabled: true +``` ```bash -helm install bifrost bifrost/bifrost -f my-values.yaml +# 2. Install +helm install bifrost bifrost/bifrost -f production.yaml ``` -### Environment Variables + + -Add custom environment variables: + +`image.tag` is required — the chart will not start without it. Check [Docker Hub](https://hub.docker.com/r/maximhq/bifrost/tags) for available versions. + -```yaml -env: - - name: CUSTOM_VAR - value: "custom-value" - -envFrom: - - secretRef: - name: bifrost-secrets - - configMapRef: - name: bifrost-config -``` +## Step 3 — Verify -### Node Affinity +```bash +# Check pods are running +kubectl get pods -l app.kubernetes.io/name=bifrost -Deploy to specific nodes: +# Port forward and hit the health endpoint +kubectl port-forward svc/bifrost 8080:8080 +curl http://localhost:8080/health -```yaml -nodeSelector: - node-type: ai-workload +# Check Prometheus metrics +curl http://localhost:8080/metrics +``` -affinity: - podAntiAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - - labelSelector: - matchLabels: - app.kubernetes.io/name: bifrost - topologyKey: kubernetes.io/hostname +## Step 4 — Configure Providers & Plugins -tolerations: - - key: "gpu" - operator: "Equal" - value: "true" - effect: "NoSchedule" +```bash +# Make your first inference call +curl http://localhost:8080/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{ + "model": "gpt-4o-mini", + "messages": [{"role": "user", "content": "Hello from Bifrost!"}] + }' ``` -## Enterprise Deployment +Next steps: jump to [Next Steps](#next-steps). + + + + -For enterprise customers, Bifrost provides dedicated container images hosted in private registries with additional features, support, and SLAs. +Enterprise customers receive dedicated container images in a private registry, along with additional features, SLAs, and compliance documentation. 
[Book a demo](https://calendly.com/maximai/bifrost-demo) to learn more about our enterprise features. -### Private Container Registry - -Enterprise customers receive access to Bifrost images in a private container registry. To use your enterprise registry, override the `image.repository` with your provided registry URL: +## Prerequisites - - +- Kubernetes cluster (v1.19+) +- `kubectl` configured +- Helm 3.2.0+ installed +- Enterprise registry credentials (provided by Maxim) -```yaml -# enterprise-gcp.yaml -image: - repository: us-west1-docker.pkg.dev/bifrost-enterprise/your-org/bifrost - tag: "latest" +## Step 1 — Add the Helm Repository -imagePullSecrets: - - name: gcr-secret +```bash +helm repo add bifrost https://maximhq.github.io/bifrost/helm-charts +helm repo update ``` -**Create the pull secret:** +## Step 2 — Create Pull Secret + +Create a Kubernetes image pull secret for our private enterprise registry: + + + ```bash -kubectl create secret docker-registry gcr-secret \ +kubectl create secret docker-registry enterprise-registry-secret \ --docker-server=us-west1-docker.pkg.dev \ --docker-username=_json_key \ --docker-password="$(cat service-account-key.json)" \ @@ -852,46 +267,22 @@ kubectl create secret docker-registry gcr-secret \ - -```yaml -# enterprise-aws.yaml -image: - repository: 123456789.dkr.ecr.us-east-1.amazonaws.com/bifrost - tag: "latest" - -imagePullSecrets: - - name: ecr-secret -``` - -**Create the pull secret:** - ```bash -kubectl create secret docker-registry ecr-secret \ +kubectl create secret docker-registry enterprise-registry-secret \ --docker-server=123456789.dkr.ecr.us-east-1.amazonaws.com \ --docker-username=AWS \ --docker-password=$(aws ecr get-login-password --region us-east-1) ``` -ECR tokens expire after 12 hours. Consider using [ECR Credential Helper](https://github.com/awslabs/amazon-ecr-credential-helper) or an operator like [ECR Registry Creds](https://github.com/upmc-enterprises/registry-creds) for automatic token refresh. +ECR tokens expire after 12 hours. Use the [ECR Credential Helper](https://github.com/awslabs/amazon-ecr-credential-helper) or [ECR Registry Creds operator](https://github.com/upmc-enterprises/registry-creds) for automatic refresh. -```yaml -# enterprise-azure.yaml -image: - repository: yourregistry.azurecr.io/bifrost - tag: "latest" - -imagePullSecrets: - - name: acr-secret -``` - -**Create the pull secret:** - ```bash -kubectl create secret docker-registry acr-secret \ +kubectl create secret docker-registry enterprise-registry-secret \ --docker-server=yourregistry.azurecr.io \ --docker-username= \ --docker-password= @@ -900,20 +291,8 @@ kubectl create secret docker-registry acr-secret \ -```yaml -# enterprise-self-hosted.yaml -image: - repository: registry.yourcompany.com/ai/bifrost - tag: "latest" - -imagePullSecrets: - - name: private-registry-secret -``` - -**Create the pull secret:** - ```bash -kubectl create secret docker-registry private-registry-secret \ +kubectl create secret docker-registry enterprise-registry-secret \ --docker-server=registry.yourcompany.com \ --docker-username= \ --docker-password= @@ -922,14 +301,30 @@ kubectl create secret docker-registry private-registry-secret \ - -### Full Enterprise Configuration +## Step 3 — Create Required Secrets + +```bash +# Encryption key +kubectl create secret generic bifrost-encryption \ + --from-literal=key="$(openssl rand -base64 32)" + +# Provider API keys +kubectl create secret generic provider-keys \ + --from-literal=openai-api-key='sk-...'
\ + --from-literal=anthropic-api-key='sk-ant-...' + +# Admin credentials (for dashboard + governance) +kubectl create secret generic bifrost-admin-credentials \ + --from-literal=username='admin' \ + --from-literal=password='secure-admin-password' +``` -Complete example for enterprise deployments with all recommended settings: +## Step 4 — Install ```yaml -# enterprise-full.yaml +# enterprise.yaml image: - # Your enterprise registry URL (provided by Maxim) + # Registry URL provided by Maxim repository: us-west1-docker.pkg.dev/bifrost-enterprise/your-org/bifrost tag: "latest" @@ -938,7 +333,6 @@ imagePullSecrets: replicaCount: 3 -# Production-grade resources resources: requests: cpu: 1000m @@ -947,7 +341,6 @@ resources: cpu: 4000m memory: 8Gi -# Auto-scaling for high availability autoscaling: enabled: true minReplicas: 3 @@ -955,14 +348,13 @@ autoscaling: targetCPUUtilizationPercentage: 70 targetMemoryUtilizationPercentage: 80 -# PostgreSQL storage storage: mode: postgres postgresql: enabled: true auth: - password: "secure-password" # Use existingSecret in production + password: "secure-password" # use existingSecret in production primary: persistence: size: 100Gi @@ -974,7 +366,6 @@ postgresql: cpu: 4000m memory: 8Gi -# Vector store for semantic caching vectorStore: enabled: true type: weaviate @@ -983,7 +374,6 @@ vectorStore: persistence: size: 100Gi -# Ingress with TLS ingress: enabled: true className: nginx @@ -1000,17 +390,16 @@ ingress: hosts: - bifrost.yourcompany.com -# Bifrost configuration bifrost: encryptionKeySecret: name: "bifrost-encryption" key: "key" - + client: initialPoolSize: 1000 dropExcessRequests: true enableLogging: true - disableContentLogging: false # Set to true for compliance + disableContentLogging: false # set true for HIPAA/compliance logRetentionDays: 365 enforceGovernanceHeader: true allowDirectKeys: false @@ -1018,29 +407,29 @@ bifrost: allowedOrigins: - "https://yourcompany.com" - "https://*.yourcompany.com" - - # Use secrets for provider keys + providers: openai: keys: - - value: "env.OPENAI_API_KEY" + - name: "openai-primary" + value: "env.OPENAI_API_KEY" weight: 1 anthropic: keys: - - value: "env.ANTHROPIC_API_KEY" + - name: "anthropic-primary" + value: "env.ANTHROPIC_API_KEY" weight: 1 - + providerSecrets: openai: - existingSecret: "provider-api-keys" + existingSecret: "provider-keys" key: "openai-api-key" envVar: "OPENAI_API_KEY" anthropic: - existingSecret: "provider-api-keys" + existingSecret: "provider-keys" key: "anthropic-api-key" envVar: "ANTHROPIC_API_KEY" - - # Governance with authentication + governance: authConfig: isEnabled: true @@ -1048,8 +437,7 @@ bifrost: existingSecret: "bifrost-admin-credentials" usernameKey: "username" passwordKey: "password" - - # Enable all plugins + plugins: telemetry: enabled: true @@ -1068,7 +456,6 @@ bifrost: threshold: 0.85 ttl: "1h" -# Pod distribution affinity: podAntiAffinity: requiredDuringSchedulingIgnoredDuringExecution: @@ -1078,52 +465,159 @@ affinity: topologyKey: kubernetes.io/hostname ``` -### Enterprise Prerequisites +```bash +helm install bifrost bifrost/bifrost -f enterprise.yaml +``` + +Next steps: jump to [Next Steps](#next-steps). + +## Enterprise Support + +Enterprise customers have access to: +- Dedicated Slack channel for support +- Priority bug fixes and feature requests +- Custom feature development +- SLA guarantees +- Compliance documentation (SOC2, HIPAA, etc.) + +Contact [support@getmaxim.ai](mailto:support@getmaxim.ai) for support. 
+ + + + + +--- + +## Operations -Before deploying, create the required secrets: +### Upgrade ```bash -# 1. Registry pull secret (see registry-specific instructions above) +helm repo update -# 2. Encryption key -kubectl create secret generic bifrost-encryption \ - --from-literal=key='your-32-byte-encryption-key' +# Upgrade reusing all existing values +helm upgrade bifrost bifrost/bifrost --reuse-values -# 3. Provider API keys -kubectl create secret generic provider-api-keys \ - --from-literal=openai-api-key='sk-...' \ - --from-literal=anthropic-api-key='sk-ant-...' +# Upgrade with new values +helm upgrade bifrost bifrost/bifrost -f your-values.yaml -# 4. Admin credentials (for governance) -kubectl create secret generic bifrost-admin-credentials \ - --from-literal=username='admin' \ - --from-literal=password='secure-admin-password' +# Upgrade and override a single field +helm upgrade bifrost bifrost/bifrost \ + --reuse-values \ + --set image.tag=v1.4.11 ``` -### Install Enterprise Build +### Rollback ```bash -helm install bifrost bifrost/bifrost -f enterprise-full.yaml +helm history bifrost +helm rollback bifrost # to previous revision +helm rollback bifrost 2 # to specific revision ``` -### Enterprise Support +### Scale -Enterprise customers have access to: -- Dedicated Slack channel for support -- Priority bug fixes and feature requests -- Custom feature development -- SLA guarantees -- Compliance documentation (SOC2, HIPAA, etc.) +```bash +kubectl scale deployment bifrost --replicas=5 + +# Or via Helm +helm upgrade bifrost bifrost/bifrost \ + --reuse-values \ + --set replicaCount=5 +``` + +### Uninstall + +```bash +helm uninstall bifrost + +# Also remove PVCs (permanently deletes all data) +kubectl delete pvc -l app.kubernetes.io/instance=bifrost +``` + +--- + +## Monitoring + +### Prometheus Metrics + +Bifrost exposes Prometheus metrics at `/metrics`. + +Enable ServiceMonitor for automatic scraping: + +```yaml +serviceMonitor: + enabled: true + interval: 30s + scrapeTimeout: 10s +``` -Contact [support@getmaxim.ai](mailto:support@getmaxim.ai) for enterprise support. 
+### Health Checks + +Check pod health: + +```bash +# View pod status +kubectl get pods -l app.kubernetes.io/name=bifrost + +# Check logs +kubectl logs -l app.kubernetes.io/name=bifrost --tail=100 + +# Describe pod +kubectl describe pod -l app.kubernetes.io/name=bifrost +``` + +### Metrics Endpoints + +```bash +# Port forward +kubectl port-forward svc/bifrost 8080:8080 + +# Check metrics +curl http://localhost:8080/metrics + +# Check health +curl http://localhost:8080/health +``` + +--- + +## Configuration Guides + + + + All parameters, secret references, advanced config, example patterns + + + Pool size, logging, CORS, header filtering, compat shims, MCP settings + + + OpenAI, Anthropic, Azure, Bedrock, Vertex, Groq, self-hosted + + + SQLite, PostgreSQL, object storage for logs, vector stores + + + Telemetry, logging, semantic cache, OTel, Datadog, governance + + + Budgets, rate limits, virtual keys, routing rules + + + Multi-replica HA, gossip, peer discovery + + + Pod startup, database, ingress, PVC, secrets, performance + + + +--- ## Resources - [Helm Chart Repository](https://github.com/maximhq/bifrost/tree/main/helm-charts) - [Artifact Hub](https://artifacthub.io/packages/helm/bifrost/bifrost) -- [Complete Installation Guide](https://github.com/maximhq/bifrost/blob/main/helm-charts/INSTALL.md) - [Example Configurations](https://github.com/maximhq/bifrost/tree/main/helm-charts/bifrost/values-examples) -- [Kubernetes Secrets Example](https://github.com/maximhq/bifrost/blob/main/helm-charts/bifrost/values-examples/secrets-from-k8s.yaml) - [GitHub Issues](https://github.com/maximhq/bifrost/issues) ## Next Steps diff --git a/docs/deployment-guides/helm/client.mdx b/docs/deployment-guides/helm/client.mdx new file mode 100644 index 0000000000..b3fd2dc968 --- /dev/null +++ b/docs/deployment-guides/helm/client.mdx @@ -0,0 +1,316 @@ +--- +title: "Client Configuration" +description: "Configure the Bifrost client: connection pool, logging, CORS, header filtering, compat shims, and MCP settings" +icon: "gear" +--- + +The `bifrost.client` block controls how Bifrost manages its internal worker pool, request logging, authentication enforcement, header policies, SDK compatibility shims, and MCP agent behaviour. All settings map directly to the `client` section of the rendered `config.json`. + +--- + +## Connection Pool + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `bifrost.client.initialPoolSize` | Pre-allocated worker goroutines per provider queue | `300` | +| `bifrost.client.dropExcessRequests` | Drop requests when queue is full instead of waiting | `false` | + +A larger pool reduces latency spikes under burst load at the cost of higher baseline memory. For production workloads with multiple providers, `1000` is a common starting point. 
+ +```yaml +# client-pool.yaml +image: + tag: "v1.4.11" + +bifrost: + client: + initialPoolSize: 1000 + dropExcessRequests: true # Return 429 instead of queuing indefinitely +``` + +```bash +helm install bifrost bifrost/bifrost -f client-pool.yaml + +# Or set inline +helm upgrade bifrost bifrost/bifrost \ + --reuse-values \ + --set bifrost.client.initialPoolSize=1000 \ + --set bifrost.client.dropExcessRequests=true +``` + +--- + +## Request & Response Logging + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `bifrost.client.enableLogging` | Log all LLM requests and responses | `true` | +| `bifrost.client.disableContentLogging` | Strip message content from logs (keeps metadata) | `false` | +| `bifrost.client.logRetentionDays` | Days to retain log entries in the store | `365` | +| `bifrost.client.loggingHeaders` | HTTP request headers to capture in log metadata | `[]` | + +Set `disableContentLogging: true` for HIPAA / PCI compliance workloads where message content must not be persisted. + +```yaml +bifrost: + client: + enableLogging: true + disableContentLogging: true # PII / compliance: store metadata only + logRetentionDays: 90 + loggingHeaders: + - "x-request-id" + - "x-user-id" +``` + +```bash +helm upgrade bifrost bifrost/bifrost \ + --reuse-values \ + --set bifrost.client.disableContentLogging=true \ + --set bifrost.client.logRetentionDays=90 +``` + +--- + +## Security & CORS + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `bifrost.client.allowedOrigins` | CORS allowed origins | `["*"]` | +| `bifrost.client.allowDirectKeys` | Allow callers to pass provider keys directly in requests | `false` | +| `bifrost.client.enforceGovernanceHeader` | Require `x-bf-vk` virtual-key header on every request | `false` | +| `bifrost.client.maxRequestBodySizeMb` | Maximum allowed request body size | `100` | +| `bifrost.client.whitelistedRoutes` | Routes that bypass auth middleware | `[]` | + +```yaml +bifrost: + client: + allowedOrigins: + - "https://app.yourdomain.com" + - "https://admin.yourdomain.com" + allowDirectKeys: false # Prevent callers from supplying raw provider keys + enforceGovernanceHeader: true # Every request must carry a virtual key + maxRequestBodySizeMb: 50 + whitelistedRoutes: + - "/health" + - "/metrics" +``` + +```bash +helm install bifrost bifrost/bifrost \ + --set image.tag=v1.4.11 \ + --set bifrost.client.enforceGovernanceHeader=true \ + --set bifrost.client.allowDirectKeys=false +``` + +--- + +## Header Filtering + +Controls which `x-bf-eh-*` headers are forwarded to upstream LLM providers. + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `bifrost.client.headerFilterConfig.allowlist` | Only these headers are forwarded (whitelist mode) | `[]` | +| `bifrost.client.headerFilterConfig.denylist` | These headers are always blocked | `[]` | +| `bifrost.client.requiredHeaders` | Headers that must be present on every request | `[]` | +| `bifrost.client.allowedHeaders` | Additional headers permitted for CORS and WebSocket | `[]` | + +When both lists are empty, all `x-bf-eh-*` headers pass through. Specifying an `allowlist` enables strict whitelist mode — only listed headers are forwarded. 
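+
+For example, a caller that needs to forward a provider-specific header through the gateway prefixes it with `x-bf-eh-`. A sketch using the Anthropic version header from the allowlist example below, assuming a port-forward to the gateway:
+
+```bash
+curl http://localhost:8080/v1/chat/completions \
+  -H "x-bf-eh-anthropic-version: 2023-06-01" \
+  -H "Content-Type: application/json" \
+  -d '{"model":"claude-3-5-sonnet-20241022","messages":[{"role":"user","content":"Hello"}]}'
+```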
+ +```yaml +bifrost: + client: + headerFilterConfig: + allowlist: + - "x-bf-eh-anthropic-version" + - "x-bf-eh-openai-beta" + denylist: [] + requiredHeaders: + - "x-request-id" +``` + +--- + +## Authentication + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `bifrost.authConfig.isEnabled` | Enable username/password auth for the API and dashboard | `false` | +| `bifrost.authConfig.adminUsername` | Admin username (plain text, prefer secret) | `""` | +| `bifrost.authConfig.adminPassword` | Admin password (plain text, prefer secret) | `""` | +| `bifrost.authConfig.existingSecret` | Kubernetes Secret name for credentials | `""` | +| `bifrost.authConfig.usernameKey` | Key within the secret for username | `"username"` | +| `bifrost.authConfig.passwordKey` | Key within the secret for password | `"password"` | +| `bifrost.authConfig.disableAuthOnInference` | Skip auth check on `/v1/*` inference routes | `false` | + +```bash +# Create secret first +kubectl create secret generic bifrost-admin \ + --from-literal=username='admin' \ + --from-literal=password='your-secure-password' +``` + +```yaml +bifrost: + authConfig: + isEnabled: true + disableAuthOnInference: false + existingSecret: "bifrost-admin" + usernameKey: "username" + passwordKey: "password" +``` + +```bash +helm upgrade bifrost bifrost/bifrost \ + --reuse-values \ + -f auth-values.yaml +``` + +--- + +## Encryption + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `bifrost.encryptionKey` | 32-byte encryption key (plain text — use secret in production) | `""` | +| `bifrost.encryptionKeySecret.name` | Kubernetes Secret name containing the key | `""` | +| `bifrost.encryptionKeySecret.key` | Key within the secret | `"encryption-key"` | + +Always use a Kubernetes Secret in production: + +```bash +kubectl create secret generic bifrost-encryption \ + --from-literal=encryption-key='your-32-byte-encryption-key-here' +``` + +```yaml +bifrost: + encryptionKeySecret: + name: "bifrost-encryption" + key: "encryption-key" +``` + +```bash +helm install bifrost bifrost/bifrost \ + --set image.tag=v1.4.11 \ + -f encryption-values.yaml +``` + +--- + +## Async Jobs & Database Pings + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `bifrost.client.disableDbPingsInHealth` | Exclude DB connectivity from `/health` checks | `false` | +| `bifrost.client.asyncJobResultTTL` | TTL (seconds) for async job results | `3600` | + +--- + +## Compat Shims + +Compatibility flags that let Bifrost silently adapt request/response shapes for SDK integrations: + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `bifrost.client.compat.convertTextToChat` | Wrap legacy text completions as chat messages | `false` | +| `bifrost.client.compat.convertChatToResponses` | Translate chat completions to Responses API format | `false` | +| `bifrost.client.compat.shouldDropParams` | Silently drop unsupported parameters instead of erroring | `false` | +| `bifrost.client.compat.shouldConvertParams` | Auto-convert parameter names across provider schemas | `false` | + +```yaml +bifrost: + client: + compat: + shouldDropParams: true # Useful when proxying mixed SDK traffic + convertTextToChat: true # For clients using the legacy /v1/completions endpoint +``` + +--- + +## Prometheus Labels + +Add custom labels to every Prometheus metric emitted by Bifrost: + +```yaml +bifrost: + client: + prometheusLabels: + - name: "environment" + value: "production" + - name: "region" + 
value: "us-east-1" +``` + +--- + +## MCP Agent Settings + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `bifrost.client.mcpAgentDepth` | Maximum tool-call recursion depth for MCP agent mode | `10` | +| `bifrost.client.mcpToolExecutionTimeout` | Timeout per tool execution in seconds | `30` | +| `bifrost.client.mcpCodeModeBindingLevel` | Code mode binding level (`server` or `tool`) | `""` | +| `bifrost.client.mcpToolSyncInterval` | Global tool sync interval in minutes (`0` = disabled) | `0` | + +```yaml +bifrost: + client: + mcpAgentDepth: 15 + mcpToolExecutionTimeout: 60 +``` + +--- + +## Full Example + +```yaml +# client-full.yaml +image: + tag: "v1.4.11" + +bifrost: + encryptionKeySecret: + name: "bifrost-encryption" + key: "encryption-key" + + authConfig: + isEnabled: true + disableAuthOnInference: false + existingSecret: "bifrost-admin" + usernameKey: "username" + passwordKey: "password" + + client: + initialPoolSize: 1000 + dropExcessRequests: true + allowedOrigins: + - "https://app.yourdomain.com" + enableLogging: true + disableContentLogging: false + logRetentionDays: 90 + enforceGovernanceHeader: true + allowDirectKeys: false + maxRequestBodySizeMb: 100 + headerFilterConfig: + allowlist: [] + denylist: [] + prometheusLabels: + - name: "environment" + value: "production" + mcpAgentDepth: 10 + mcpToolExecutionTimeout: 30 +``` + +```bash +# Create prerequisites +kubectl create secret generic bifrost-encryption \ + --from-literal=encryption-key='your-32-byte-encryption-key-here' + +kubectl create secret generic bifrost-admin \ + --from-literal=username='admin' \ + --from-literal=password='your-secure-password' + +# Install +helm install bifrost bifrost/bifrost -f client-full.yaml +``` diff --git a/docs/deployment-guides/helm/cluster.mdx b/docs/deployment-guides/helm/cluster.mdx new file mode 100644 index 0000000000..ea86536e5c --- /dev/null +++ b/docs/deployment-guides/helm/cluster.mdx @@ -0,0 +1,513 @@ +--- +title: "Cluster Mode & HA" +description: "Run Bifrost in a multi-replica cluster with gossip-based peer discovery, distributed state sync, and high-availability configuration" +icon: "network-wired" +--- + +Cluster mode enables multiple Bifrost replicas to share state — rate limits, budget counters, and governance data — across pods. When `bifrost.cluster.enabled` is `false` (the default), each replica operates independently and state is only shared via the database. + + +Cluster mode requires **PostgreSQL** as the storage backend. SQLite is single-node only. 
+ + +## When to Use Cluster Mode + +| Scenario | Recommendation | +|----------|---------------| +| Single replica | Not needed | +| Multiple replicas, shared DB only | Optional — DB provides eventual consistency | +| Multiple replicas with strict per-minute rate limiting | **Enable cluster mode** — in-memory counters are synced via gossip | +| Geographic multi-region | Enable cluster mode with DNS or Consul discovery | + +--- + +## Basic Cluster Setup + +```yaml +# cluster-values.yaml +image: + tag: "v1.4.11" + +replicaCount: 3 + +storage: + mode: postgres + +postgresql: + external: + enabled: true + host: "your-postgres-host.example.com" + port: 5432 + user: bifrost + database: bifrost + sslMode: require + existingSecret: "postgres-credentials" + passwordKey: "password" + +bifrost: + encryptionKeySecret: + name: "bifrost-encryption" + key: "encryption-key" + + cluster: + enabled: true + gossip: + port: 7946 + config: + timeoutSeconds: 10 + successThreshold: 3 + failureThreshold: 3 + +# Spread replicas across nodes for true HA +affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + app.kubernetes.io/name: bifrost + topologyKey: kubernetes.io/hostname + +# Conservative scale-down: avoid killing pods mid-stream +autoscaling: + enabled: true + minReplicas: 3 + maxReplicas: 10 + targetCPUUtilizationPercentage: 70 + behavior: + scaleDown: + stabilizationWindowSeconds: 300 + policies: + - type: Pods + value: 1 + periodSeconds: 120 + +# Give in-flight SSE streams time to drain +terminationGracePeriodSeconds: 90 +lifecycle: + preStop: + exec: + command: ["sh", "-c", "sleep 20"] +``` + +```bash +kubectl create secret generic postgres-credentials \ + --from-literal=password='your-postgres-password' + +kubectl create secret generic bifrost-encryption \ + --from-literal=encryption-key='your-32-byte-encryption-key' + +helm install bifrost bifrost/bifrost -f cluster-values.yaml +``` + +--- + +## Peer Discovery + +Bifrost uses a gossip protocol (memberlist) for peer-to-peer state sync. Configure how peers find each other: + + + + + +Bifrost queries the Kubernetes API to find other Bifrost pods by label selector. No static peer list needed — works with HPA. + +```yaml +bifrost: + cluster: + enabled: true + discovery: + enabled: true + type: kubernetes + k8sNamespace: "default" # namespace where Bifrost runs + k8sLabelSelector: "app.kubernetes.io/name=bifrost" + gossip: + port: 7946 +``` + +The service account needs permission to list pods: + +```yaml +serviceAccount: + create: true + annotations: {} +``` + +```bash +# Create a ClusterRole and binding for pod discovery (apply once) +kubectl apply -f - <<'EOF' +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: bifrost-pod-discovery + namespace: default +rules: + - apiGroups: [""] + resources: ["pods"] + verbs: ["list", "get", "watch"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: bifrost-pod-discovery + namespace: default +subjects: + - kind: ServiceAccount + name: bifrost + namespace: default +roleRef: + kind: Role + name: bifrost-pod-discovery + apiGroup: rbac.authorization.k8s.io +EOF +``` + +```bash +helm install bifrost bifrost/bifrost -f cluster-k8s-discovery-values.yaml +``` + + + + + +Uses a headless service DNS name to resolve peer IPs. Works well with StatefulSets (predictable pod DNS names). 
+ +```yaml +bifrost: + cluster: + enabled: true + discovery: + enabled: true + type: dns + dnsNames: + - "bifrost-headless.default.svc.cluster.local" + gossip: + port: 7946 +``` + +The chart automatically creates a headless service (`bifrost-headless`) when cluster mode is enabled with a StatefulSet. For Deployments, create it manually: + +```bash +kubectl apply -f - <<'EOF' +apiVersion: v1 +kind: Service +metadata: + name: bifrost-headless +spec: + clusterIP: None + selector: + app.kubernetes.io/name: bifrost + ports: + - name: gossip + port: 7946 + protocol: TCP +EOF +``` + +```bash +helm install bifrost bifrost/bifrost -f cluster-dns-discovery-values.yaml +``` + + + + + +Enumerate peer addresses explicitly. Use when discovery mechanisms are unavailable or you want deterministic membership. + +```yaml +bifrost: + cluster: + enabled: true + peers: + - "bifrost-0.bifrost-headless.default.svc.cluster.local:7946" + - "bifrost-1.bifrost-headless.default.svc.cluster.local:7946" + - "bifrost-2.bifrost-headless.default.svc.cluster.local:7946" + gossip: + port: 7946 +``` + + +Static peers require StatefulSet pod names to be stable. This approach doesn't adapt to HPA-driven scaling — use Kubernetes or DNS discovery for dynamic replica counts. + + + + + + +```yaml +bifrost: + cluster: + enabled: true + discovery: + enabled: true + type: consul + consulAddress: "consul.consul.svc.cluster.local:8500" + gossip: + port: 7946 +``` + +```bash +helm install bifrost bifrost/bifrost -f cluster-consul-discovery-values.yaml +``` + + + + + +```yaml +bifrost: + cluster: + enabled: true + discovery: + enabled: true + type: etcd + etcdEndpoints: + - "http://etcd-0.etcd.default.svc.cluster.local:2379" + - "http://etcd-1.etcd.default.svc.cluster.local:2379" + - "http://etcd-2.etcd.default.svc.cluster.local:2379" + gossip: + port: 7946 +``` + + + + + +Best for local development or bare-metal clusters where multicast is available. 
+ +```yaml +bifrost: + cluster: + enabled: true + discovery: + enabled: true + type: mdns + mdnsService: "_bifrost._tcp" + gossip: + port: 7946 +``` + + + + + +--- + +## Allowed Address Space + +Restrict gossip to a specific subnet (useful in multi-tenant clusters): + +```yaml +bifrost: + cluster: + discovery: + enabled: true + type: kubernetes + k8sNamespace: "default" + k8sLabelSelector: "app.kubernetes.io/name=bifrost" + allowedAddressSpace: + - "10.0.0.0/8" + - "172.16.0.0/12" +``` + +--- + +## Region-Aware Routing + +Tag replicas with a region identifier for latency-aware routing: + +```yaml +bifrost: + cluster: + enabled: true + region: "us-east-1" +``` + +--- + +## Full HA Production Example + +```yaml +# ha-production-values.yaml +image: + tag: "v1.4.11" + +replicaCount: 3 + +resources: + requests: + cpu: 1000m + memory: 1Gi + limits: + cpu: 4000m + memory: 4Gi + +autoscaling: + enabled: true + minReplicas: 3 + maxReplicas: 15 + targetCPUUtilizationPercentage: 70 + targetMemoryUtilizationPercentage: 75 + behavior: + scaleDown: + stabilizationWindowSeconds: 300 + policies: + - type: Pods + value: 1 + periodSeconds: 120 + scaleUp: + stabilizationWindowSeconds: 30 + +terminationGracePeriodSeconds: 90 +lifecycle: + preStop: + exec: + command: ["sh", "-c", "sleep 20"] + +ingress: + enabled: true + className: nginx + annotations: + cert-manager.io/cluster-issuer: letsencrypt-prod + nginx.ingress.kubernetes.io/proxy-body-size: "100m" + nginx.ingress.kubernetes.io/proxy-read-timeout: "300" + hosts: + - host: bifrost.yourdomain.com + paths: + - path: / + pathType: Prefix + tls: + - secretName: bifrost-tls + hosts: + - bifrost.yourdomain.com + +storage: + mode: postgres + +postgresql: + external: + enabled: true + host: "rds.us-east-1.amazonaws.com" + port: 5432 + user: bifrost + database: bifrost + sslMode: require + existingSecret: "postgres-credentials" + passwordKey: "password" + +bifrost: + encryptionKeySecret: + name: "bifrost-encryption" + key: "encryption-key" + + client: + initialPoolSize: 1000 + dropExcessRequests: true + enableLogging: true + enforceGovernanceHeader: true + + cluster: + enabled: true + region: "us-east-1" + discovery: + enabled: true + type: kubernetes + k8sNamespace: "default" + k8sLabelSelector: "app.kubernetes.io/name=bifrost" + gossip: + port: 7946 + config: + timeoutSeconds: 10 + successThreshold: 3 + failureThreshold: 3 + + plugins: + telemetry: + enabled: true + config: + push_gateway: + enabled: true + push_gateway_url: "http://prometheus-pushgateway.monitoring.svc.cluster.local:9091" + push_interval: 15 + logging: + enabled: true + governance: + enabled: true + config: + is_vk_mandatory: true + +affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + app.kubernetes.io/name: bifrost + topologyKey: kubernetes.io/hostname + +serviceAccount: + create: true + annotations: {} +``` + +```bash +# Prerequisites +kubectl create secret generic postgres-credentials \ + --from-literal=password='your-secure-postgres-password' + +kubectl create secret generic bifrost-encryption \ + --from-literal=encryption-key='your-32-byte-encryption-key' + +# RBAC for Kubernetes pod discovery +kubectl apply -f - <<'EOF' +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: bifrost-pod-discovery + namespace: default +rules: + - apiGroups: [""] + resources: ["pods"] + verbs: ["list", "get", "watch"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: bifrost-pod-discovery + 
namespace: default +subjects: + - kind: ServiceAccount + name: bifrost + namespace: default +roleRef: + kind: Role + name: bifrost-pod-discovery + apiGroup: rbac.authorization.k8s.io +EOF + +# Install +helm install bifrost bifrost/bifrost -f ha-production-values.yaml + +# Verify all peers have found each other (check logs) +kubectl logs -l app.kubernetes.io/name=bifrost --tail=50 | grep -i gossip +``` + +--- + +## Verifying Cluster Health + +```bash +# Check all pods are running +kubectl get pods -l app.kubernetes.io/name=bifrost + +# Check gossip port is reachable between pods +kubectl exec -it bifrost-0 -- nc -zv bifrost-1.bifrost-headless 7946 + +# Check health endpoint +kubectl port-forward svc/bifrost 8080:8080 & +curl http://localhost:8080/health + +# View HPA status +kubectl get hpa bifrost + +# Scale manually during maintenance +kubectl scale deployment bifrost --replicas=5 +``` diff --git a/docs/deployment-guides/helm/governance.mdx b/docs/deployment-guides/helm/governance.mdx new file mode 100644 index 0000000000..3679d214d4 --- /dev/null +++ b/docs/deployment-guides/helm/governance.mdx @@ -0,0 +1,422 @@ +--- +title: "Governance" +description: "Configure Bifrost governance in Helm — budgets, rate limits, virtual keys, routing rules, and admin authentication" +icon: "shield" +--- + +Governance lets you control who can call which providers, how much they can spend, how fast they can go, and how traffic is routed. Everything is declared under `bifrost.governance` in your values file and seeded into the database at startup. + + +The governance **plugin** must also be enabled for enforcement to take effect: + +```yaml +bifrost: + plugins: + governance: + enabled: true +``` + +See the [Plugins](/deployment-guides/helm/plugins) page for plugin configuration details. + + +--- + +## Admin Authentication + +Protect the Bifrost dashboard and management API with username/password auth. + +```bash +kubectl create secret generic bifrost-admin-credentials \ + --from-literal=username='admin' \ + --from-literal=password='your-secure-admin-password' +``` + +```yaml +bifrost: + governance: + authConfig: + isEnabled: true + disableAuthOnInference: false # keep auth on inference routes + existingSecret: "bifrost-admin-credentials" + usernameKey: "username" + passwordKey: "password" +``` + +```bash +helm upgrade bifrost bifrost/bifrost --reuse-values -f governance-auth-values.yaml +``` + +--- + +## Budgets + +Spending caps that reset on a configurable period. Budgets are referenced by ID from virtual keys, teams, customers, or providers. + +| Reset duration | Syntax | +|----------------|--------| +| 30 seconds | `"30s"` | +| 5 minutes | `"5m"` | +| 1 hour | `"1h"` | +| 1 day | `"1d"` | +| 1 week | `"1w"` | +| 1 month | `"1M"` | +| 1 year | `"1Y"` | + +```yaml +bifrost: + governance: + budgets: + - id: "budget-dev" + max_limit: 50 # $50 per month + reset_duration: "1M" + + - id: "budget-production" + max_limit: 500 # $500 per month + reset_duration: "1M" + + - id: "budget-testing" + max_limit: 10 # $10 per day + reset_duration: "1d" + + - id: "budget-enterprise" + max_limit: 5000 # $5000 per month + reset_duration: "1M" +``` + +--- + +## Rate Limits + +Token and request-count caps per time window. Referenced by ID from virtual keys, teams, customers, or providers. 
+ +```yaml +bifrost: + governance: + rateLimits: + - id: "rate-limit-standard" + token_max_limit: 100000 # 100K tokens per hour + token_reset_duration: "1h" + request_max_limit: 1000 # 1000 requests per hour + request_reset_duration: "1h" + + - id: "rate-limit-high" + token_max_limit: 500000 # 500K tokens per hour + token_reset_duration: "1h" + request_max_limit: 5000 + request_reset_duration: "1h" + + - id: "rate-limit-burst" + token_max_limit: 50000 # 50K tokens per minute (burst) + token_reset_duration: "1m" + request_max_limit: 500 + request_reset_duration: "1m" + + - id: "rate-limit-testing" + token_max_limit: 10000 + token_reset_duration: "1h" + request_max_limit: 100 + request_reset_duration: "1h" +``` + +--- + +## Customers & Teams + +Optional organizational hierarchy. Virtual keys can be assigned to customers or teams, inheriting their budgets and rate limits. + +```yaml +bifrost: + governance: + customers: + - id: "customer-acme" + name: "Acme Corp" + budget_id: "budget-production" + rate_limit_id: "rate-limit-high" + + - id: "customer-startup" + name: "Startup Inc" + budget_id: "budget-dev" + rate_limit_id: "rate-limit-standard" + + teams: + - id: "team-platform" + name: "Platform Team" + customer_id: "customer-acme" + budget_id: "budget-enterprise" + rate_limit_id: "rate-limit-high" + + - id: "team-ml" + name: "ML Team" + customer_id: "customer-acme" + budget_id: "budget-production" + rate_limit_id: "rate-limit-standard" +``` + +--- + +## Virtual Keys + +Virtual keys are the primary access tokens issued to callers. They scope which providers, models, and underlying API keys are accessible. + +```yaml +bifrost: + governance: + virtualKeys: + # 1. Unrestricted dev key — access to every provider + - id: "vk-dev-all" + name: "Dev: all providers" + value: "vk-dev-all-secret-token" + is_active: true + budget_id: "budget-dev" + rate_limit_id: "rate-limit-standard" + # No provider_configs → all providers allowed + + # 2. OpenAI only — restricted to two models + - id: "vk-openai-prod" + name: "OpenAI Production" + value: "vk-openai-prod-secret-token" + is_active: true + budget_id: "budget-production" + rate_limit_id: "rate-limit-high" + provider_configs: + - provider: "openai" + weight: 1 + allowed_models: ["gpt-4o", "gpt-4o-mini"] + # No keys[] → all configured OpenAI keys allowed + + # 3. Multi-provider with weighted routing + - id: "vk-multi" + name: "Multi-provider weighted" + value: "vk-multi-secret-token" + is_active: true + budget_id: "budget-production" + rate_limit_id: "rate-limit-high" + provider_configs: + - provider: "openai" + weight: 2 # 50% + allowed_models: ["*"] + - provider: "anthropic" + weight: 1 # 25% + allowed_models: ["*"] + - provider: "groq" + weight: 1 # 25% + allowed_models: ["*"] + + # 4. Team-scoped key + - id: "vk-platform-team" + name: "Platform Team Key" + value: "vk-platform-team-token" + is_active: true + team_id: "team-platform" # inherits team budget/rate-limit + provider_configs: + - provider: "openai" + weight: 1 + allowed_models: ["*"] + keys: + - name: "openai-primary" # pin to specific configured key + + # 5. Restricted testing key + - id: "vk-testing" + name: "Testing (gpt-4o-mini only)" + value: "vk-testing-token" + is_active: true + budget_id: "budget-testing" + rate_limit_id: "rate-limit-testing" + provider_configs: + - provider: "openai" + weight: 1 + allowed_models: ["gpt-4o-mini"] + + # 6. 
Batch API key + - id: "vk-batch" + name: "Batch API workloads" + value: "vk-batch-token" + is_active: true + budget_id: "budget-production" + rate_limit_id: "rate-limit-burst" + provider_configs: + - provider: "openai" + weight: 1 + allowed_models: ["*"] + keys: + - name: "openai-batch" # only the batch-flagged key +``` + +**Use a virtual key in API calls:** + +```bash +curl http://localhost:8080/v1/chat/completions \ + -H "x-bf-vk: vk-openai-prod-secret-token" \ + -H "Content-Type: application/json" \ + -d '{"model":"gpt-4o","messages":[{"role":"user","content":"Hello"}]}' +``` + +--- + +## Model Configs + +Apply budgets and rate limits at the model level, independent of virtual keys: + +```yaml +bifrost: + governance: + modelConfigs: + - id: "model-gpt4o" + model_name: "gpt-4o" + provider: "openai" + budget_id: "budget-production" + rate_limit_id: "rate-limit-high" + + - id: "model-claude" + model_name: "claude-3-5-sonnet-20241022" + provider: "anthropic" + rate_limit_id: "rate-limit-standard" +``` + +--- + +## Provider Governance + +Apply budgets and rate limits at the provider level: + +```yaml +bifrost: + governance: + providers: + - name: "openai" + budget_id: "budget-production" + rate_limit_id: "rate-limit-high" + send_back_raw_request: false + send_back_raw_response: false + + - name: "anthropic" + budget_id: "budget-production" + rate_limit_id: "rate-limit-standard" +``` + +--- + +## Routing Rules + +CEL-expression-based routing rules redirect requests to different providers or models based on request attributes. + +| Field | Description | +|-------|-------------| +| `cel_expression` | CEL expression evaluated against the request; if `true`, rule fires | +| `targets` | Provider/model targets with weights | +| `fallbacks` | Providers to try if all targets fail | +| `scope` | `global`, `team`, `customer`, or `virtual_key` | +| `scope_id` | Required for non-global scopes | +| `priority` | Lower number = evaluated first | + +```yaml +bifrost: + governance: + routingRules: + # Route all GPT requests to Azure + - id: "route-gpt-to-azure" + name: "GPT → Azure" + description: "Route all GPT model requests to Azure OpenAI" + enabled: true + cel_expression: "model.startsWith('gpt-')" + targets: + - provider: "azure" + model: "" # empty = use original model name + weight: 1.0 + fallbacks: ["openai"] + scope: "global" + priority: 0 + + # Route heavy models to a slower but cheaper provider + - id: "route-heavy-to-groq" + name: "Large context → Groq" + enabled: true + cel_expression: "model == 'gpt-4o' && request_body.max_tokens > 4000" + targets: + - provider: "groq" + model: "llama-3.3-70b-versatile" + weight: 1.0 + fallbacks: ["openai"] + scope: "global" + priority: 1 + + # Team-scoped rule + - id: "route-ml-team-bedrock" + name: "ML Team → Bedrock" + enabled: true + cel_expression: "true" # match all requests for this scope + targets: + - provider: "bedrock" + model: "" + weight: 1.0 + fallbacks: ["openai"] + scope: "team" + scope_id: "team-ml" + priority: 0 +``` + +--- + +## Full Example + +```yaml +# governance-full-values.yaml +image: + tag: "v1.4.11" + +bifrost: + encryptionKeySecret: + name: "bifrost-encryption" + key: "encryption-key" + + plugins: + governance: + enabled: true + config: + is_vk_mandatory: true + + governance: + authConfig: + isEnabled: true + existingSecret: "bifrost-admin-credentials" + usernameKey: "username" + passwordKey: "password" + + budgets: + - id: "budget-production" + max_limit: 500 + reset_duration: "1M" + - id: "budget-dev" + max_limit: 50 + 
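+        # $50 per month (matches the Budgets section above)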
reset_duration: "1M" + + rateLimits: + - id: "rate-limit-standard" + token_max_limit: 100000 + token_reset_duration: "1h" + request_max_limit: 1000 + request_reset_duration: "1h" + + virtualKeys: + - id: "vk-production" + name: "Production" + value: "vk-prod-secret-token" + is_active: true + budget_id: "budget-production" + rate_limit_id: "rate-limit-standard" + provider_configs: + - provider: "openai" + weight: 1 + allowed_models: ["gpt-4o", "gpt-4o-mini"] +``` + +```bash +kubectl create secret generic bifrost-encryption \ + --from-literal=encryption-key='your-32-byte-key' + +kubectl create secret generic bifrost-admin-credentials \ + --from-literal=username='admin' \ + --from-literal=password='secure-admin-password' + +helm install bifrost bifrost/bifrost -f governance-full-values.yaml +``` diff --git a/docs/deployment-guides/helm/guardrails.mdx b/docs/deployment-guides/helm/guardrails.mdx new file mode 100644 index 0000000000..60ec2710d5 --- /dev/null +++ b/docs/deployment-guides/helm/guardrails.mdx @@ -0,0 +1,279 @@ +--- +title: "Guardrails" +description: "Configure guardrails providers and rules in Bifrost Helm deployments" +icon: "shield-halved" +--- + + +Guardrails are an **enterprise-only** feature. They require the enterprise Bifrost image. + + +Guardrails are configured under `bifrost.guardrails` in your values file. The configuration has two parts: + +- **`providers`** — the backend that performs the check. Rules link to providers by `id`. +- **`rules`** — CEL expressions that control when and where providers are invoked. + +--- + +## Providers + + + + +Runs entirely in-process with no external dependency. Patterns use RE2 syntax. Supports optional per-pattern flags: `i` (case-insensitive), `m` (multiline), `s` (dot-all). + +```yaml +bifrost: + guardrails: + providers: + - id: 1 + provider_name: "regex" + policy_name: "block-secrets" + enabled: true + timeout: 5 + config: + patterns: + - pattern: "sk-[A-Za-z0-9]{20,}" + description: "OpenAI API key" + - pattern: "AKIA[0-9A-Z]{16}" + description: "AWS access key" + flags: "i" + - pattern: "gh[ps]_[A-Za-z0-9]{36}" + description: "GitHub token" +``` + + + + +```yaml +bifrost: + guardrails: + providers: + - id: 2 + provider_name: "bedrock" + policy_name: "content-filter" + enabled: true + timeout: 15 + config: + guardrail_arn: "arn:aws:bedrock:us-east-1::guardrail/abc123" + guardrail_version: "DRAFT" # or a published version number + region: "us-east-1" + access_key: "env.AWS_ACCESS_KEY_ID" # omit to use instance role + secret_key: "env.AWS_SECRET_ACCESS_KEY" +``` + + + + +```yaml +bifrost: + guardrails: + providers: + - id: 3 + provider_name: "azure" + policy_name: "azure-content-safety" + enabled: true + timeout: 10 + config: + endpoint: "https://your-resource.cognitiveservices.azure.com" + api_key: "env.AZURE_CONTENT_SAFETY_KEY" + analyze_enabled: true + analyze_severity_threshold: "medium" # low | medium | high + jailbreak_shield_enabled: true + indirect_attack_shield_enabled: true + copyright_enabled: false + text_blocklist_enabled: false + blocklist_names: [] +``` + + + + +```yaml +bifrost: + guardrails: + providers: + - id: 4 + provider_name: "grayswan" + policy_name: "grayswan-jailbreak" + enabled: true + timeout: 15 + config: + api_key: "env.GRAYSWAN_API_KEY" + violation_threshold: 0.7 # 0.0–1.0; higher = more permissive + reasoning_mode: "standard" # standard | fast + policy_id: "" # optional: single policy ID + policy_ids: [] # optional: multiple policy IDs + rules: {} # optional: inline rule map +``` + + + + +```yaml 
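+# Patronus AI: evaluation-based guardrails served through the hosted Patronus API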
+bifrost: + guardrails: + providers: + - id: 5 + provider_name: "patronus-ai" + policy_name: "patronus-safety" + enabled: true + timeout: 20 + config: + api_key: "env.PATRONUS_API_KEY" + environment: "production" # production | development +``` + + + + +--- + +## Rules + +Rules are CEL expressions that fire when their condition is met. Available CEL variables: + +| Variable | Type | Description | +|----------|------|-------------| +| `model` | `string` | Model name from the request | +| `provider` | `string` | Provider name (e.g. `"openai"`) | +| `headers` | `map` | HTTP request headers | +| `params` | `map` | Query parameters | +| `customer` | `string` | Customer ID | +| `team` | `string` | Team ID | +| `user` | `string` | User ID | + +Rule fields: + +| Field | Required | Description | +|-------|----------|-------------| +| `id` | Yes | Unique integer ID | +| `name` | Yes | Human-readable name | +| `description` | No | Optional description | +| `enabled` | Yes | `true` to activate | +| `cel_expression` | Yes | CEL boolean expression; `"true"` matches all requests | +| `apply_to` | Yes | `"input"`, `"output"`, or `"both"` | +| `sampling_rate` | No | `0`–`100`; percentage of requests to check (default: 100) | +| `timeout` | No | Rule timeout in seconds | +| `provider_config_ids` | No | Provider `id`s to invoke when this rule matches | + +```yaml +bifrost: + guardrails: + rules: + - id: 101 + name: "block-secrets-input" + description: "Block prompts containing API keys" + enabled: true + cel_expression: "true" + apply_to: "input" + sampling_rate: 100 + timeout: 10 + provider_config_ids: [1] + + - id: 102 + name: "azure-output-gpt4o" + description: "Scan GPT-4o responses" + enabled: true + cel_expression: "model == 'gpt-4o'" + apply_to: "output" + sampling_rate: 100 + timeout: 15 + provider_config_ids: [3] + + - id: 103 + name: "grayswan-openai-input" + enabled: true + cel_expression: "provider == 'openai'" + apply_to: "input" + sampling_rate: 50 + timeout: 20 + provider_config_ids: [4] + + - id: 104 + name: "strict-team-check" + enabled: true + cel_expression: "team == 'team-platform'" + apply_to: "both" + sampling_rate: 100 + timeout: 30 + provider_config_ids: [1, 3] # multiple providers run in parallel +``` + +--- + +## Full example + +```yaml +# guardrails-values.yaml +image: + tag: "latest" + +bifrost: + encryptionKeySecret: + name: "bifrost-encryption" + key: "encryption-key" + + guardrails: + providers: + - id: 1 + provider_name: "regex" + policy_name: "block-secrets" + enabled: true + timeout: 5 + config: + patterns: + - pattern: "sk-[A-Za-z0-9]{20,}" + description: "OpenAI API key" + - pattern: "AKIA[0-9A-Z]{16}" + description: "AWS access key" + - pattern: "gh[ps]_[A-Za-z0-9]{36}" + description: "GitHub token" + + - id: 2 + provider_name: "azure" + policy_name: "content-safety" + enabled: true + timeout: 10 + config: + endpoint: "https://your-resource.cognitiveservices.azure.com" + api_key: "env.AZURE_CONTENT_SAFETY_KEY" + analyze_enabled: true + analyze_severity_threshold: "medium" + jailbreak_shield_enabled: true + indirect_attack_shield_enabled: false + copyright_enabled: false + text_blocklist_enabled: false + + rules: + - id: 101 + name: "block-secrets-input" + description: "Block prompts leaking credentials" + enabled: true + cel_expression: "true" + apply_to: "input" + sampling_rate: 100 + timeout: 10 + provider_config_ids: [1] + + - id: 102 + name: "content-safety-both" + description: "Azure content safety on input and output" + enabled: true + cel_expression: "true" + 
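+        # "true" matches every request (see the CEL variables table above)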
apply_to: "both" + sampling_rate: 100 + timeout: 15 + provider_config_ids: [2] +``` + +```bash +kubectl create secret generic azure-content-safety \ + --from-literal=key='your-azure-content-safety-api-key' + +helm install bifrost bifrost/bifrost \ + -f guardrails-values.yaml \ + --set env[0].name=AZURE_CONTENT_SAFETY_KEY \ + --set env[0].valueFrom.secretKeyRef.name=azure-content-safety \ + --set env[0].valueFrom.secretKeyRef.key=key +``` diff --git a/docs/deployment-guides/helm/plugins.mdx b/docs/deployment-guides/helm/plugins.mdx new file mode 100644 index 0000000000..f02303120b --- /dev/null +++ b/docs/deployment-guides/helm/plugins.mdx @@ -0,0 +1,578 @@ +--- +title: "Plugins" +description: "Configure Bifrost plugins in Helm — telemetry, logging, semantic cache, OpenTelemetry, Datadog, governance, and custom plugins" +icon: "puzzle-piece" +--- + +Plugins are configured under `bifrost.plugins`. Each plugin is independently enabled/disabled. Pre-hooks run in registration order; post-hooks run in reverse order. + +```yaml +bifrost: + plugins: + telemetry: + enabled: true + logging: + enabled: true + governance: + enabled: true + semanticCache: + enabled: false + otel: + enabled: false + datadog: + enabled: false +``` + +```bash +# Enable plugins at install time +helm install bifrost bifrost/bifrost \ + --set image.tag=v1.4.11 \ + --set bifrost.plugins.telemetry.enabled=true \ + --set bifrost.plugins.logging.enabled=true \ + --set bifrost.plugins.governance.enabled=true + +# Or upgrade to enable a plugin without touching other values +helm upgrade bifrost bifrost/bifrost \ + --reuse-values \ + --set bifrost.plugins.otel.enabled=true +``` + +--- + + + + + +### Telemetry (Prometheus) + +Exposes Prometheus metrics at `GET /metrics`. + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `bifrost.plugins.telemetry.enabled` | Enable Prometheus metrics | `false` | +| `bifrost.plugins.telemetry.config.custom_labels` | Extra labels attached to every metric | `[]` | +| `bifrost.plugins.telemetry.config.push_gateway.enabled` | Push metrics to a Prometheus Push Gateway | `false` | +| `bifrost.plugins.telemetry.config.push_gateway.push_gateway_url` | Push Gateway URL | `""` | +| `bifrost.plugins.telemetry.config.push_gateway.job_name` | Job label | `"bifrost"` | +| `bifrost.plugins.telemetry.config.push_gateway.push_interval` | Push interval in seconds | `15` | + +**Basic setup:** + +```yaml +# telemetry-values.yaml +image: + tag: "v1.4.11" + +bifrost: + plugins: + telemetry: + enabled: true + config: + custom_labels: + - name: "environment" + value: "production" + - name: "region" + value: "us-east-1" +``` + +```bash +helm upgrade bifrost bifrost/bifrost --reuse-values -f telemetry-values.yaml + +# Verify metrics are exposed +kubectl port-forward svc/bifrost 8080:8080 & +curl http://localhost:8080/metrics | head -30 +``` + +**With Prometheus Push Gateway** (recommended for multi-replica / HA setups where pull-based scraping can miss pods): + +```yaml +bifrost: + plugins: + telemetry: + enabled: true + config: + push_gateway: + enabled: true + push_gateway_url: "http://prometheus-pushgateway.monitoring.svc.cluster.local:9091" + job_name: "bifrost" + instance_id: "" # auto-derived from pod name if empty + push_interval: 15 + basic_auth: + username: "" + password: "" +``` + +**ServiceMonitor for Prometheus Operator:** + +```yaml +serviceMonitor: + enabled: true + interval: 30s + scrapeTimeout: 10s + namespace: monitoring # namespace where Prometheus is deployed +``` + + + + + 
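+
+To confirm the monitor was created (a sketch; assumes the Prometheus Operator CRDs are installed):
+
+```bash
+kubectl get servicemonitor --all-namespaces | grep bifrost
+```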
+### Request/Response Logging + +Persists full request and response data to the configured log store. + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `bifrost.plugins.logging.enabled` | Enable request/response logging | `false` | +| `bifrost.plugins.logging.config.disable_content_logging` | Strip message body from logs | `false` | +| `bifrost.plugins.logging.config.logging_headers` | HTTP headers to capture in log metadata | `[]` | + +```yaml +# logging-values.yaml +image: + tag: "v1.4.11" + +bifrost: + plugins: + logging: + enabled: true + config: + disable_content_logging: false # set true for HIPAA/compliance + logging_headers: + - "x-request-id" + - "x-user-id" + - "x-team-id" +``` + +```bash +helm upgrade bifrost bifrost/bifrost --reuse-values -f logging-values.yaml +``` + +**Verify logs are being written:** + +```bash +kubectl port-forward svc/bifrost 8080:8080 & +# Make a test request, then query logs +curl -s "http://localhost:8080/api/logs?limit=5" | jq . +``` + + +`bifrost.plugins.logging` controls the *plugin* (which hooks into every request). `bifrost.client.enableLogging` / `disableContentLogging` controls the *client-level* defaults. Both must be configured consistently — see the [Client Configuration](/deployment-guides/helm/client) page. + + + + + + +### Governance Plugin + +Enforces budget caps, rate limits, and virtual key policies on every request. Must be enabled alongside `bifrost.governance` resource definitions. + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `bifrost.plugins.governance.enabled` | Enable governance enforcement | `false` | +| `bifrost.plugins.governance.config.is_vk_mandatory` | Reject requests without a virtual key | `false` | +| `bifrost.plugins.governance.config.required_headers` | Additional headers required on every request | `[]` | +| `bifrost.plugins.governance.config.is_enterprise` | Enable enterprise governance features | `false` | + +```yaml +# governance-plugin-values.yaml +image: + tag: "v1.4.11" + +bifrost: + plugins: + governance: + enabled: true + config: + is_vk_mandatory: true # require virtual key on all inference requests + required_headers: [] +``` + +```bash +helm upgrade bifrost bifrost/bifrost --reuse-values -f governance-plugin-values.yaml +``` + +See the [Governance](/deployment-guides/helm/governance) page for defining budgets, rate limits, and virtual keys. + + + + + +### Semantic Cache + +Caches LLM responses using vector similarity so semantically equivalent prompts return cached answers. 
+ +Two modes: +- **Semantic mode** (`dimension > 1`): uses an embedding model + vector store for similarity search +- **Direct / hash mode** (`dimension: 1`): exact-match hash-based caching, no embedding model needed + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `bifrost.plugins.semanticCache.enabled` | Enable semantic caching | `false` | +| `bifrost.plugins.semanticCache.config.provider` | Embedding provider | `"openai"` | +| `bifrost.plugins.semanticCache.config.embedding_model` | Embedding model name | `"text-embedding-3-small"` | +| `bifrost.plugins.semanticCache.config.dimension` | Embedding dimension (`1` = direct/hash mode) | `1536` | +| `bifrost.plugins.semanticCache.config.threshold` | Cosine similarity threshold (0–1) | `0.8` | +| `bifrost.plugins.semanticCache.config.ttl` | Cache entry TTL (Go duration) | `"5m"` | +| `bifrost.plugins.semanticCache.config.conversation_history_threshold` | Number of past messages to include in cache key | `3` | +| `bifrost.plugins.semanticCache.config.cache_by_model` | Include model name in cache key | `true` | +| `bifrost.plugins.semanticCache.config.cache_by_provider` | Include provider name in cache key | `true` | +| `bifrost.plugins.semanticCache.config.exclude_system_prompt` | Exclude system prompt from cache key | `false` | +| `bifrost.plugins.semanticCache.config.cleanup_on_shutdown` | Delete cache data on pod shutdown | `false` | + +**Semantic mode (with OpenAI embeddings + Weaviate):** + +```bash +kubectl create secret generic semantic-cache-secret \ + --from-literal=openai-key='sk-your-openai-embedding-key' +``` + +```yaml +# semantic-cache-values.yaml +image: + tag: "v1.4.11" + +vectorStore: + enabled: true + type: weaviate + weaviate: + enabled: true + persistence: + size: 20Gi + +bifrost: + plugins: + semanticCache: + enabled: true + config: + provider: "openai" + keys: + - value: "env.SEMANTIC_CACHE_OPENAI_KEY" + weight: 1 + embedding_model: "text-embedding-3-small" + dimension: 1536 + threshold: 0.85 + ttl: "1h" + conversation_history_threshold: 5 + cache_by_model: true + cache_by_provider: true + + providerSecrets: + semantic-cache-key: + existingSecret: "semantic-cache-secret" + key: "openai-key" + envVar: "SEMANTIC_CACHE_OPENAI_KEY" +``` + +```bash +helm install bifrost bifrost/bifrost -f semantic-cache-values.yaml +``` + +**Direct / hash mode** (no embedding provider needed): + +```yaml +bifrost: + plugins: + semanticCache: + enabled: true + config: + dimension: 1 # triggers hash-based exact matching + ttl: "30m" + cache_by_model: true + cache_by_provider: true +``` + + +The vector store (`vectorStore.*`) must be configured and enabled for semantic mode. Direct/hash mode works without a vector store but still requires a storage backend. + + + + + + +### OpenTelemetry (OTel) + +Sends distributed traces and push-based metrics to any OTLP-compatible collector (Jaeger, Tempo, Honeycomb, etc.). 
+ +| Parameter | Description | Default | +|-----------|-------------|---------| +| `bifrost.plugins.otel.enabled` | Enable OTel tracing | `false` | +| `bifrost.plugins.otel.config.service_name` | Service name in traces | `"bifrost"` | +| `bifrost.plugins.otel.config.collector_url` | OTLP collector endpoint | `""` | +| `bifrost.plugins.otel.config.trace_type` | Trace type (`genai_extension` or `default`) | `"genai_extension"` | +| `bifrost.plugins.otel.config.protocol` | Transport protocol (`grpc` or `http`) | `"grpc"` | +| `bifrost.plugins.otel.config.metrics_enabled` | Enable OTLP push-based metrics | `false` | +| `bifrost.plugins.otel.config.metrics_endpoint` | OTLP metrics endpoint | `""` | +| `bifrost.plugins.otel.config.metrics_push_interval` | Push interval in seconds | `15` | +| `bifrost.plugins.otel.config.headers` | Custom headers for the collector | `{}` | +| `bifrost.plugins.otel.config.insecure` | Skip TLS verification | `false` | +| `bifrost.plugins.otel.config.tls_ca_cert` | Path to CA cert for TLS | `""` | + +```yaml +# otel-values.yaml +image: + tag: "v1.4.11" + +bifrost: + plugins: + otel: + enabled: true + config: + service_name: "bifrost-production" + collector_url: "otel-collector.observability.svc.cluster.local:4317" + trace_type: "genai_extension" + protocol: "grpc" + insecure: true # set false in production with a proper cert + metrics_enabled: true + metrics_endpoint: "otel-collector.observability.svc.cluster.local:4317" + metrics_push_interval: 15 + headers: + x-honeycomb-team: "env.HONEYCOMB_API_KEY" +``` + +```bash +helm upgrade bifrost bifrost/bifrost --reuse-values -f otel-values.yaml +``` + +**With authentication headers from a Kubernetes Secret:** + +```bash +kubectl create secret generic otel-credentials \ + --from-literal=api-key='your-honeycomb-or-grafana-key' +``` + +```yaml +bifrost: + plugins: + otel: + enabled: true + config: + collector_url: "api.honeycomb.io:443" + protocol: "grpc" + headers: + x-honeycomb-team: "env.OTEL_API_KEY" + + providerSecrets: + otel-key: + existingSecret: "otel-credentials" + key: "api-key" + envVar: "OTEL_API_KEY" +``` + + + + + +### Datadog APM + +Sends traces to a Datadog Agent running in the cluster. + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `bifrost.plugins.datadog.enabled` | Enable Datadog tracing | `false` | +| `bifrost.plugins.datadog.config.service_name` | Service name | `"bifrost"` | +| `bifrost.plugins.datadog.config.agent_addr` | Datadog Agent address | `"localhost:8126"` | +| `bifrost.plugins.datadog.config.env` | Deployment environment tag | `""` | +| `bifrost.plugins.datadog.config.version` | Version tag | `""` | +| `bifrost.plugins.datadog.config.enable_traces` | Enable trace collection | `true` | +| `bifrost.plugins.datadog.config.custom_tags` | Extra tags on all spans | `{}` | + +The Datadog Agent is typically deployed via the [Datadog Helm chart](https://docs.datadoghq.com/containers/kubernetes/installation/) as a DaemonSet, making it available at the node's hostIP. 
+ +```yaml +# datadog-values.yaml +image: + tag: "v1.4.11" + +bifrost: + plugins: + datadog: + enabled: true + config: + service_name: "bifrost" + agent_addr: "$(HOST_IP):8126" # uses Datadog DaemonSet pattern + env: "production" + version: "v1.4.11" + enable_traces: true + custom_tags: + team: "platform" + region: "us-east-1" + +# Inject HOST_IP so Bifrost can reach the DaemonSet agent on the same node +env: + - name: HOST_IP + valueFrom: + fieldRef: + fieldPath: status.hostIP +``` + +```bash +helm upgrade bifrost bifrost/bifrost --reuse-values -f datadog-values.yaml +``` + + + + + +### Maxim Observability + +Sends LLM request/response data to [Maxim](https://getmaxim.ai) for tracing, evaluation, and observability. + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `bifrost.plugins.maxim.enabled` | Enable Maxim plugin | `false` | +| `bifrost.plugins.maxim.config.api_key` | Maxim API key (plain text, prefer secret) | `""` | +| `bifrost.plugins.maxim.config.log_repo_id` | Maxim log repository ID | `""` | +| `bifrost.plugins.maxim.secretRef.name` | Kubernetes Secret name for API key | `""` | +| `bifrost.plugins.maxim.secretRef.key` | Key within the secret | `"api-key"` | + +```bash +kubectl create secret generic maxim-credentials \ + --from-literal=api-key='your-maxim-api-key' +``` + +```yaml +# maxim-values.yaml +image: + tag: "v1.4.11" + +bifrost: + plugins: + maxim: + enabled: true + config: + log_repo_id: "your-log-repo-id" + secretRef: + name: "maxim-credentials" + key: "api-key" +``` + +```bash +helm upgrade bifrost bifrost/bifrost --reuse-values -f maxim-values.yaml +``` + + + + + +### Custom / Dynamic Plugins + +Load a custom Go plugin (compiled `.so` file) at runtime. + +```yaml +bifrost: + plugins: + custom: + - name: "my-custom-plugin" + enabled: true + path: "/plugins/my-plugin.so" + version: 1 + config: + api_endpoint: "https://my-service.example.com" + timeout: 5000 +``` + +Mount the `.so` file via a volume: + +```yaml +volumes: + - name: custom-plugins + configMap: + name: bifrost-custom-plugins + +volumeMounts: + - name: custom-plugins + mountPath: /plugins +``` + +Or use an init container to download the plugin binary: + +```yaml +initContainers: + - name: download-plugin + image: curlimages/curl:8.6.0 + command: + - sh + - -c + - | + curl -fsSL https://plugins.example.com/my-plugin.so \ + -o /plugins/my-plugin.so + volumeMounts: + - name: plugin-dir + mountPath: /plugins + +volumes: + - name: plugin-dir + emptyDir: {} + +volumeMounts: + - name: plugin-dir + mountPath: /plugins +``` + +```bash +helm upgrade bifrost bifrost/bifrost --reuse-values -f custom-plugin-values.yaml +``` + + + + + +--- + +## All Plugins Together + +```yaml +# all-plugins-values.yaml +image: + tag: "v1.4.11" + +bifrost: + encryptionKeySecret: + name: "bifrost-encryption" + key: "encryption-key" + + plugins: + telemetry: + enabled: true + config: + custom_labels: + - name: "environment" + value: "production" + + logging: + enabled: true + config: + disable_content_logging: false + logging_headers: + - "x-request-id" + + governance: + enabled: true + config: + is_vk_mandatory: true + + semanticCache: + enabled: true + config: + provider: "openai" + keys: + - value: "env.CACHE_OPENAI_KEY" + weight: 1 + embedding_model: "text-embedding-3-small" + dimension: 1536 + threshold: 0.85 + ttl: "1h" + + otel: + enabled: true + config: + service_name: "bifrost" + collector_url: "otel-collector.observability.svc.cluster.local:4317" + protocol: "grpc" + insecure: true +``` + +```bash 
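+# Assumes the referenced secrets already exist: bifrost-encryption, plus a
+# providerSecrets-style injection for CACHE_OPENAI_KEY (see the semantic cache section)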
+helm install bifrost bifrost/bifrost -f all-plugins-values.yaml +``` diff --git a/docs/deployment-guides/helm/providers.mdx b/docs/deployment-guides/helm/providers.mdx new file mode 100644 index 0000000000..8a4e0ccc4c --- /dev/null +++ b/docs/deployment-guides/helm/providers.mdx @@ -0,0 +1,941 @@ +--- +title: "Provider Setup" +description: "Configure LLM providers in the Bifrost Helm chart — API keys, cloud-native auth, and self-hosted endpoints" +icon: "plug" +--- + +All providers are configured under `bifrost.providers` in your values file. Each provider entry contains a `keys` list where each key has a `name`, `value`, `weight`, and optional provider-specific config. + +**Two ways to supply credentials:** + +- **Direct value** — `value: "sk-..."` (fine for dev; avoid in production) +- **Kubernetes Secret + env var** — store the key in a Secret, inject as an env var, and reference it with `value: "env.VAR_NAME"` + +The `providerSecrets` block handles the Secret → env var injection automatically: + +```yaml +bifrost: + providers: + openai: + keys: + - name: "primary" + value: "env.OPENAI_API_KEY" # resolved at runtime + weight: 1 + + providerSecrets: + openai: + existingSecret: "my-openai-secret" + key: "api-key" + envVar: "OPENAI_API_KEY" # injected into the pod +``` + +--- + + + + + +### OpenAI + +Supports multiple keys with weighted load balancing. The key with `use_for_batch_api: true` is eligible for the Batch API. + +**Step 1 — Create secret** + +```bash +kubectl create secret generic openai-credentials \ + --from-literal=api-key-1='sk-your-primary-key' \ + --from-literal=api-key-2='sk-your-secondary-key' \ + --from-literal=api-key-batch='sk-your-batch-key' +``` + +**Step 2 — Values file** + +```yaml +# openai-values.yaml +image: + tag: "v1.4.11" + +bifrost: + providers: + openai: + keys: + - name: "openai-primary" + value: "env.OPENAI_KEY_1" + weight: 2 # 50% of traffic + models: ["*"] + - name: "openai-secondary" + value: "env.OPENAI_KEY_2" + weight: 1 # 25% + models: ["gpt-4o-mini"] # restrict to cheaper model + - name: "openai-batch" + value: "env.OPENAI_KEY_BATCH" + weight: 1 # 25% + models: ["*"] + use_for_batch_api: true + + providerSecrets: + openai-key-1: + existingSecret: "openai-credentials" + key: "api-key-1" + envVar: "OPENAI_KEY_1" + openai-key-2: + existingSecret: "openai-credentials" + key: "api-key-2" + envVar: "OPENAI_KEY_2" + openai-key-batch: + existingSecret: "openai-credentials" + key: "api-key-batch" + envVar: "OPENAI_KEY_BATCH" +``` + +**Step 3 — Install** + +```bash +helm install bifrost bifrost/bifrost -f openai-values.yaml +``` + +**Optional — per-provider network config** + +```yaml +bifrost: + providers: + openai: + keys: + - name: "primary" + value: "env.OPENAI_KEY_1" + weight: 1 + network_config: + default_request_timeout_in_seconds: 120 + max_retries: 3 + retry_backoff_initial_ms: 500 + retry_backoff_max_ms: 5000 + max_conns_per_host: 5000 +``` + + + + + +### Anthropic + +```bash +kubectl create secret generic anthropic-credentials \ + --from-literal=api-key-1='sk-ant-your-primary-key' \ + --from-literal=api-key-2='sk-ant-your-secondary-key' +``` + +```yaml +# anthropic-values.yaml +image: + tag: "v1.4.11" + +bifrost: + providers: + anthropic: + keys: + - name: "anthropic-primary" + value: "env.ANTHROPIC_KEY_1" + weight: 1 + models: ["*"] + - name: "anthropic-secondary" + value: "env.ANTHROPIC_KEY_2" + weight: 1 + models: ["*"] + + providerSecrets: + anthropic-key-1: + existingSecret: "anthropic-credentials" + key: "api-key-1" + envVar: 
"ANTHROPIC_KEY_1" + anthropic-key-2: + existingSecret: "anthropic-credentials" + key: "api-key-2" + envVar: "ANTHROPIC_KEY_2" +``` + +```bash +helm install bifrost bifrost/bifrost -f anthropic-values.yaml +``` + +**Override Anthropic beta headers** (optional): + +```yaml +bifrost: + providers: + anthropic: + keys: + - name: "primary" + value: "env.ANTHROPIC_KEY_1" + weight: 1 + network_config: + beta_header_overrides: + redact-thinking-: true +``` + + + + + +### Azure OpenAI + +Azure requires `azure_key_config` on every key with `endpoint`, `api_version`, and a `deployments` map (logical model name → Azure deployment name). + +Two auth modes are supported: + + + + +**Step 1 — Create secret** + +```bash +kubectl create secret generic azure-credentials \ + --from-literal=api-key='your-azure-openai-api-key' \ + --from-literal=endpoint='https://your-resource.openai.azure.com' +``` + +**Step 2 — Values file** + +```yaml +# azure-apikey-values.yaml +image: + tag: "v1.4.11" + +bifrost: + providers: + azure: + keys: + - name: "azure-primary" + value: "env.AZURE_API_KEY" + weight: 1 + models: ["gpt-4o", "gpt-4o-mini", "text-embedding-3-small"] + azure_key_config: + endpoint: "env.AZURE_ENDPOINT" + api_version: "2024-10-21" + deployments: + gpt-4o: "gpt-4o-prod" + gpt-4o-mini: "gpt-4o-mini-prod" + text-embedding-3-small: "embeddings-prod" + + providerSecrets: + azure-api-key: + existingSecret: "azure-credentials" + key: "api-key" + envVar: "AZURE_API_KEY" + azure-endpoint: + existingSecret: "azure-credentials" + key: "endpoint" + envVar: "AZURE_ENDPOINT" +``` + +**Step 3 — Install** + +```bash +helm install bifrost bifrost/bifrost -f azure-apikey-values.yaml +``` + + + + +When `value` is empty, Bifrost uses `DefaultAzureCredential` — which automatically resolves credentials from: +- AKS Workload Identity (recommended for production) +- Azure VM managed identity +- `az login` (developer machines) + +**Step 1 — Annotate the service account** (AKS Workload Identity) + +```bash +# Associate the Kubernetes service account with your Azure managed identity +kubectl annotate serviceaccount bifrost \ + azure.workload.identity/client-id="" +``` + +```yaml +serviceAccount: + annotations: + azure.workload.identity/client-id: "" +``` + +**Step 2 — Values file** + +```bash +kubectl create secret generic azure-config \ + --from-literal=endpoint='https://your-resource.openai.azure.com' +``` + +```yaml +# azure-msi-values.yaml +image: + tag: "v1.4.11" + +serviceAccount: + annotations: + azure.workload.identity/client-id: "" + +bifrost: + providers: + azure: + keys: + - name: "azure-workload-identity" + value: "" # empty = DefaultAzureCredential + weight: 1 + models: ["gpt-4o"] + azure_key_config: + endpoint: "env.AZURE_ENDPOINT" + api_version: "2024-10-21" + deployments: + gpt-4o: "gpt-4o-prod" + + providerSecrets: + azure-endpoint: + existingSecret: "azure-config" + key: "endpoint" + envVar: "AZURE_ENDPOINT" +``` + +**Step 3 — Install** + +```bash +helm install bifrost bifrost/bifrost -f azure-msi-values.yaml +``` + + + + +**Multi-region failover** (two deployments, different regions): + +```yaml +bifrost: + providers: + azure: + keys: + - name: "eastus" + value: "env.AZURE_KEY_EAST" + weight: 1 + azure_key_config: + endpoint: "env.AZURE_ENDPOINT_EAST" + api_version: "2024-10-21" + deployments: + gpt-4o: "gpt-4o-eastus" + - name: "westus" + value: "env.AZURE_KEY_WEST" + weight: 1 + azure_key_config: + endpoint: "env.AZURE_ENDPOINT_WEST" + api_version: "2024-10-21" + deployments: + gpt-4o: "gpt-4o-westus" +``` + + + 
+ + +### AWS Bedrock + +Bedrock requires `bedrock_key_config` with at minimum a `region`. Three auth modes: + + + + +```bash +kubectl create secret generic aws-credentials \ + --from-literal=access-key-id='AKIAIOSFODNN7EXAMPLE' \ + --from-literal=secret-access-key='wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY' +``` + +```yaml +# bedrock-static-values.yaml +image: + tag: "v1.4.11" + +bifrost: + providers: + bedrock: + keys: + - name: "bedrock-static" + value: "" + weight: 1 + models: ["*"] + bedrock_key_config: + region: "us-east-1" + access_key: "env.AWS_ACCESS_KEY_ID" + secret_key: "env.AWS_SECRET_ACCESS_KEY" + deployments: + # Logical name -> Bedrock inference profile + anthropic.claude-3-5-sonnet: "us.anthropic.claude-3-5-sonnet-20240620-v1:0" + + providerSecrets: + aws-access-key: + existingSecret: "aws-credentials" + key: "access-key-id" + envVar: "AWS_ACCESS_KEY_ID" + aws-secret-key: + existingSecret: "aws-credentials" + key: "secret-access-key" + envVar: "AWS_SECRET_ACCESS_KEY" +``` + +```bash +helm install bifrost bifrost/bifrost -f bedrock-static-values.yaml +``` + + + + +When only `region` is set, Bifrost inherits credentials from the AWS SDK default chain — IRSA (IAM Roles for Service Accounts), EC2 instance profile, or `AWS_*` env vars. + +**Step 1 — Annotate the service account with the IAM role** + +```bash +kubectl annotate serviceaccount bifrost \ + eks.amazonaws.com/role-arn="arn:aws:iam::123456789012:role/BifrostBedrockRole" +``` + +```yaml +serviceAccount: + annotations: + eks.amazonaws.com/role-arn: "arn:aws:iam::123456789012:role/BifrostBedrockRole" +``` + +**Step 2 — Values file** + +```yaml +# bedrock-irsa-values.yaml +image: + tag: "v1.4.11" + +serviceAccount: + annotations: + eks.amazonaws.com/role-arn: "arn:aws:iam::123456789012:role/BifrostBedrockRole" + +bifrost: + providers: + bedrock: + keys: + - name: "bedrock-irsa" + value: "" + weight: 1 + models: ["*"] + bedrock_key_config: + region: "us-east-1" + # No access_key / secret_key — SDK uses IRSA token automatically +``` + +```bash +helm install bifrost bifrost/bifrost -f bedrock-irsa-values.yaml +``` + + + + +Assumes a cross-account role on top of the default credential chain. + +```yaml +# bedrock-assumerole-values.yaml +image: + tag: "v1.4.11" + +bifrost: + providers: + bedrock: + keys: + - name: "bedrock-assumerole" + value: "" + weight: 1 + models: ["*"] + bedrock_key_config: + region: "us-west-2" + # Source identity from pod's default chain, then assume this role + role_arn: "env.AWS_ROLE_ARN" + external_id: "env.AWS_EXTERNAL_ID" + session_name: "bifrost-session" +``` + +```bash +kubectl create secret generic aws-role-config \ + --from-literal=role-arn='arn:aws:iam::999999999999:role/CrossAccountBedrockRole' \ + --from-literal=external-id='your-external-id' +``` + +```yaml + providerSecrets: + aws-role-arn: + existingSecret: "aws-role-config" + key: "role-arn" + envVar: "AWS_ROLE_ARN" + aws-external-id: + existingSecret: "aws-role-config" + key: "external-id" + envVar: "AWS_EXTERNAL_ID" +``` + +```bash +helm install bifrost bifrost/bifrost -f bedrock-assumerole-values.yaml +``` + + + + +**Batch API — S3 configuration** + +```yaml +bedrock_key_config: + region: "us-east-1" + access_key: "env.AWS_ACCESS_KEY_ID" + secret_key: "env.AWS_SECRET_ACCESS_KEY" + batch_s3_config: + buckets: + - bucket_name: "my-bedrock-batch-bucket" + prefix: "batch/" + is_default: true +``` + + + + + +### Google Vertex AI + +Vertex requires `vertex_key_config` with `project_id` and `region`. 
Two auth modes:
+
+
+
+```bash
+# Base64-encode the service account JSON
+SA_JSON=$(cat service-account-key.json | base64 -w 0)
+
+kubectl create secret generic gcp-credentials \
+  --from-literal=project-id='my-gcp-project' \
+  --from-literal=service-account-json="${SA_JSON}"
+```
+
+```yaml
+# vertex-sa-values.yaml
+image:
+  tag: "v1.4.11"
+
+bifrost:
+  providers:
+    vertex:
+      keys:
+        - name: "vertex-sa-key"
+          value: ""
+          weight: 1
+          models: ["*"]
+          vertex_key_config:
+            project_id: "env.VERTEX_PROJECT_ID"
+            region: "us-central1"
+            auth_credentials: "env.VERTEX_AUTH_CREDENTIALS"
+
+  providerSecrets:
+    vertex-project-id:
+      existingSecret: "gcp-credentials"
+      key: "project-id"
+      envVar: "VERTEX_PROJECT_ID"
+    vertex-sa:
+      existingSecret: "gcp-credentials"
+      key: "service-account-json"
+      envVar: "VERTEX_AUTH_CREDENTIALS"
+```
+
+```bash
+helm install bifrost bifrost/bifrost -f vertex-sa-values.yaml
+```
+
+
+
+
+When `auth_credentials` is omitted, Bifrost calls `google.FindDefaultCredentials` — which resolves to:
+- GKE Workload Identity (recommended)
+- GCE metadata server (on Compute Engine / Cloud Run)
+- `GOOGLE_APPLICATION_CREDENTIALS` path
+- `gcloud auth application-default login` (developer machines)
+
+**Step 1 — Annotate the service account** (GKE Workload Identity)
+
+```bash
+gcloud iam service-accounts add-iam-policy-binding \
+  bifrost-sa@my-project.iam.gserviceaccount.com \
+  --role roles/iam.workloadIdentityUser \
+  --member "serviceAccount:my-project.svc.id.goog[default/bifrost]"
+```
+
+```yaml
+serviceAccount:
+  annotations:
+    iam.gke.io/gcp-service-account: "bifrost-sa@my-project.iam.gserviceaccount.com"
+```
+
+**Step 2 — Values file**
+
+```yaml
+# vertex-wli-values.yaml
+image:
+  tag: "v1.4.11"
+
+serviceAccount:
+  annotations:
+    iam.gke.io/gcp-service-account: "bifrost-sa@my-project.iam.gserviceaccount.com"
+
+bifrost:
+  providers:
+    vertex:
+      keys:
+        - name: "vertex-workload-identity"
+          value: ""
+          weight: 1
+          models: ["*"]
+          vertex_key_config:
+            project_id: "my-gcp-project"
+            region: "us-central1"
+            # auth_credentials intentionally omitted → ADC lookup
+```
+
+```bash
+helm install bifrost bifrost/bifrost -f vertex-wli-values.yaml
+```
+
+
+
+
+
+
+
+
+### Standard API-Key Providers
+
+These providers follow the same simple pattern — one or more keys with weights.
+
+
+
+
+```bash
+kubectl create secret generic groq-credentials \
+  --from-literal=api-key='gsk_your_groq_api_key'
+```
+
+```yaml
+bifrost:
+  providers:
+    groq:
+      keys:
+        - name: "groq-primary"
+          value: "env.GROQ_API_KEY"
+          weight: 1
+          models: ["*"]
+
+  providerSecrets:
+    groq-key:
+      existingSecret: "groq-credentials"
+      key: "api-key"
+      envVar: "GROQ_API_KEY"
+```
+
+
+
+
+```bash
+kubectl create secret generic gemini-credentials \
+  --from-literal=api-key='your-gemini-api-key'
+```
+
+```yaml
+bifrost:
+  providers:
+    gemini:
+      keys:
+        - name: "gemini-main"
+          value: "env.GEMINI_API_KEY"
+          weight: 1
+          models: ["*"]
+
+  providerSecrets:
+    gemini-key:
+      existingSecret: "gemini-credentials"
+      key: "api-key"
+      envVar: "GEMINI_API_KEY"
+```
+
+
+
+
+```bash
+kubectl create secret generic mistral-credentials \
+  --from-literal=api-key='your-mistral-api-key'
+```
+
+```yaml
+bifrost:
+  providers:
+    mistral:
+      keys:
+        - name: "mistral-main"
+          value: "env.MISTRAL_API_KEY"
+          weight: 1
+          models: ["*"]
+
+  providerSecrets:
+    mistral-key:
+      existingSecret: "mistral-credentials"
+      key: "api-key"
+      envVar: "MISTRAL_API_KEY"
+```
+
+
+
+
+All standard API-key providers follow the same pattern.
Replace the provider name and env var name accordingly: + +```yaml +bifrost: + providers: + cohere: + keys: + - name: "cohere-main" + value: "env.COHERE_API_KEY" + weight: 1 + perplexity: + keys: + - name: "perplexity-main" + value: "env.PERPLEXITY_API_KEY" + weight: 1 + xai: + keys: + - name: "xai-main" + value: "env.XAI_API_KEY" + weight: 1 + cerebras: + keys: + - name: "cerebras-main" + value: "env.CEREBRAS_API_KEY" + weight: 1 + openrouter: + keys: + - name: "openrouter-main" + value: "env.OPENROUTER_API_KEY" + weight: 1 + nebius: + keys: + - name: "nebius-main" + value: "env.NEBIUS_API_KEY" + weight: 1 +``` + + + + +**Install command (any of the above)** + +```bash +helm install bifrost bifrost/bifrost \ + --set image.tag=v1.4.11 \ + -f provider-values.yaml +``` + + + + + +### Self-Hosted Providers + +Self-hosted providers point to a URL you operate. No API key is typically required (`value: ""`). + + + + +```yaml +# ollama-values.yaml +image: + tag: "v1.4.11" + +bifrost: + providers: + ollama: + keys: + - name: "ollama-local" + value: "" + weight: 1 + models: ["*"] + ollama_key_config: + url: "http://ollama.default.svc.cluster.local:11434" +``` + +```bash +helm install bifrost bifrost/bifrost -f ollama-values.yaml +``` + +Using an env var for the URL (useful across environments): + +```bash +kubectl create secret generic ollama-config \ + --from-literal=url='http://ollama.default.svc.cluster.local:11434' +``` + +```yaml + ollama_key_config: + url: "env.OLLAMA_URL" + + providerSecrets: + ollama-url: + existingSecret: "ollama-config" + key: "url" + envVar: "OLLAMA_URL" +``` + + + + +vLLM instances are model-specific — one key per served model. + +```yaml +# vllm-values.yaml +image: + tag: "v1.4.11" + +bifrost: + providers: + vllm: + keys: + - name: "vllm-llama3-70b" + value: "" + weight: 1 + models: ["llama-3-70b"] + vllm_key_config: + url: "http://vllm.default.svc.cluster.local:8000" + model_name: "meta-llama/Meta-Llama-3-70B-Instruct" + - name: "vllm-mistral" + value: "" + weight: 1 + models: ["mistral-7b"] + vllm_key_config: + url: "http://vllm-mistral.default.svc.cluster.local:8000" + model_name: "mistralai/Mistral-7B-Instruct-v0.3" +``` + +```bash +helm install bifrost bifrost/bifrost -f vllm-values.yaml +``` + + + + +```yaml +# sgl-values.yaml +image: + tag: "v1.4.11" + +bifrost: + providers: + sgl: + keys: + - name: "sgl-main" + value: "" + weight: 1 + models: ["*"] + sgl_key_config: + url: "http://sgl-router.default.svc.cluster.local:30000" +``` + +```bash +helm install bifrost bifrost/bifrost -f sgl-values.yaml +``` + + + + +These providers use `aliases` to map logical model names to provider-specific IDs. 
+ +```yaml +bifrost: + providers: + huggingface: + keys: + - name: "hf-main" + value: "env.HF_API_KEY" + weight: 1 + models: ["llama-3", "mixtral"] + aliases: + llama-3: "meta-llama/Meta-Llama-3-8B-Instruct" + mixtral: "mistralai/Mixtral-8x7B-Instruct-v0.1" + + replicate: + keys: + - name: "replicate-main" + value: "env.REPLICATE_API_KEY" + weight: 1 + models: ["llama-3"] + aliases: + llama-3: "meta/meta-llama-3-70b-instruct" + replicate_key_config: + use_deployments_endpoint: false +``` + + + + + + + + +--- + +## Multi-Provider Example + +Combine providers in a single values file: + +```yaml +# multi-provider-values.yaml +image: + tag: "v1.4.11" + +bifrost: + providers: + openai: + keys: + - name: "openai-primary" + value: "env.OPENAI_API_KEY" + weight: 2 + models: ["*"] + anthropic: + keys: + - name: "anthropic-primary" + value: "env.ANTHROPIC_API_KEY" + weight: 1 + models: ["*"] + groq: + keys: + - name: "groq-primary" + value: "env.GROQ_API_KEY" + weight: 1 + models: ["*"] + + providerSecrets: + openai-key: + existingSecret: "provider-keys" + key: "openai" + envVar: "OPENAI_API_KEY" + anthropic-key: + existingSecret: "provider-keys" + key: "anthropic" + envVar: "ANTHROPIC_API_KEY" + groq-key: + existingSecret: "provider-keys" + key: "groq" + envVar: "GROQ_API_KEY" + + plugins: + logging: + enabled: true + governance: + enabled: true +``` + +```bash +# Create a single secret with all provider keys +kubectl create secret generic provider-keys \ + --from-literal=openai='sk-your-openai-key' \ + --from-literal=anthropic='sk-ant-your-anthropic-key' \ + --from-literal=groq='gsk_your-groq-key' + +helm install bifrost bifrost/bifrost -f multi-provider-values.yaml +``` diff --git a/docs/deployment-guides/helm/storage.mdx b/docs/deployment-guides/helm/storage.mdx new file mode 100644 index 0000000000..244ece3fb2 --- /dev/null +++ b/docs/deployment-guides/helm/storage.mdx @@ -0,0 +1,550 @@ +--- +title: "Storage" +description: "Configure Bifrost storage backends in Helm — SQLite, PostgreSQL (embedded and external), per-store overrides, and S3/GCS object storage for logs" +icon: "database" +--- + +Bifrost persists two types of data — **config** (providers, virtual keys, governance rules) and **logs** (request/response records). Each has its own store, both defaulting to the top-level `storage.mode`. + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `storage.mode` | Default backend for both stores (`sqlite` or `postgres`) | `sqlite` | +| `storage.configStore.type` | Override backend for the config store | `""` (inherits `storage.mode`) | +| `storage.logsStore.type` | Override backend for the logs store | `""` (inherits `storage.mode`) | + + +When any store uses SQLite the chart deploys a **StatefulSet** with a PVC. With PostgreSQL only (no SQLite) it deploys a **Deployment**. Mixing backends (e.g. config=postgres, logs=sqlite) still requires a StatefulSet. + + +--- + + + + + +### SQLite (Default) + +Simplest setup — no external database required. Bifrost runs as a StatefulSet with a persistent volume for the SQLite files. 
+
+| Parameter | Description | Default |
+|-----------|-------------|---------|
+| `storage.persistence.enabled` | Create a PVC for SQLite data | `true` |
+| `storage.persistence.size` | PVC size | `10Gi` |
+| `storage.persistence.accessMode` | PVC access mode | `ReadWriteOnce` |
+| `storage.persistence.storageClass` | Storage class (leave empty for cluster default) | `""` |
+| `storage.persistence.existingClaim` | Reuse an existing PVC | `""` |
+
+```yaml
+# sqlite-values.yaml
+image:
+  tag: "v1.4.11"
+
+storage:
+  mode: sqlite
+  persistence:
+    enabled: true
+    size: 20Gi
+    # storageClass: "gp3" # uncomment to pin storage class
+
+bifrost:
+  encryptionKey: "your-32-byte-encryption-key-here"
+```
+
+```bash
+helm install bifrost bifrost/bifrost -f sqlite-values.yaml
+```
+
+**Reuse an existing PVC** (e.g. after a StatefulSet migration):
+
+```yaml
+storage:
+  persistence:
+    existingClaim: "bifrost-data"
+```
+
+
+Upgrading from SQLite to PostgreSQL requires a data migration — the two stores are not compatible. Plan accordingly before switching `storage.mode` on a running deployment.
+
+
+#### StatefulSet Migration (chart v2.0.0+)
+
+Prior to v2.0.0, SQLite used a Deployment + manual PVC. v2.0.0 moved SQLite to a StatefulSet. If upgrading from an older chart:
+
+```bash
+# 1. Scale down the old deployment
+kubectl scale deployment bifrost --replicas=0
+
+# 2. Note the existing PVC name
+kubectl get pvc
+
+# 3. Upgrade the chart, pointing at the existing claim
+helm upgrade bifrost bifrost/bifrost \
+  --reuse-values \
+  --set storage.persistence.existingClaim=<existing-pvc-name> \
+  --set image.tag=v1.4.11
+```
+
+
+
+
+
+### Embedded PostgreSQL
+
+The chart can deploy a PostgreSQL instance alongside Bifrost. Good for simple production setups where you don't have an existing database.
+
+| Parameter | Description | Default |
+|-----------|-------------|---------|
+| `storage.mode` | Set to `postgres` | `sqlite` |
+| `postgresql.enabled` | Deploy PostgreSQL as a sub-deployment | `false` |
+| `postgresql.auth.username` | Database user | `bifrost` |
+| `postgresql.auth.password` | Database password | `bifrost_password` |
+| `postgresql.auth.database` | Database name | `bifrost` |
+| `postgresql.primary.persistence.size` | PVC size for PostgreSQL data | `8Gi` |
+
+
+Ensure the database is created with **UTF8 encoding**. The embedded PostgreSQL deployment handles this automatically. See [PostgreSQL UTF8 Requirement](/quickstart/gateway/setting-up#postgresql-utf8-requirement) for manual setups.
+
+
+```bash
+kubectl create secret generic postgres-credentials \
+  --from-literal=password='your-secure-postgres-password'
+```
+
+```yaml
+# embedded-postgres-values.yaml
+image:
+  tag: "v1.4.11"
+
+storage:
+  mode: postgres
+
+postgresql:
+  enabled: true
+  auth:
+    username: bifrost
+    password: "your-secure-postgres-password" # use existingSecret in production
+    database: bifrost
+  primary:
+    persistence:
+      enabled: true
+      size: 50Gi
+    resources:
+      requests:
+        cpu: 500m
+        memory: 1Gi
+      limits:
+        cpu: 2000m
+        memory: 4Gi
+
+bifrost:
+  encryptionKey: "your-32-byte-encryption-key-here"
+```
+
+```bash
+helm install bifrost bifrost/bifrost -f embedded-postgres-values.yaml
+```
+
+**Verify the connection from Bifrost:**
+
+```bash
+kubectl exec -it deployment/bifrost -- nc -zv bifrost-postgresql 5432
+```
+
+
+
+
+
+### External PostgreSQL
+
+Point Bifrost at an existing PostgreSQL instance — RDS, Cloud SQL, Azure Database, or self-managed.
+ +| Parameter | Description | Default | +|-----------|-------------|---------| +| `postgresql.enabled` | Must be `false` | `false` | +| `postgresql.external.enabled` | Enable external connection | `false` | +| `postgresql.external.host` | Hostname or IP | `""` | +| `postgresql.external.port` | Port | `5432` | +| `postgresql.external.user` | Username | `bifrost` | +| `postgresql.external.database` | Database name | `bifrost` | +| `postgresql.external.sslMode` | SSL mode (`disable`, `require`, `verify-ca`, `verify-full`) | `disable` | +| `postgresql.external.existingSecret` | Secret name for the password | `""` | +| `postgresql.external.passwordKey` | Key within the secret | `"password"` | + +```bash +kubectl create secret generic external-postgres-credentials \ + --from-literal=password='your-external-postgres-password' +``` + +```yaml +# external-postgres-values.yaml +image: + tag: "v1.4.11" + +storage: + mode: postgres + +postgresql: + enabled: false + external: + enabled: true + host: "your-rds-endpoint.us-east-1.rds.amazonaws.com" + port: 5432 + user: bifrost + database: bifrost + sslMode: require + existingSecret: "external-postgres-credentials" + passwordKey: "password" + +bifrost: + encryptionKey: "your-32-byte-encryption-key-here" +``` + +```bash +helm install bifrost bifrost/bifrost -f external-postgres-values.yaml +``` + +**Test connectivity before installing:** + +```bash +kubectl run pg-test --image=postgres:16-alpine --rm -it --restart=Never -- \ + psql "host=your-rds-endpoint.us-east-1.rds.amazonaws.com dbname=bifrost user=bifrost sslmode=require" \ + -c "SELECT version();" +``` + + + + + +### Mixed Backend + +Run the config store on PostgreSQL (fast lookups, shared across replicas) while keeping logs on SQLite (simpler, cheaper for append-heavy workloads). + +```yaml +# mixed-values.yaml +image: + tag: "v1.4.11" + +storage: + mode: sqlite # default fallback + configStore: + type: postgres # override: config uses postgres + logsStore: + type: sqlite # explicit: logs use sqlite + persistence: + enabled: true + size: 20Gi # for the SQLite logs store + +postgresql: + external: + enabled: true + host: "your-postgres-host.example.com" + port: 5432 + user: bifrost + database: bifrost + sslMode: require + existingSecret: "postgres-credentials" + passwordKey: "password" + +bifrost: + encryptionKey: "your-32-byte-encryption-key-here" +``` + +```bash +kubectl create secret generic postgres-credentials \ + --from-literal=password='your-postgres-password' + +helm install bifrost bifrost/bifrost -f mixed-values.yaml +``` + + +In mixed mode, Bifrost deploys a StatefulSet (because SQLite is in use) with both a PostgreSQL connection and a local PVC for the SQLite log store. + + +**PostgreSQL connection pool tuning** (high log volume): + +```yaml +storage: + configStore: + type: postgres + maxIdleConns: 5 + maxOpenConns: 50 + logsStore: + type: postgres + maxIdleConns: 10 + maxOpenConns: 100 +``` + + + + + +--- + +## Object Storage for Logs + +Offload large request/response payloads from the database to S3 or GCS. The DB retains only lightweight index records; payloads are fetched on demand. 
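+Offloaded payloads are written once and read back rarely (only when a logged request is opened), so once a bucket is configured below, a lifecycle rule that tiers old objects to cheaper storage can cut costs. A sketch with the AWS CLI; the bucket name, prefix, and 30-day window are illustrative:
+
+```bash
+# Move objects under the bifrost/ prefix to infrequent-access storage after 30 days
+aws s3api put-bucket-lifecycle-configuration \
+  --bucket bifrost-logs \
+  --lifecycle-configuration '{
+    "Rules": [{
+      "ID": "tier-old-log-payloads",
+      "Status": "Enabled",
+      "Filter": {"Prefix": "bifrost/"},
+      "Transitions": [{"Days": 30, "StorageClass": "STANDARD_IA"}]
+    }]
+  }'
+```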
+ + + + +```bash +kubectl create secret generic s3-credentials \ + --from-literal=access-key-id='AKIAIOSFODNN7EXAMPLE' \ + --from-literal=secret-access-key='wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY' +``` + +```yaml +storage: + logsStore: + objectStorage: + enabled: true + type: s3 + bucket: "bifrost-logs" + prefix: "bifrost" + compress: true # gzip compression + + # S3 configuration + region: us-east-1 + accessKeyId: "env.S3_ACCESS_KEY_ID" + secretAccessKey: "env.S3_SECRET_ACCESS_KEY" + # endpoint: "" # Custom endpoint for MinIO / Cloudflare R2 + # forcePathStyle: false # Set true for MinIO + +bifrost: + # inject S3 credentials as env vars + providerSecrets: + s3-access-key: + existingSecret: "s3-credentials" + key: "access-key-id" + envVar: "S3_ACCESS_KEY_ID" + s3-secret-key: + existingSecret: "s3-credentials" + key: "secret-access-key" + envVar: "S3_SECRET_ACCESS_KEY" +``` + +**Using IAM role (IRSA / instance profile) instead of static keys:** + +```yaml +storage: + logsStore: + objectStorage: + enabled: true + type: s3 + bucket: "bifrost-logs" + region: us-east-1 + # No accessKeyId / secretAccessKey — uses SDK default chain + roleArn: "arn:aws:iam::123456789012:role/BifrostS3Role" +``` + + + + +```bash +kubectl create secret generic gcs-credentials \ + --from-literal=service-account-json="$(cat service-account-key.json)" +``` + +```yaml +storage: + logsStore: + objectStorage: + enabled: true + type: gcs + bucket: "bifrost-logs" + prefix: "bifrost" + compress: true + + # GCS configuration + projectId: "my-gcp-project" + credentialsJson: "env.GCS_CREDENTIALS_JSON" # omit for Workload Identity + +bifrost: + providerSecrets: + gcs-creds: + existingSecret: "gcs-credentials" + key: "service-account-json" + envVar: "GCS_CREDENTIALS_JSON" +``` + + + + +```yaml +storage: + logsStore: + objectStorage: + enabled: true + type: s3 + bucket: "bifrost-logs" + prefix: "bifrost" + compress: false + + region: us-east-1 # can be any value for MinIO + endpoint: "http://minio.minio-ns.svc.cluster.local:9000" + accessKeyId: "env.MINIO_ACCESS_KEY" + secretAccessKey: "env.MINIO_SECRET_KEY" + forcePathStyle: true # required for MinIO +``` + + + + +```bash +helm upgrade bifrost bifrost/bifrost \ + --reuse-values \ + -f object-storage-values.yaml +``` + +--- + +## Vector Store + +A vector store is required for [semantic caching](/deployment-guides/helm/plugins). Choose from Weaviate, Redis, or Qdrant (embedded or external), or Pinecone (external only). 
+ + + + +```yaml +vectorStore: + enabled: true + type: weaviate + weaviate: + enabled: true # deploy embedded Weaviate + replicas: 1 + persistence: + enabled: true + size: 20Gi + resources: + requests: + cpu: 500m + memory: 1Gi + limits: + cpu: 2000m + memory: 4Gi +``` + +**External Weaviate:** + +```yaml +vectorStore: + enabled: true + type: weaviate + weaviate: + enabled: false + external: + enabled: true + scheme: https + host: "weaviate.example.com" + apiKey: "env.WEAVIATE_API_KEY" + grpcHost: "weaviate-grpc.example.com" + grpcSecured: true + existingSecret: "weaviate-credentials" + apiKeyKey: "api-key" +``` + + + + +```yaml +vectorStore: + enabled: true + type: redis + redis: + enabled: true # deploy embedded Redis + auth: + enabled: true + password: "redis_password" + master: + persistence: + size: 8Gi +``` + +**External Redis / AWS MemoryDB:** + +```bash +kubectl create secret generic redis-credentials \ + --from-literal=password='your-redis-password' +``` + +```yaml +vectorStore: + enabled: true + type: redis + redis: + enabled: false + external: + enabled: true + host: "your-redis.cache.amazonaws.com" + port: 6379 + useTls: true + clusterMode: true # required for AWS MemoryDB + existingSecret: "redis-credentials" + passwordKey: "password" +``` + + + + +```yaml +vectorStore: + enabled: true + type: qdrant + qdrant: + enabled: true # deploy embedded Qdrant + persistence: + size: 10Gi +``` + +**External Qdrant:** + +```bash +kubectl create secret generic qdrant-credentials \ + --from-literal=api-key='your-qdrant-api-key' +``` + +```yaml +vectorStore: + enabled: true + type: qdrant + qdrant: + enabled: false + external: + enabled: true + host: "qdrant.example.com" + port: 6334 + useTls: true + existingSecret: "qdrant-credentials" + apiKeyKey: "api-key" +``` + + + + +Pinecone is external-only. + +```bash +kubectl create secret generic pinecone-credentials \ + --from-literal=api-key='your-pinecone-api-key' +``` + +```yaml +vectorStore: + enabled: true + type: pinecone + pinecone: + external: + enabled: true + indexHost: "your-index.svc.us-east1-gcp.pinecone.io" + existingSecret: "pinecone-credentials" + apiKeyKey: "api-key" +``` + + + + +```bash +helm install bifrost bifrost/bifrost \ + --set image.tag=v1.4.11 \ + -f storage-values.yaml +``` diff --git a/docs/deployment-guides/helm/troubleshooting.mdx b/docs/deployment-guides/helm/troubleshooting.mdx new file mode 100644 index 0000000000..1a46d0219d --- /dev/null +++ b/docs/deployment-guides/helm/troubleshooting.mdx @@ -0,0 +1,401 @@ +--- +title: "Troubleshooting" +description: "Diagnose and fix common issues with Bifrost Helm deployments — pods, database, ingress, secrets, PVCs, and performance" +icon: "wrench" +--- + +This page covers the most common problems encountered when deploying Bifrost with Helm, along with diagnostic commands and fixes. 
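+When something is off and you are not yet sure which section applies, a release-level snapshot is a reasonable first pass (release name `bifrost` assumed):
+
+```bash
+# Overall release health, then every object the chart created
+helm status bifrost
+kubectl get pods,svc,pvc -l app.kubernetes.io/instance=bifrost
+```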
+
+---
+
+## Pod Not Starting
+
+### Quick diagnostics
+
+```bash
+# Show pod status
+kubectl get pods -l app.kubernetes.io/name=bifrost
+
+# Show pod events (most useful first step)
+kubectl describe pod -l app.kubernetes.io/name=bifrost
+
+# Show pod logs (use --previous if the pod has already crashed)
+kubectl logs -l app.kubernetes.io/name=bifrost
+kubectl logs -l app.kubernetes.io/name=bifrost --previous
+```
+
+### Image pull errors (`ErrImagePull` / `ImagePullBackOff`)
+
+```bash
+# Check which image is being pulled
+kubectl describe pod -l app.kubernetes.io/name=bifrost | grep "Image:"
+
+# Verify imagePullSecrets are attached
+kubectl get pod -l app.kubernetes.io/name=bifrost -o jsonpath='{.items[0].spec.imagePullSecrets}'
+
+# Test secret manually
+kubectl get secret <pull-secret-name> -o jsonpath='{.data.\.dockerconfigjson}' | base64 -d | jq .
+```
+
+Common causes:
+- `image.tag` not set — the chart requires it; the pod will not start without it
+- Pull secret missing or expired (ECR tokens expire after 12 hours)
+- Incorrect `image.repository` for enterprise registry
+
+```bash
+# Fix: set the correct tag
+helm upgrade bifrost bifrost/bifrost --reuse-values --set image.tag=v1.4.11
+```
+
+### PVC not binding (`Pending`)
+
+```bash
+# Check PVC status
+kubectl get pvc -l app.kubernetes.io/instance=bifrost
+
+# Show binding events
+kubectl describe pvc -l app.kubernetes.io/instance=bifrost
+```
+
+Common causes:
+- No PersistentVolume provisioner in the cluster
+- `storageClass` set to a class that doesn't exist
+- `ReadWriteOnce` access mode with multiple replicas (SQLite PVCs are single-node)
+
+```bash
+# List available storage classes
+kubectl get storageclass
+
+# Fix: pin to a valid storage class
+helm upgrade bifrost bifrost/bifrost \
+  --reuse-values \
+  --set storage.persistence.storageClass=standard
+```
+
+### ConfigMap / Secret errors
+
+```bash
+# View the generated ConfigMap (contains rendered config.json)
+kubectl get configmap bifrost-config -o yaml
+
+# View secrets the pod depends on
+kubectl get secret -l app.kubernetes.io/instance=bifrost
+
+# Decode a specific secret value
+kubectl get secret bifrost-encryption -o jsonpath='{.data.key}' | base64 -d
+```
+
+### CrashLoopBackOff
+
+```bash
+# Get last log lines before the crash
+kubectl logs -l app.kubernetes.io/name=bifrost --previous --tail=50
+
+# Common causes shown in logs:
+# "encryption key is required" → bifrost.encryptionKey or encryptionKeySecret not set
+# "failed to connect to database" → see Database section below
+# "image.tag is required" → set image.tag in values
+```
+
+---
+
+## Database Connection Issues
+
+### Embedded PostgreSQL
+
+```bash
+# Check if the PostgreSQL pod is running
+kubectl get pods -l app.kubernetes.io/name=bifrost-postgresql
+
+# Connect directly to inspect the database
+kubectl exec -it deployment/bifrost-postgresql -- psql -U bifrost -d bifrost
+
+# Test connectivity from the Bifrost pod
+kubectl exec -it deployment/bifrost -- nc -zv bifrost-postgresql 5432
+
+# Check PostgreSQL logs
+kubectl logs deployment/bifrost-postgresql --tail=50
+```
+
+### External PostgreSQL
+
+```bash
+# Test connectivity from within the cluster
+kubectl run pg-test --image=postgres:16-alpine --rm -it --restart=Never -- \
+  psql "host=your-db-host dbname=bifrost user=bifrost sslmode=require"
+
+# Verify the secret value is correct
+kubectl get secret postgres-credentials -o jsonpath='{.data.password}' | base64 -d
+
+# Check that the external host/port is reachable
+kubectl exec -it deployment/bifrost -- nc -zv
your-db-host 5432 +``` + +Common causes: +- `sslMode: disable` when the database requires SSL — set `sslMode: require` +- Password in secret doesn't match the database user +- Network policy blocking pod → database traffic +- Database not UTF8 encoded (see [PostgreSQL UTF8 Requirement](/quickstart/gateway/setting-up#postgresql-utf8-requirement)) + +```bash +# Fix: update the secret and restart +kubectl create secret generic postgres-credentials \ + --from-literal=password='correct-password' \ + --dry-run=client -o yaml | kubectl apply -f - + +kubectl rollout restart deployment/bifrost +``` + +--- + +## Ingress Not Working + +```bash +# Check ingress resource status +kubectl describe ingress bifrost + +# Check if the ingress controller is running +kubectl get pods -n ingress-nginx -l app.kubernetes.io/name=ingress-nginx + +# View ingress controller logs for routing errors +kubectl logs -n ingress-nginx -l app.kubernetes.io/name=ingress-nginx --tail=50 + +# Verify DNS resolves to the correct load balancer IP +nslookup bifrost.yourdomain.com +kubectl get ingress bifrost -o jsonpath='{.status.loadBalancer.ingress[0].ip}' + +# Test without TLS first +curl -v http://bifrost.yourdomain.com/health +``` + +Common causes: +- `ingress.className` not set or set to a class not installed in the cluster +- TLS certificate not issued yet (cert-manager can take up to 60 seconds) +- Service port mismatch — Bifrost listens on `8080` by default + +```bash +# Check cert-manager certificate status +kubectl get certificate -l app.kubernetes.io/instance=bifrost +kubectl describe certificate bifrost-tls +``` + +--- + +## Secret and Credential Issues + +### Provider API key not resolving + +If Bifrost logs show `env.OPENAI_API_KEY: not set` or similar: + +```bash +# Check the env var is present in the running pod +kubectl exec -it deployment/bifrost -- env | grep OPENAI + +# Verify the providerSecrets secret exists with the right key +kubectl get secret provider-api-keys -o yaml + +# Check the providerSecrets configuration rendered correctly +kubectl get configmap bifrost-config -o yaml | grep -A5 providers +``` + +### Encryption key issues + +```bash +# Verify the secret exists and contains the right key name +kubectl get secret bifrost-encryption -o yaml + +# Check the exact key name matches encryptionKeySecret.key in values +# Default key name is "encryption-key" — if you used "key", set: +# bifrost.encryptionKeySecret.key: "key" +``` + +--- + +## High Memory Usage + +```bash +# Check current resource usage +kubectl top pods -l app.kubernetes.io/name=bifrost + +# Check if OOM kills are happening +kubectl describe pod -l app.kubernetes.io/name=bifrost | grep -A3 "OOMKilled\|Limits" + +# View resource requests/limits on running pods +kubectl get pod -l app.kubernetes.io/name=bifrost \ + -o jsonpath='{range .items[*]}{.metadata.name}{"\t"}{.spec.containers[0].resources}{"\n"}{end}' +``` + +**Increase resource limits:** + +```bash +helm upgrade bifrost bifrost/bifrost \ + --reuse-values \ + --set resources.limits.memory=4Gi \ + --set resources.requests.memory=1Gi +``` + +**Tune Go runtime** (see [Docker Tuning](/deployment-guides/docker-tuning)): + +```yaml +env: + - name: GOGC + value: "200" # run GC less often + - name: GOMEMLIMIT + value: "3500MiB" # hard memory ceiling slightly below the container limit +``` + +--- + +## High CPU Usage / Latency + +```bash +# Check CPU usage +kubectl top pods -l app.kubernetes.io/name=bifrost + +# Check if HPA is scaling correctly +kubectl get hpa bifrost +kubectl describe hpa 
bifrost
+```
+
+Common causes:
+- `initialPoolSize` too small — goroutines queuing up; increase to `500`–`1000`
+- `dropExcessRequests: false` with a small pool — queue depth grows without bound
+
+```bash
+helm upgrade bifrost bifrost/bifrost \
+  --reuse-values \
+  --set bifrost.client.initialPoolSize=1000 \
+  --set bifrost.client.dropExcessRequests=true
+```
+
+---
+
+## Autoscaling Issues
+
+### HPA not scaling
+
+```bash
+# Check HPA status and current metrics
+kubectl describe hpa bifrost
+
+# Verify metrics server is installed
+kubectl top nodes
+kubectl top pods
+
+# Common fix: metrics server not installed
+# Install with:
+kubectl apply -f https://github.com/kubernetes-sigs/metrics-server/releases/latest/download/components.yaml
+```
+
+### Pods scaling down too aggressively (drops active SSE streams)
+
+The default `scaleDown.stabilizationWindowSeconds: 300` and `preStop` sleep of 15 seconds should prevent this. If streams are still being cut:
+
+```yaml
+terminationGracePeriodSeconds: 120 # increase if streams run longer than 105s
+
+autoscaling:
+  behavior:
+    scaleDown:
+      stabilizationWindowSeconds: 600 # wait 10 min before scaling down
+      policies:
+        - type: Pods
+          value: 1
+          periodSeconds: 300 # remove at most 1 pod per 5 min
+
+lifecycle:
+  preStop:
+    exec:
+      command: ["sh", "-c", "sleep 30"] # give load balancer more time to drain
+```
+
+```bash
+helm upgrade bifrost bifrost/bifrost --reuse-values -f graceful-shutdown-values.yaml
+```
+
+---
+
+## SQLite / PVC Issues
+
+### StatefulSet migration (upgrading from chart < v2.0.0)
+
+Older chart versions used a Deployment + manual PVC. v2.0.0 moved SQLite to a StatefulSet. If upgrading:
+
+```bash
+# 1. Scale down the old deployment
+kubectl scale deployment bifrost --replicas=0
+
+# 2. Note the existing PVC name
+kubectl get pvc
+
+# 3. Upgrade, pointing at the existing claim
+helm upgrade bifrost bifrost/bifrost \
+  --reuse-values \
+  --set storage.persistence.existingClaim=<existing-pvc-name> \
+  --set image.tag=v1.4.11
+```
+
+### Data lost after upgrade
+
+```bash
+# Check if PVCs still exist (they persist after helm uninstall)
+kubectl get pvc -l app.kubernetes.io/instance=bifrost
+
+# Re-attach by setting existingClaim
+helm upgrade bifrost bifrost/bifrost \
+  --reuse-values \
+  --set storage.persistence.existingClaim=<existing-pvc-name>
+```
+
+---
+
+## Cluster Mode Issues
+
+### Peers not discovering each other
+
+```bash
+# Check gossip port is reachable between pods
+kubectl exec -it bifrost-0 -- nc -zv bifrost-1.bifrost-headless 7946
+
+# View gossip-related log lines
+kubectl logs -l app.kubernetes.io/name=bifrost --tail=100 | grep -i gossip
+
+# Check the headless service exists
+kubectl get svc bifrost-headless
+```
+
+For Kubernetes-based discovery, verify the service account has pod list permissions:
+
+```bash
+kubectl auth can-i list pods --as=system:serviceaccount:default:bifrost
+```
+
+---
+
+## Useful Diagnostic Commands
+
+```bash
+# Full state dump for a support ticket
+kubectl get all -l app.kubernetes.io/instance=bifrost
+kubectl describe pod -l app.kubernetes.io/name=bifrost > pod-describe.txt
+kubectl logs -l app.kubernetes.io/name=bifrost --tail=200 > pod-logs.txt
+
+# View the full rendered config.json
+kubectl get configmap bifrost-config -o jsonpath='{.data.config\.json}' | jq .
+
+# Check current Helm values (shows all overrides)
+helm get values bifrost
+
+# Check Helm release status
+helm status bifrost
+
+# View Helm release history
+helm history bifrost
+```
+
+---
+
+## Still Stuck?
+ +- [GitHub Issues](https://github.com/maximhq/bifrost/issues) — search existing issues or open a new one +- [Enterprise Support](mailto:support@getmaxim.ai) — for enterprise customers with SLA diff --git a/docs/deployment-guides/helm/values.mdx b/docs/deployment-guides/helm/values.mdx new file mode 100644 index 0000000000..3161b206fb --- /dev/null +++ b/docs/deployment-guides/helm/values.mdx @@ -0,0 +1,718 @@ +--- +title: "Values Reference" +description: "Complete reference for Bifrost Helm chart values — key parameters, how to supply them, and links to example files" +icon: "sliders" +--- + +This page covers every top-level parameter group in the Bifrost Helm chart's `values.yaml`, how to supply values via `--set` vs `-f`, and where to find ready-made example files. + + +The full values schema is available at [https://getbifrost.ai/schema](https://getbifrost.ai/schema). All `values.yaml` fields map directly to `config.json` fields generated by the chart. + + +## Supplying Values + +### One-liner with `--set` + +Good for a single field or quick experiments: + +```bash +helm install bifrost bifrost/bifrost \ + --set image.tag=v1.4.11 \ + --set replicaCount=3 \ + --set bifrost.client.initialPoolSize=500 +``` + +### Values file with `-f` + +Recommended for anything beyond a couple of fields: + +```bash +# Create your values file +cat > my-values.yaml <<'EOF' +image: + tag: "v1.4.11" + +replicaCount: 2 + +bifrost: + encryptionKey: "your-32-byte-encryption-key-here" + client: + initialPoolSize: 500 + enableLogging: true +EOF + +# Install +helm install bifrost bifrost/bifrost -f my-values.yaml + +# Upgrade later +helm upgrade bifrost bifrost/bifrost -f my-values.yaml + +# Upgrade and reuse all previously set values, overriding only one field +helm upgrade bifrost bifrost/bifrost \ + --reuse-values \ + --set replicaCount=5 +``` + +### Multiple values files + +Later files override earlier ones — useful for a base + environment-specific overlay: + +```bash +helm install bifrost bifrost/bifrost \ + -f base-values.yaml \ + -f production-overrides.yaml +``` + +--- + +## Key Parameters Reference + +### Image + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `image.repository` | Container image repository | `docker.io/maximhq/bifrost` | +| `image.tag` | **Required.** Image version (e.g. 
`v1.4.11`) | `""` | +| `image.pullPolicy` | Image pull policy | `IfNotPresent` | +| `imagePullSecrets` | List of pull secret names for private registries | `[]` | + +```bash +# Always specify the tag — the chart will not start without it +helm install bifrost bifrost/bifrost --set image.tag=v1.4.11 +``` + +### Replicas & Autoscaling + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `replicaCount` | Static replica count (ignored when HPA is enabled) | `1` | +| `autoscaling.enabled` | Enable Horizontal Pod Autoscaler | `false` | +| `autoscaling.minReplicas` | Minimum replicas | `1` | +| `autoscaling.maxReplicas` | Maximum replicas | `10` | +| `autoscaling.targetCPUUtilizationPercentage` | CPU target for scaling | `80` | +| `autoscaling.targetMemoryUtilizationPercentage` | Memory target for scaling | `80` | +| `autoscaling.behavior.scaleDown.stabilizationWindowSeconds` | Cooldown before scale-down (important for SSE streams) | `300` | +| `autoscaling.behavior.scaleDown.policies[0].value` | Max pods removed per period | `1` | + +### Resources + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `resources.requests.cpu` | CPU request | `500m` | +| `resources.requests.memory` | Memory request | `512Mi` | +| `resources.limits.cpu` | CPU limit | `2000m` | +| `resources.limits.memory` | Memory limit | `2Gi` | + +### Service + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `service.type` | `ClusterIP`, `LoadBalancer`, or `NodePort` | `ClusterIP` | +| `service.port` | Service port | `8080` | + +### Ingress + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `ingress.enabled` | Enable ingress | `false` | +| `ingress.className` | Ingress class (e.g. `nginx`, `traefik`) | `""` | +| `ingress.annotations` | Ingress annotations | `{}` | +| `ingress.hosts` | Host rules | see values.yaml | +| `ingress.tls` | TLS configuration | `[]` | + +```yaml +ingress: + enabled: true + className: nginx + annotations: + cert-manager.io/cluster-issuer: letsencrypt-prod + nginx.ingress.kubernetes.io/proxy-body-size: "100m" + hosts: + - host: bifrost.yourdomain.com + paths: + - path: / + pathType: Prefix + tls: + - secretName: bifrost-tls + hosts: + - bifrost.yourdomain.com +``` + +### Probes + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `livenessProbe.initialDelaySeconds` | Seconds before first liveness check | `30` | +| `livenessProbe.periodSeconds` | Liveness check interval | `30` | +| `readinessProbe.initialDelaySeconds` | Seconds before first readiness check | `10` | +| `readinessProbe.periodSeconds` | Readiness check interval | `10` | + +Both probes hit `GET /health`. + +### Graceful Shutdown + +Bifrost supports long-lived SSE streaming connections. The default `preStop` hook and termination grace period let in-flight streams finish before the pod is killed: + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `terminationGracePeriodSeconds` | Total grace period | `60` | +| `lifecycle.preStop.exec.command` | Sleep before SIGTERM so load balancer drains | `["sh", "-c", "sleep 15"]` | + +Increase `terminationGracePeriodSeconds` if your typical stream responses take longer than 45 seconds. + +### Service Account + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `serviceAccount.create` | Create a dedicated service account | `true` | +| `serviceAccount.annotations` | Annotations (e.g. 
for IRSA, Workload Identity) | `{}` | +| `serviceAccount.name` | Override the generated name | `""` | + +### Pod Scheduling + +```yaml +# Spread replicas across nodes +affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + app.kubernetes.io/name: bifrost + topologyKey: kubernetes.io/hostname + +# Pin to specific node pool +nodeSelector: + node-type: ai-workload + +# Tolerate GPU taints +tolerations: + - key: "gpu" + operator: "Equal" + value: "true" + effect: "NoSchedule" +``` + +### Extra Environment Variables + +Three ways to inject env vars: + +```yaml +# Inline key/value pairs +env: + - name: HTTP_PROXY + value: "http://proxy.corp.example.com:3128" + +# Map syntax (appended after env) +extraEnv: + NO_PROXY: "169.254.169.254,10.0.0.0/8" + +# Bulk-load from existing Secrets or ConfigMaps +envFrom: + - secretRef: + name: my-corp-secrets + - configMapRef: + name: my-app-config +``` + +### Init Containers + +```yaml +initContainers: + - name: wait-for-db + image: busybox:1.35 + command: ["sh", "-c", "until nc -z postgres-svc 5432; do sleep 2; done"] +``` + +--- + +## Values Examples + +The chart ships ready-made example files under [`helm-charts/bifrost/values-examples/`](https://github.com/maximhq/bifrost/tree/main/helm-charts/bifrost/values-examples): + +| File | Use case | +|------|----------| +| `sqlite-only.yaml` | Minimal local/dev setup | +| `postgres-only.yaml` | Single-store Postgres | +| `production-ha.yaml` | HA: 3 replicas, Postgres, Weaviate, HPA, Ingress | +| `providers-and-virtual-keys.yaml` | All 23 providers + 7 virtual key patterns | +| `secrets-from-k8s.yaml` | All sensitive values from Kubernetes Secrets | +| `external-postgres.yaml` | Point at an existing Postgres instance | +| `postgres-redis.yaml` | Postgres + Redis vector store | +| `postgres-weaviate.yaml` | Postgres + Weaviate vector store | +| `postgres-qdrant.yaml` | Postgres + Qdrant vector store | +| `semantic-cache-secret-example.yaml` | Semantic cache with secret injection | +| `mixed-backend.yaml` | Config store = postgres, logs store = sqlite | + +Install from an example file directly: + +```bash +helm install bifrost bifrost/bifrost \ + -f https://raw.githubusercontent.com/maximhq/bifrost/main/helm-charts/bifrost/values-examples/production-ha.yaml \ + --set image.tag=v1.4.11 +``` + +--- + +## Helm Operations + +### View current values + +```bash +helm get values bifrost +``` + +### Diff before upgrading (requires helm-diff plugin) + +```bash +helm diff upgrade bifrost bifrost/bifrost -f my-values.yaml +``` + +### Rollback + +```bash +helm history bifrost +helm rollback bifrost # to previous revision +helm rollback bifrost 2 # to revision 2 +``` + +### Uninstall + +```bash +helm uninstall bifrost + +# Also remove PVCs (deletes all data) +kubectl delete pvc -l app.kubernetes.io/instance=bifrost +``` + +--- + +## All Key Parameters + +A quick-reference table of the most commonly used top-level parameters: + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `image.tag` | **Required.** Bifrost image version (e.g., `v1.4.11`) | `""` | +| `replicaCount` | Number of replicas | `1` | +| `storage.mode` | Storage backend (`sqlite` or `postgres`) | `sqlite` | +| `storage.persistence.size` | PVC size for SQLite | `10Gi` | +| `postgresql.enabled` | Deploy embedded PostgreSQL | `false` | +| `vectorStore.enabled` | Enable vector store | `false` | +| `vectorStore.type` | Vector store type (`weaviate`, `redis`, `qdrant`) | `none` | +| 
`bifrost.encryptionKey` | Encryption key (use `encryptionKeySecret` in production) | `""` |
+| `ingress.enabled` | Enable ingress | `false` |
+| `autoscaling.enabled` | Enable HPA | `false` |
+
+### Secret Reference Parameters
+
+Use existing Kubernetes Secrets instead of plain-text values. Every sensitive field in the chart has a corresponding `existingSecret` / `secretRef` alternative:
+
+| Parameter | Description | Default |
+|-----------|-------------|---------|
+| `bifrost.encryptionKeySecret.name` | Secret name for encryption key | `""` |
+| `bifrost.encryptionKeySecret.key` | Key within the secret | `"encryption-key"` |
+| `postgresql.external.existingSecret` | Secret name for PostgreSQL password | `""` |
+| `postgresql.external.passwordKey` | Key within the secret | `"password"` |
+| `vectorStore.redis.external.existingSecret` | Secret name for Redis password | `""` |
+| `vectorStore.redis.external.passwordKey` | Key within the secret | `"password"` |
+| `vectorStore.weaviate.external.existingSecret` | Secret name for Weaviate API key | `""` |
+| `vectorStore.weaviate.external.apiKeyKey` | Key within the secret | `"api-key"` |
+| `vectorStore.qdrant.external.existingSecret` | Secret name for Qdrant API key | `""` |
+| `vectorStore.qdrant.external.apiKeyKey` | Key within the secret | `"api-key"` |
+| `bifrost.plugins.maxim.secretRef.name` | Secret name for Maxim API key | `""` |
+| `bifrost.plugins.maxim.secretRef.key` | Key within the secret | `"api-key"` |
+| `bifrost.providerSecrets.<name>.existingSecret` | Secret name for provider API key | `""` |
+| `bifrost.providerSecrets.<name>.key` | Key within the secret | `"api-key"` |
+| `bifrost.providerSecrets.<name>.envVar` | Environment variable name to inject | `""` |
+
+---
+
+## Advanced Configuration
+
+### Comprehensive Example
+
+A production-ready values file combining the most common settings:
+
+```yaml
+# my-values.yaml
+image:
+  tag: "v1.4.11"
+
+replicaCount: 3
+
+storage:
+  mode: postgres
+
+postgresql:
+  enabled: true
+  auth:
+    password: "secure-password" # use existingSecret in production
+
+autoscaling:
+  enabled: true
+  minReplicas: 3
+  maxReplicas: 10
+
+ingress:
+  enabled: true
+  className: nginx
+  hosts:
+    - host: bifrost.example.com
+      paths:
+        - path: /
+          pathType: Prefix
+
+bifrost:
+  encryptionKeySecret:
+    name: "bifrost-encryption"
+    key: "key"
+  providers:
+    openai:
+      keys:
+        - name: "primary"
+          value: "env.OPENAI_API_KEY"
+          weight: 1
+  providerSecrets:
+    openai:
+      existingSecret: "provider-api-keys"
+      key: "openai-api-key"
+      envVar: "OPENAI_API_KEY"
+```
+
+```bash
+helm install bifrost bifrost/bifrost -f my-values.yaml
+```
+
+### Node Affinity & Scheduling
+
+Deploy to specific nodes and spread replicas across hosts:
+
+```yaml
+nodeSelector:
+  node-type: ai-workload
+
+affinity:
+  podAntiAffinity:
+    requiredDuringSchedulingIgnoredDuringExecution:
+      - labelSelector:
+          matchLabels:
+            app.kubernetes.io/name: bifrost
+        topologyKey: kubernetes.io/hostname
+
+tolerations:
+  - key: "gpu"
+    operator: "Equal"
+    value: "true"
+    effect: "NoSchedule"
+```
+
+### Deployment & Pod Annotations
+
+Useful for tooling like [Keel](https://keel.sh) for automatic image updates or Datadog APM injection:
+
+```yaml
+deploymentAnnotations:
+  keel.sh/policy: force
+  keel.sh/trigger: poll
+
+podAnnotations:
+  ad.datadoghq.com/bifrost.logs: '[{"source":"bifrost","service":"bifrost"}]'
+```
+
+---
+
+## Common Patterns
+
+Ready-made values files for the most common deployment scenarios.
Each pattern builds on the [quickstart](/deployment-guides/helm). + + + + +Simple setup for local testing. SQLite, single replica, no autoscaling. + +```bash +helm install bifrost bifrost/bifrost \ + --set image.tag=v1.4.11 \ + --set 'bifrost.providers.openai.keys[0].name=dev-key' \ + --set 'bifrost.providers.openai.keys[0].value=sk-your-key' \ + --set 'bifrost.providers.openai.keys[0].weight=1' +``` + +```bash +# Access +kubectl port-forward svc/bifrost 8080:8080 +``` + + + + +Multiple LLM providers with weighted load balancing. + +```bash +kubectl create secret generic provider-keys \ + --from-literal=openai-api-key='sk-...' \ + --from-literal=anthropic-api-key='sk-ant-...' \ + --from-literal=gemini-api-key='your-gemini-key' +``` + +```yaml +# multi-provider.yaml +image: + tag: "v1.4.11" + +bifrost: + encryptionKey: "your-encryption-key" + + client: + enableLogging: true + allowDirectKeys: false + + providers: + openai: + keys: + - name: "openai-primary" + value: "env.OPENAI_API_KEY" + weight: 2 # 50% of traffic + anthropic: + keys: + - name: "anthropic-primary" + value: "env.ANTHROPIC_API_KEY" + weight: 1 # 25% + gemini: + keys: + - name: "gemini-primary" + value: "env.GEMINI_API_KEY" + weight: 1 # 25% + + providerSecrets: + openai: + existingSecret: "provider-keys" + key: "openai-api-key" + envVar: "OPENAI_API_KEY" + anthropic: + existingSecret: "provider-keys" + key: "anthropic-api-key" + envVar: "ANTHROPIC_API_KEY" + gemini: + existingSecret: "provider-keys" + key: "gemini-api-key" + envVar: "GEMINI_API_KEY" + + plugins: + telemetry: + enabled: true + logging: + enabled: true +``` + +```bash +helm install bifrost bifrost/bifrost -f multi-provider.yaml +``` + + + + +Use an existing PostgreSQL instance — RDS, Cloud SQL, Azure Database, or self-managed. + +```bash +kubectl create secret generic postgres-credentials \ + --from-literal=password='your-external-postgres-password' +``` + +```yaml +# external-db.yaml +image: + tag: "v1.4.11" + +storage: + mode: postgres + +postgresql: + enabled: false + external: + enabled: true + host: "your-rds-endpoint.us-east-1.rds.amazonaws.com" + port: 5432 + user: "bifrost" + database: "bifrost" + sslMode: "require" + existingSecret: "postgres-credentials" + passwordKey: "password" + +bifrost: + encryptionKey: "your-encryption-key" + + providers: + openai: + keys: + - name: "openai-primary" + value: "sk-..." + weight: 1 +``` + +```bash +helm install bifrost bifrost/bifrost -f external-db.yaml +``` + + + + +Semantic response caching for high-volume AI inference. 
+ +```bash +kubectl create secret generic bifrost-encryption \ + --from-literal=key='your-32-byte-encryption-key' + +kubectl create secret generic provider-keys \ + --from-literal=openai-api-key='sk-your-key' +``` + +```yaml +# ai-workload.yaml +image: + tag: "v1.4.11" + +storage: + mode: postgres + +postgresql: + enabled: true + auth: + password: "secure-password" + primary: + persistence: + size: 50Gi + +vectorStore: + enabled: true + type: weaviate + weaviate: + enabled: true + persistence: + size: 50Gi + +bifrost: + encryptionKeySecret: + name: "bifrost-encryption" + key: "key" + + providers: + openai: + keys: + - name: "openai-primary" + value: "env.OPENAI_API_KEY" + weight: 1 + + providerSecrets: + openai: + existingSecret: "provider-keys" + key: "openai-api-key" + envVar: "OPENAI_API_KEY" + + plugins: + semanticCache: + enabled: true + config: + provider: "openai" + keys: + - value: "env.OPENAI_API_KEY" + weight: 1 + embedding_model: "text-embedding-3-small" + dimension: 1536 + threshold: 0.85 + ttl: "1h" + cache_by_model: true + cache_by_provider: true +``` + +```bash +helm install bifrost bifrost/bifrost -f ai-workload.yaml +``` + + + + +Zero credentials in values files — all sensitive data in Kubernetes Secrets. + +```bash +kubectl create secret generic postgres-credentials \ + --from-literal=password='your-postgres-password' + +kubectl create secret generic bifrost-encryption \ + --from-literal=key='your-encryption-key' + +kubectl create secret generic provider-keys \ + --from-literal=openai-api-key='sk-...' \ + --from-literal=anthropic-api-key='sk-ant-...' + +kubectl create secret generic qdrant-credentials \ + --from-literal=api-key='your-qdrant-api-key' +``` + +```yaml +# secrets-only.yaml +image: + tag: "v1.4.11" + +storage: + mode: postgres + +postgresql: + enabled: false + external: + enabled: true + host: "postgres.example.com" + port: 5432 + user: "bifrost" + database: "bifrost" + sslMode: "require" + existingSecret: "postgres-credentials" + passwordKey: "password" + +vectorStore: + enabled: true + type: qdrant + qdrant: + enabled: false + external: + enabled: true + host: "qdrant.example.com" + port: 6334 + existingSecret: "qdrant-credentials" + apiKeyKey: "api-key" + +bifrost: + encryptionKeySecret: + name: "bifrost-encryption" + key: "key" + + providers: + openai: + keys: + - name: "openai-primary" + value: "env.OPENAI_API_KEY" + weight: 1 + anthropic: + keys: + - name: "anthropic-primary" + value: "env.ANTHROPIC_API_KEY" + weight: 1 + + providerSecrets: + openai: + existingSecret: "provider-keys" + key: "openai-api-key" + envVar: "OPENAI_API_KEY" + anthropic: + existingSecret: "provider-keys" + key: "anthropic-api-key" + envVar: "ANTHROPIC_API_KEY" +``` + +```bash +helm install bifrost bifrost/bifrost -f secrets-only.yaml +``` + + + diff --git a/docs/docs.json b/docs/docs.json index e9dbee41d9..d3d600003d 100644 --- a/docs/docs.json +++ b/docs/docs.json @@ -239,6 +239,7 @@ "enterprise/guardrails", "enterprise/clustering", "enterprise/adaptive-load-balancing", + "enterprise/user-provisioning", { "group": "Advanced Governance", "icon": "shield-check", @@ -248,7 +249,6 @@ ] }, "enterprise/mcp-with-fa", - "enterprise/vault-support", "enterprise/invpc-deployments", "enterprise/custom-plugins", "enterprise/audit-logs", @@ -325,7 +325,9 @@ "icon": "id-card", "pages": [ "enterprise/setting-up-okta", - "enterprise/setting-up-entra" + "enterprise/setting-up-entra", + "enterprise/setting-up-zitadel", + "enterprise/setting-up-google-workspace" ] }, { @@ -338,16 +340,6 @@ 
"integrations/guardrails/patronus-ai" ] }, - { - "group": "Secret Management (Vaults)", - "icon": "vault", - "pages": [ - "integrations/vaults/hashicorp-vault", - "integrations/vaults/aws-secrets-manager", - "integrations/vaults/google-secret-manager", - "integrations/vaults/azure-key-vault" - ] - }, { "group": "Observability", "icon": "binoculars", @@ -400,10 +392,31 @@ "pages": [ "deployment-guides/k8s", "deployment-guides/ecs", - "deployment-guides/helm", "deployment-guides/fly" ] }, + { + "group": "Config as Code", + "icon": "code", + "pages": [ + { + "group": "Helm", + "icon": "helicopter-symbol", + "pages": [ + "deployment-guides/helm", + "deployment-guides/helm/values", + "deployment-guides/helm/client", + "deployment-guides/helm/providers", + "deployment-guides/helm/storage", + "deployment-guides/helm/plugins", + "deployment-guides/helm/governance", + "deployment-guides/helm/guardrails", + "deployment-guides/helm/cluster", + "deployment-guides/helm/troubleshooting" + ] + } + ] + }, { "group": "Enterprise Deployment", "icon": "building", @@ -489,8 +502,10 @@ "item": "Open Source", "icon": "rocket", "pages": [ + "changelogs/v1.5.0-prerelease3", "changelogs/v1.5.0-prerelease2", "changelogs/v1.5.0-prerelease1", + "changelogs/v1.4.23", "changelogs/v1.4.22", "changelogs/v1.4.21", { @@ -746,7 +761,7 @@ }, { "source": "/features/enterprise/scim", - "destination": "/enterprise/setting-up-okta" + "destination": "/enterprise/user-provisioning" }, { "source": "/enterprise/intelligent-load-balancing", diff --git a/docs/enterprise/audit-logs.mdx b/docs/enterprise/audit-logs.mdx index 66e3de9c08..79f4c83951 100644 --- a/docs/enterprise/audit-logs.mdx +++ b/docs/enterprise/audit-logs.mdx @@ -50,7 +50,7 @@ icon: "scroll" - Rate limit changes - Provider key updates - Guardrail configuration changes -- SAML/OIDC settings updates +- SCIM/OIDC settings updates ### Data Access Events - PII detection and handling @@ -80,25 +80,10 @@ icon: "scroll" ```json { - "enterprise": { - "audit_logs": { - "enabled": true, - "retention": { - "duration": "365d", - "archive_after": "90d" - }, - "capture": { - "authentication": true, - "authorization": true, - "configuration_changes": true, - "data_access": true, - "security_events": true - }, - "immutability": { - "enabled": true, - "verification_method": "cryptographic_hash" - } - } + "audit_logs": { + "disabled": false, + "hmac_key": "env.AUDIT_HMAC_KEY", + "retention_days": 365 } } ``` @@ -126,64 +111,13 @@ BIFROST_AUDIT_IMMUTABLE=true -### Advanced Configuration +### Configuration Fields -```json -{ - "audit_logs": { - "enabled": true, - "backup": { - "type": "s3", - "bucket": "bifrost-audit-logs", - "region": "us-west-2", - "encryption": "AES256" - } - }, - "retention": { - "duration": "365d", - "archive_after": "90d", - "delete_after": "2555d", - "hot_storage_days": 30 - }, - "capture": { - "authentication": { - "enabled": true, - "include_failed_attempts": true, - "track_session_duration": true - }, - "authorization": { - "enabled": true, - "log_allowed_access": false, - "log_denied_access": true - }, - "configuration_changes": { - "enabled": true, - "track_before_after": true, - "exclude_fields": ["password", "api_key"] - }, - "data_access": { - "enabled": true, - "log_pii_detection": true, - "log_sensitive_operations": true - }, - "security_events": { - "enabled": true, - "severity_threshold": "medium" - } - }, - "enrichment": { - "geo_location": true, - "user_agent_parsing": true, - "ip_reputation": true - }, - "immutability": { - "enabled": true, - 
"verification_method": "cryptographic_hash", - "signing_key": "${AUDIT_LOG_SIGNING_KEY}" - } - } -} -``` +| Field | Type | Description | +|-------|------|-------------| +| `disabled` | boolean | When `true`, audit logging is turned off. Default: `false`. | +| `hmac_key` | string | HMAC secret key used to sign audit events. Minimum 32 bytes. Supports `env.` prefix for environment variables (e.g. `env.AUDIT_HMAC_KEY`). | +| `retention_days` | integer | Days to retain audit log entries. `0` disables retention-based cleanup. | --- diff --git a/docs/enterprise/clustering.mdx b/docs/enterprise/clustering.mdx index 3c037a3390..73ddc7e8ae 100644 --- a/docs/enterprise/clustering.mdx +++ b/docs/enterprise/clustering.mdx @@ -80,8 +80,7 @@ The new clustering configuration uses a `cluster_config` object with integrated "discovery": { "enabled": true, "type": "kubernetes", - "service_name": "bifrost-cluster", - // Discovery-specific configuration here + "service_name": "bifrost-cluster" }, "gossip": { "port": 10101, @@ -95,6 +94,8 @@ The new clustering configuration uses a `cluster_config` object with integrated } ``` +Discovery-specific fields (e.g. `k8s_label_selector`, `consul_address`, `etcd_endpoints`) slot into the `discovery` object alongside `type` — see each method's section below. + ### Common Discovery Configuration Fields All discovery methods support these common fields: @@ -173,7 +174,12 @@ Kubernetes discovery uses the K8s API to automatically discover pods based on la "k8s_label_selector": "app=bifrost" }, "gossip": { - "port": 10101 + "port": 10101, + "config": { + "timeout_seconds": 10, + "success_threshold": 3, + "failure_threshold": 3 + } } } } @@ -367,7 +373,12 @@ Consul discovery integrates with HashiCorp Consul for service registration and d "consul_address": "consul.service.consul:8500" }, "gossip": { - "port": 10101 + "port": 10101, + "config": { + "timeout_seconds": 10, + "success_threshold": 3, + "failure_threshold": 3 + } } } } @@ -510,7 +521,12 @@ etcd discovery uses etcd's distributed key-value store for service registration "dial_timeout": "10s" }, "gossip": { - "port": 10101 + "port": 10101, + "config": { + "timeout_seconds": 10, + "success_threshold": 3, + "failure_threshold": 3 + } } } } @@ -661,7 +677,12 @@ DNS discovery uses standard DNS resolution to discover cluster nodes. 
Works with "bind_port": 10101 }, "gossip": { - "port": 10101 + "port": 10101, + "config": { + "timeout_seconds": 10, + "success_threshold": 3, + "failure_threshold": 3 + } } } } @@ -806,7 +827,12 @@ UDP broadcast discovery automatically finds nodes on the same local network usin "dial_timeout": "10s" }, "gossip": { - "port": 10101 + "port": 10101, + "config": { + "timeout_seconds": 10, + "success_threshold": 3, + "failure_threshold": 3 + } } } } @@ -937,7 +963,12 @@ mDNS (Multicast DNS) provides zero-configuration service discovery on local netw "dial_timeout": "10s" }, "gossip": { - "port": 10101 + "port": 10101, + "config": { + "timeout_seconds": 10, + "success_threshold": 3, + "failure_threshold": 3 + } } } } @@ -1353,7 +1384,12 @@ sudo cat > /etc/bifrost/config.json < 0", - "apply_to": "input", - "sampling_rate": 100, - "timeout": 2000, - "provider_config_ids": [1] - } - ] - } + "guardrails_config": { + "guardrail_rules": [ + { + "id": 1, + "name": "Block PII in Prompts", + "description": "Prevent PII from being sent to LLM providers", + "enabled": true, + "cel_expression": "request.messages.exists(m, m.role == \"user\")", + "apply_to": "input", + "sampling_rate": 100, + "timeout": 5000, + "provider_config_ids": [1, 2] + }, + { + "id": 2, + "name": "Content Filter for Responses", + "description": "Filter harmful content from LLM responses", + "enabled": true, + "cel_expression": "true", + "apply_to": "output", + "sampling_rate": 100, + "timeout": 3000, + "provider_config_ids": [2] + }, + { + "id": 3, + "name": "Prompt Injection Detection", + "description": "Detect and block prompt injection attempts", + "enabled": true, + "cel_expression": "request.messages.size() > 0", + "apply_to": "input", + "sampling_rate": 100, + "timeout": 2000, + "provider_config_ids": [1] + } + ] } } ``` @@ -279,27 +277,26 @@ curl -X DELETE http://localhost:8080/api/enterprise/guardrails/rules/1 ```yaml -enterprise: - guardrails: - rules: - - id: 1 - name: "Block PII in Prompts" - description: "Prevent PII from being sent to LLM providers" - enabled: true - cel_expression: "request.messages.exists(m, m.role == 'user')" - apply_to: "input" - sampling_rate: 100 - timeout: 5000 - provider_config_ids: [1, 2] - - id: 2 - name: "Content Filter for Responses" - description: "Filter harmful content from LLM responses" - enabled: true - cel_expression: "true" - apply_to: "output" - sampling_rate: 100 - timeout: 3000 - provider_config_ids: [2] +guardrails_config: + guardrail_rules: + - id: 1 + name: "Block PII in Prompts" + description: "Prevent PII from being sent to LLM providers" + enabled: true + cel_expression: "request.messages.exists(m, m.role == 'user')" + apply_to: "input" + sampling_rate: 100 + timeout: 5000 + provider_config_ids: [1, 2] + - id: 2 + name: "Content Filter for Responses" + description: "Filter harmful content from LLM responses" + enabled: true + cel_expression: "true" + apply_to: "output" + sampling_rate: 100 + timeout: 3000 + provider_config_ids: [2] ``` @@ -464,9 +461,8 @@ curl -X DELETE http://localhost:8080/api/enterprise/guardrails/providers/1 ```json { - "enterprise": { - "guardrails": { - "guardrail_providers": [ + "guardrails_config": { + "guardrail_providers": [ { "id": 1, "provider_name": "bedrock", @@ -522,57 +518,55 @@ curl -X DELETE http://localhost:8080/api/enterprise/guardrails/providers/1 ] } } -} ``` ```yaml -enterprise: - guardrails: - providers: - - id: 1 - provider_name: "bedrock" - policy_name: "PII Detection Profile" - enabled: true - config: - guardrail_arn: 
"arn:aws:bedrock:us-east-1:123456789:guardrail/abc123" - guardrail_version: "1" - region: "us-east-1" - # AWS Authentication (choose one method): - # Option 1: Explicit credentials - access_key: "${AWS_ACCESS_KEY_ID}" - secret_key: "${AWS_SECRET_ACCESS_KEY}" - # Option 2: IAM Role - omit access_key and secret_key - # (Bifrost will use IAM credentials from the environment) - - id: 2 - provider_name: "azure" - policy_name: "Content Safety Profile" - enabled: true - config: - endpoint: "https://your-resource.cognitiveservices.azure.com/" - api_key: "${AZURE_CONTENT_SAFETY_API_KEY}" - analyze_enabled: true - analyze_severity_threshold: "medium" - jailbreak_shield_enabled: true - - id: 3 - provider_name: "grayswan" - policy_name: "Custom Safety Rules" - enabled: true - config: - api_key: "${GRAYSWAN_API_KEY}" - violation_threshold: 0.5 - reasoning_mode: "hybrid" - rules: - no_pii: "Do not allow personally identifiable information" - professional_tone: "Ensure responses maintain a professional tone" - - id: 4 - provider_name: "patronus_ai" - policy_name: "Hallucination Detection" - enabled: true - config: - api_endpoint: "https://api.patronus.ai/v1" +guardrails_config: + guardrail_providers: + - id: 1 + provider_name: "bedrock" + policy_name: "PII Detection Profile" + enabled: true + config: + guardrail_arn: "arn:aws:bedrock:us-east-1:123456789:guardrail/abc123" + guardrail_version: "1" + region: "us-east-1" + # AWS Authentication (choose one method): + # Option 1: Explicit credentials + access_key: "${AWS_ACCESS_KEY_ID}" + secret_key: "${AWS_SECRET_ACCESS_KEY}" + # Option 2: IAM Role - omit access_key and secret_key + # (Bifrost will use IAM credentials from the environment) + - id: 2 + provider_name: "azure" + policy_name: "Content Safety Profile" + enabled: true + config: + endpoint: "https://your-resource.cognitiveservices.azure.com/" + api_key: "${AZURE_CONTENT_SAFETY_API_KEY}" + analyze_enabled: true + analyze_severity_threshold: "medium" + jailbreak_shield_enabled: true + - id: 3 + provider_name: "grayswan" + policy_name: "Custom Safety Rules" + enabled: true + config: + api_key: "${GRAYSWAN_API_KEY}" + violation_threshold: 0.5 + reasoning_mode: "hybrid" + rules: + no_pii: "Do not allow personally identifiable information" + professional_tone: "Ensure responses maintain a professional tone" + - id: 4 + provider_name: "patronus_ai" + policy_name: "Hallucination Detection" + enabled: true + config: + api_endpoint: "https://api.patronus.ai/v1" ``` diff --git a/docs/enterprise/rbac.mdx b/docs/enterprise/rbac.mdx index e1f4f61c1e..86cd19e4a9 100644 --- a/docs/enterprise/rbac.mdx +++ b/docs/enterprise/rbac.mdx @@ -14,7 +14,7 @@ Role-Based Access Control (RBAC) in Bifrost Enterprise provides fine-grained acc - **Audit-Ready** - Track who has access to what for compliance requirements - **Flexible Role Design** - Use system roles or create custom roles for your organization -RBAC integrates seamlessly with [Identity Provider authentication](./advanced-governance#identity-provider-integration), automatically assigning roles based on your IdP groups and claims. +RBAC integrates seamlessly with [User Provisioning (SCIM)](./user-provisioning), automatically assigning roles based on your IdP groups and claims. 
---

@@ -169,7 +169,7 @@ Custom "Ops" role with:

## Integration with Identity Providers

-When using [Okta](./setting-up-okta) or [Microsoft Entra](./setting-up-entra) for authentication, roles can be automatically assigned based on:
+When using [User Provisioning (SCIM)](./user-provisioning) with Okta, Entra, Zitadel, Keycloak, or Google Workspace, roles can be automatically assigned based on:

- **IdP Groups** - Map identity provider groups to Bifrost roles
- **App Roles** - Sync application roles from your IdP
diff --git a/docs/enterprise/setting-up-entra.mdx b/docs/enterprise/setting-up-entra.mdx
index a796138b14..9d2848ee21 100644
--- a/docs/enterprise/setting-up-entra.mdx
+++ b/docs/enterprise/setting-up-entra.mdx
@@ -51,7 +51,11 @@ You can add an app icon to make the application easily recognizable. The Bifrost

---

-## Step 2: Create App Roles
+## Step 2: Create App Roles (Optional)
+
+
+ This step is optional. You can create custom roles if that's your preferred approach, or you can map any attribute to a role, team, or business unit. Role mapping itself is a required step.
+

Configure roles in Entra that map to Bifrost's role hierarchy (Admin, Developer, Viewer).

@@ -93,10 +97,6 @@ Configure roles in Entra that map to Bifrost's role hierarchy (Admin, Developer,
| **Description** | Admin role on Bifrost |
| **State** | Enabled |

-
-The role **Value** must be lowercase (`admin`, `developer`, `viewer`) to match Bifrost's role resolution logic. Users with multiple roles will be assigned the highest privilege role.
-
-
---

## Step 3: Enable Assignment Required
@@ -168,6 +168,10 @@ Ensure your application has the necessary permissions.

## Step 6: Configure Token Claims (Optional)

+
+ Groups and other attributes are required in the claim when you configure their mapping in Bifrost.
+
+
By default, Entra includes the `roles` claim when app roles are assigned. To include group memberships for team synchronization:

1. Go to **Token configuration**
@@ -177,10 +181,6 @@ By default, Entra includes the `roles` claim when app roles are assigned. To inc
   - For token type, enable **ID** and **Access**
4. Click **Add**

-
-Group IDs from Entra will be used as team IDs in Bifrost. You may want to create groups in Entra that correspond to your teams.
-
-
---

## Step 7: Assign Users and Roles
@@ -203,12 +203,45 @@ You can assign roles to groups for easier management. All users in a group will

---

-## Step 8: Configure Bifrost
+## Step 8: Configure App Manifest
+
+
+Microsoft Entra app manifest
+
+
+You will need to make two changes in the app manifest:
+
+```json
+"requestedAccessTokenVersion": 2
+```
+
+and
+
+```json
+"optionalClaims": {
+  "idToken": [
+    {
+      "name": "roles",
+      "source": null,
+      "essential": false,
+      "additionalProperties": []
+    }
+  ],
+  "accessToken": [],
+  "saml2Token": []
+}
+```
+
+## Step 9: Configure Bifrost

Now configure Bifrost to use Microsoft Entra as the identity provider.

### Using the Bifrost UI

+
+ User Provisioning configuration in Bifrost
+
+
1. Navigate to **Governance** → **User Provisioning** in your Bifrost dashboard
2. Select **Microsoft Entra** as the SCIM Provider
3. Enter the following configuration:
@@ -221,8 +254,9 @@ Now configure Bifrost to use Microsoft Entra as the identity provider.
| **Audience** | Your Client ID (optional, defaults to Client ID) |
| **App ID URI** | `api://{client-id}` (optional, for v1.0 tokens) |

-4. Toggle **Enabled** to activate the provider
-5. Click **Save Configuration**
+4. **Verify** the configuration and confirm there are no errors or warnings.
+5. Toggle **Enabled** to activate the provider
+6. Click **Save Configuration**

After saving, you'll need to restart your Bifrost server for the changes to take effect.

@@ -236,33 +270,72 @@ After saving, you'll need to restart your Bifrost server for the changes to take
| `clientId` | Yes | Application (client) ID |
| `clientSecret` | Yes | Client secret for OAuth authentication |
| `audience` | No | JWT audience for validation (defaults to clientId) |
-| `appIdUri` | No | App ID URI for v1.0 tokens (e.g., `api://{clientId}`) |
-| `userIdField` | No | JWT claim for user ID (default: `oid`) |
-| `rolesField` | No | JWT claim for roles (default: `roles`) |
-| `teamIdsField` | No | JWT claim for group/team IDs (default: `groups`) |
+| `attributeRoleMappings` | Yes | Ordered list of attribute→role mappings. First match wins. |
+| `attributeTeamMappings` | No | Attribute→team mappings (all matches apply). |
+| `attributeBusinessUnitMappings` | No | Attribute→business-unit mappings (all matches apply). |

---

-## Role Mapping
+### Attribute Mappings
+
+Attribute mappings let you translate Entra claim values into Bifrost roles, teams, or business units without restructuring your Entra claims. Bifrost supports three mapping types:

-Bifrost automatically maps Entra app roles to its internal role hierarchy:
+- **`attributeRoleMappings`**: map a claim value to a Bifrost role (Admin, Developer, Viewer, or a custom role)
+- **`attributeTeamMappings`**: map a claim value to a Bifrost team
+- **`attributeBusinessUnitMappings`**: map a claim value to a Bifrost business unit

-| Entra Role Value | Bifrost Role | Privilege Level |
-|------------------|--------------|-----------------|
-| `admin` | Admin | Highest |
-| `developer` | Developer | Medium |
-| `viewer` | Viewer | Lowest |
+These mappings work with any Entra claim — the `groups` claim from Step 6, the `roles` claim populated by your app roles (Step 2), or any other claim Entra includes in the token (e.g., `department`, `organization`).

-**Multiple Roles:** If a user has multiple roles assigned, Bifrost automatically selects the highest privilege role. For example, a user with both `viewer` and `developer` roles will be assigned the Developer role in Bifrost.
+To configure attribute mappings:

-**Default Role:** Users without any assigned role will default to the Viewer role.
+1. In the User Provisioning configuration, scroll down to **Attribute Mappings**
+2. Click **Add Mapping** under the relevant mapping type (Role, Team, or Business Unit)
+3. Enter the **Attribute** (the claim name from the token), the **Value** to match, and the target **Role**, **Team**, or **Business Unit**
+4. Repeat for each rule you need
+
+
+ Attribute Mappings configuration in Bifrost
+
+
+
+ When you set the value to "*", the claim value is mapped as-is to the entity name. Value comparisons are case-insensitive.
+
+
+### Custom attribute mapping
+
+You can also map any custom attribute to any entity (role, team, or business unit). Make sure these attributes are configured to be sent back to Bifrost in the token configuration.
+
+
+ Attribute Mappings configuration in Bifrost
+
+
+
+#### Evaluation rules
+
+- **Role mappings**: Ordered, first match wins. If no rule matches, users are not allowed to log in to the system.
+- **Team and business unit mappings**: All matching rules apply — users can be placed on multiple teams and business units simultaneously.
+- **Claim values**: Can be strings, arrays, or nested objects. Bifrost resolves dotted paths (e.g., `realm_access.roles`).
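+
+For illustration, a pair of mappings might look like this in `config.json`. The entry field names (`attribute`, `value`, `role`, `team`) mirror the dashboard labels and are an assumption here, not a documented schema; verify against your Bifrost version before relying on them:
+
+```json
+{
+  "scim_config": {
+    "config": {
+      "attributeRoleMappings": [
+        { "attribute": "roles", "value": "admin", "role": "admin" },
+        { "attribute": "roles", "value": "developer", "role": "developer" }
+      ],
+      "attributeTeamMappings": [
+        { "attribute": "groups", "value": "platform-engineering", "team": "Platform" }
+      ]
+    }
+  }
+}
+```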
+ + + If a user matches multiple role mapping rules, the highest privilege role is assigned. If no + mapping matches, the first user to sign in receives the **Admin** role, and subsequent users receive the **Viewer** + role. + + +5. Click **Save Configuration** --- ## Testing the Integration 1. Open your Bifrost dashboard in a new browser or incognito window -2. You should be redirected to Microsoft login +2. You should be redirected to Entra for authentication 3. Log in with an assigned user 4. After successful authentication, you'll be redirected back to Bifrost 5. Verify the user appears in the Bifrost users list with the correct role @@ -288,23 +361,6 @@ Bifrost automatically maps Entra app roles to its internal role hierarchy: - Ensure you're using the secret **Value**, not the secret ID - Check for any leading/trailing whitespace when copying -### Roles not appearing in token - -- Ensure users are assigned to the Enterprise Application with a role -- Verify app roles are created with the correct lowercase values -- Check that "Assignment required" is enabled - -### "AADSTS70011: The provided request includes an invalid scope" - -- This usually happens when mixing `.default` scope with other scopes -- Bifrost handles this automatically - ensure you're using the latest version - -### Groups not syncing as teams - -- Verify the groups claim is configured in Token configuration -- Ensure users are members of the groups -- Check that groups are created and assigned in Entra - ### Token validation errors - Ensure the Tenant ID matches your Azure directory @@ -315,6 +371,7 @@ Bifrost automatically maps Entra app roles to its internal role hierarchy: ## Next Steps +- **[User Provisioning (SCIM)](./user-provisioning)** - Overview of SCIM in Bifrost and alternative identity providers - **[Advanced Governance](./advanced-governance)** - Learn about user budgets and compliance features - **[Role-Based Access Control](./advanced-governance#role-hierarchy)** - Understand the Admin, Developer, Viewer hierarchy - **[Audit Logs](./audit-logs)** - Monitor user authentication and activity diff --git a/docs/enterprise/setting-up-google-workspace.mdx b/docs/enterprise/setting-up-google-workspace.mdx new file mode 100644 index 0000000000..18fcbe7f9b --- /dev/null +++ b/docs/enterprise/setting-up-google-workspace.mdx @@ -0,0 +1,232 @@ +--- +title: "Setting up Google Workspace" +description: "Step-by-step guide to configure Google Workspace as your identity provider for Bifrost Enterprise SSO and Directory-based user provisioning." +icon: "google" +--- + +## Overview + +This guide walks you through configuring **Google Workspace** as your identity provider for Bifrost Enterprise. The integration has two pieces: + +1. **OAuth 2.0 login** — users sign in to Bifrost with their Google Workspace accounts via a Google OAuth Client ID. +2. **Directory API provisioning (optional)** — a Google **service account** with domain-wide delegation lets Bifrost list users and groups from the Workspace directory for bulk import and team sync. + +You can run login-only (no service account) or full provisioning (with service account + domain-wide delegation). + +## Prerequisites + +- A Google Workspace domain with **Super Admin** access to the Admin console +- A Google Cloud project where you can create OAuth clients and service accounts +- Bifrost Enterprise deployed and accessible +- The redirect URI for your Bifrost instance (e.g. 
`https://your-bifrost-domain.com/login`) +- Bifrost [roles](./rbac) created for the roles you plan to map + +--- + +## Step 1: Configure the OAuth consent screen + +1. In the Google Cloud Console, go to **APIs & Services → OAuth consent screen**. + + + Google OAuth consent screen configuration + + +2. Choose **Internal** if you only want Workspace users, or **External** otherwise. +3. Fill in App name, support email, and developer contact. +4. Add the scopes: `openid`, `profile`, `email`. +5. Save. + +--- + +## Step 2: Create an OAuth Client ID + +1. Open **APIs & Services → Credentials → Create credentials → OAuth client ID**. + + + Creating a Google OAuth Web Application Client ID + + +2. Configure: + +| Field | Value | +| --- | --- | +| **Application type** | Web application | +| **Name** | Bifrost Enterprise | +| **Authorized JavaScript origins** | `https://your-bifrost-domain.com` | +| **Authorized redirect URIs** | `https://your-bifrost-domain.com/login` | + +3. Save and copy the **Client ID** and **Client Secret**. + +--- + +## Step 3: (Optional) Create a service account for Directory API access + +Skip this section if you only want SSO login without directory-based user import. + +1. Go to **IAM & Admin → Service Accounts → Create service account**. + + + Creating a Google service account + + +2. Give it a name (e.g. `bifrost-provisioning`). You can skip the "Grant this service account access to project" step — no GCP IAM roles are required; access is granted via domain-wide delegation in Step 5. +3. Open the service account → **Keys → Add Key → Create new key → JSON**. Download and store the JSON file securely. +4. From the service account **Details** tab, copy the **Unique ID** (a numeric value, **not** the email or OAuth Client ID). + +--- + +## Step 4: Enable the Admin SDK API + +If you're using the service account path: + +1. Open **APIs & Services → Library**. +2. Search for **Admin SDK API** and click **Enable**. + +--- + +## Step 5: Set up domain-wide delegation + +1. In the [Google Admin Console](https://admin.google.com), go to **Security → Access and data control → API controls → Manage Domain Wide Delegation**. + + + Google Workspace Domain-Wide Delegation configuration + + +2. Click **Add new**. +3. Enter the service account's **Unique ID** (from Step 3). +4. Add these OAuth scopes (copy the full URLs, comma-separated): + +``` +https://www.googleapis.com/auth/admin.directory.user.readonly, +https://www.googleapis.com/auth/admin.directory.group.readonly, +https://www.googleapis.com/auth/admin.directory.group.member.readonly +``` + +5. **Authorize**. + + + Domain-wide delegation requires impersonating an admin user. Pick an admin email that will persist (e.g. a dedicated `sso-admin@company.com`) — Bifrost uses this as the **Admin Email** in configuration. + + +--- + +## Step 6: Configure Bifrost + +### Using the Bifrost dashboard + +1. In Bifrost, go to **Governance → User Provisioning**. +2. Select **Google Workspace** as the SCIM Provider. +3. Fill in the fields: + +| Field | Value | +| --- | --- | +| **Domain** | Your Google Workspace primary domain (e.g. 
`company.com`) | +| **Client ID** | OAuth Client ID from Step 2 | +| **Client Secret** | OAuth Client Secret from Step 2 | +| **Audience** | Optional override (defaults to Client ID) | +| **Admin Email** | Admin user to impersonate for Directory API (Step 5) | +| **Service Account Source** | Choose one: Paste JSON / Environment variable / File path | +| **Service Account JSON / Env Var / File** | The value for the chosen source | + + + Bifrost Google Workspace configuration form + + +4. Click **Verify** — Bifrost validates the OAuth client and, if a service account is provided, attempts a Directory API impersonation to confirm delegation is working. +5. Configure **Attribute → Role / Team / Business Unit** mappings to map groups or organizational units to Bifrost roles and teams. +6. Toggle **Enabled** and click **Save Configuration**. + +### Using `config.json` + +```json +{ + "scim_config": { + "enabled": true, + "provider": "google", + "config": { + "domain": "company.com", + "clientId": "123-abc.apps.googleusercontent.com", + "clientSecret": "${GOOGLE_WORKSPACE_CLIENT_SECRET}", + "adminEmail": "sso-admin@company.com", + "serviceAccountEnvVar": "GOOGLE_SA_JSON", + "teamIdsField": "groups" + } + } +} +``` + +Pick one of the three service-account sources: `serviceAccountJson` (raw JSON string), `serviceAccountEnvVar` (env var name holding the JSON), or `serviceAccountFile` (absolute path to the key file). + + +### Custom attribute mapping + +You can also map any custom attributes to any entity (role, team or business unit). Make sure these are configured to send back to Bifrost in token configuration. + + + Attribute Mappings configuration in Bifrost + + +### Configuration reference + +| Field | Required | Description | +| --- | --- | --- | +| `domain` | Yes | Google Workspace primary domain (e.g. `company.com`). | +| `clientId` | Yes | OAuth 2.0 Web Client ID from Step 2. | +| `clientSecret` | Yes | Client Secret — required for token revocation and for confidential server-side flows. | +| `audience` | No | Expected JWT audience. Defaults to `clientId`. | +| `adminEmail` | Yes | Workspace admin to impersonate via domain-wide delegation. Required when any service-account field is set. | +| `serviceAccountJson` | One of 3 | Raw JSON string of the service account key. | +| `serviceAccountEnvVar` | One of 3 | Name of the environment variable containing the JSON. | +| `serviceAccountFile` | One of 3 | Absolute path to the JSON key file on the Bifrost host. | +| `attributeRoleMappings` | Yes | Ordered list of attribute→role mappings. | +| `attributeTeamMappings` | No | Attribute→team mappings (all matches apply). | +| `attributeBusinessUnitMappings` | No | Attribute→business-unit mappings (all matches apply). | + + + Bifrost rejects configs that set a service-account credential source without `adminEmail` — domain-wide delegation cannot work without an impersonation subject. + + +--- + +## Testing the Integration + +1. Open the Bifrost dashboard in an incognito window. +2. You're redirected to `accounts.google.com`; sign in with a Workspace user. +3. Verify you land on the Bifrost dashboard and appear under **Governance → Users**. +4. If provisioning is configured, open **Governance → User Provisioning → Import Users**, filter by a Workspace group, click **Preview**, and confirm users show up. + +--- + +## Troubleshooting + +### `admin_policy_enforced` or `access_denied` during OAuth + +- The Workspace admin has blocked third-party OAuth apps. 
In the Admin Console, go to **Security → Access and data control → API controls** and allow the Bifrost OAuth client. + +### `unauthorized_client: Client is unauthorized to retrieve access tokens` + +- The service account Unique ID and scopes in **Domain-Wide Delegation** don't match. Re-enter the Unique ID (the numeric value from the service account's **Details** tab, not the OAuth client ID). + +### `Not Authorized to access this resource/api` from Directory API + +- The impersonated `adminEmail` is missing the **User Management Admin** role. Promote them in Admin Console → Admin roles. +- The Admin SDK API is not enabled on the Cloud project. + +### Users see a consent prompt every login + +- On the OAuth consent screen, ensure the app is **Published** (or **Internal** for Workspace-only apps) so it doesn't stay in testing mode. + +### `domain_mismatch` + +- The primary domain in the Workspace does not match the `domain` field. Use the primary domain, not an alias. + +--- + +## Next Steps + +- [User Provisioning overview](./user-provisioning) — capabilities, attribute mappings, bulk import +- [Role-Based Access Control](./rbac) — configure custom roles before mapping +- [Audit Logs](./audit-logs) — track authentication events diff --git a/docs/enterprise/setting-up-keycloak.mdx b/docs/enterprise/setting-up-keycloak.mdx new file mode 100644 index 0000000000..b2fff165ba --- /dev/null +++ b/docs/enterprise/setting-up-keycloak.mdx @@ -0,0 +1,237 @@ +--- +title: "Setting up Keycloak" +description: "Step-by-step guide to configure a self-hosted Keycloak realm as your identity provider for Bifrost Enterprise SSO and user provisioning." +icon: "lock" +--- + +## Overview + +This guide walks you through configuring [Keycloak](https://www.keycloak.org) as your identity provider for Bifrost Enterprise. Keycloak uses standard OIDC with JWKS-based JWT validation, and Bifrost uses the same client for both user login and Admin REST API access (via the Service Account of a confidential client). + +After completing this guide, users will sign in with their Keycloak credentials and admins can bulk-import users and groups via the Keycloak Admin REST API. + +## Prerequisites + +- A running Keycloak server (self-hosted or cloud) with admin access to a realm +- Bifrost Enterprise deployed and accessible +- The redirect URI for your Bifrost instance (e.g. `https://your-bifrost-domain.com/login`) +- Bifrost [roles](./rbac) created for the roles you plan to map + +--- + +## Step 1: Create a Client + +1. In the Keycloak Admin Console, select your realm and go to **Clients → Create client**. + + + Creating a client in Keycloak + + +2. Configure the client: + +| Field | Value | +| --- | --- | +| **Client type** | OpenID Connect | +| **Client ID** | `bifrost` (or your preferred identifier) | +| **Name** | `Bifrost Enterprise` | + +3. On the **Capability config** step enable: + - **Client authentication** (makes it a confidential client) + - **Standard flow** (Authorization Code) + - **Service accounts roles** (required for Admin REST API access) + + + Keycloak client capability configuration + + +4. On the **Login settings** step set: + +| Field | Value | +| --- | --- | +| **Valid redirect URIs** | `https://your-bifrost-domain.com/login` | +| **Valid post logout redirect URIs** | `https://your-bifrost-domain.com` | +| **Web origins** | `https://your-bifrost-domain.com` | + +5. **Save** the client. + +--- + +## Step 2: Copy the client credentials + +1. Open the client and go to the **Credentials** tab. +2. 
Copy the **Client Secret**. + + + Keycloak client credentials tab + + +--- + +## Step 3: Configure role and group mappers + +Keycloak does not include realm roles or full group paths in tokens by default. Add two mappers on the client's dedicated scope. + +1. Open the client → **Client Scopes** tab → click the client's `-dedicated` scope. +2. Click **Add mapper → By configuration**. + +### Group Membership mapper + + + Group Membership mapper configuration + + +| Field | Value | +| --- | --- | +| **Mapper Type** | Group Membership | +| **Name** | `groups` | +| **Token Claim Name** | `groups` | +| **Full group path** | **On** | +| **Add to ID token** | **On** | +| **Add to access token** | **On** | +| **Add to userinfo** | **On** | + + + Bifrost uses full group paths for consistent matching across SSO and bulk provisioning flows — keep **Full group path** enabled. + + +### Realm Roles mapper + +| Field | Value | +| --- | --- | +| **Mapper Type** | User Realm Role | +| **Name** | `realm_roles` | +| **Token Claim Name** | `realm_access.roles` | +| **Claim JSON Type** | String | +| **Multivalued** | **On** | +| **Add to ID token** | **On** | +| **Add to access token** | **On** | + +--- + +## Step 4: Assign Admin REST API permissions + +The same client runs both authentication and provisioning. Grant it read access to the realm so it can list users and groups. + +1. Open the client → **Service accounts roles** tab. +2. Click **Assign role** and select: + - `realm-management` → **view-users** (required) + - `realm-management` → **view-realm** (recommended, enables group and role listing) + - `realm-management` → **query-groups** (optional, for group filters) + + + Keycloak service account roles + + +--- + +## Step 5: Create realm roles and groups + +Create the roles and groups you plan to map into Bifrost. + +1. **Realm → Realm roles → Create role** for each role (e.g. `bifrost-admin`, `bifrost-developer`, `bifrost-viewer`). +2. **Realm → Groups → Create group** for each team you want to sync (e.g. `/platform`, `/data-science`). +3. Assign users to the appropriate roles and groups under **Users → your user → Role mapping** / **Groups**. + +--- + +## Step 6: Configure Bifrost + +### Using the Bifrost dashboard + +1. In Bifrost, go to **Governance → User Provisioning**. +2. Select **Keycloak** as the SCIM Provider. +3. Fill in the fields: + +| Field | Value | +| --- | --- | +| **Server URL** | `https://keycloak.company.com` (no `/realms/...` suffix) | +| **Realm** | Your realm name (e.g. `master`, `bifrost-prod`) | +| **Client ID** | Client ID from Step 1 | +| **Client Secret** | Client Secret from Step 2 | +| **Audience** | Optional — defaults to Client ID | +| **Team IDs Field** | Leave as `groups` (default) or change if you used a different mapper name | + +4. Click **Verify** — Bifrost connects to Keycloak's JWKS and Admin REST API to confirm the client and service-account roles. +5. Configure **Attribute → Role / Team / Business Unit** mappings if needed. +6. Toggle **Enabled** and click **Save Configuration**. 
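+
+If **Verify** fails, you can reproduce the provisioning check by hand. A sketch of the service-account token request Bifrost relies on, using Keycloak's standard token endpoint (substitute your server, realm, and credentials):
+
+```bash
+# A 200 response containing an access_token confirms that client
+# authentication and the service account are configured correctly.
+curl -s -X POST \
+  -d "grant_type=client_credentials" \
+  -d "client_id=bifrost" \
+  -d "client_secret=$KEYCLOAK_CLIENT_SECRET" \
+  "https://keycloak.company.com/realms/bifrost-prod/protocol/openid-connect/token"
+```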
+ + + Bifrost Keycloak configuration form + + +### Using `config.json` + +```json +{ + "scim_config": { + "enabled": true, + "provider": "keycloak", + "config": { + "serverUrl": "https://keycloak.company.com", + "realm": "bifrost-prod", + "clientId": "bifrost", + "clientSecret": "${KEYCLOAK_CLIENT_SECRET}", + "teamIdsField": "groups" + } + } +} +``` + +### Configuration reference + +| Field | Required | Description | +| --- | --- | --- | +| `serverUrl` | Yes | Base URL of the Keycloak server. Must be a valid URL (e.g. `https://keycloak.company.com`) and must **not** include `/realms/...`. | +| `realm` | Yes | Realm name. | +| `clientId` | Yes | Client ID created in Step 1. | +| `clientSecret` | Yes | Client secret — required because the client is confidential. | +| `audience` | No | Expected JWT audience. Defaults to `clientId`. | +| `teamIdsField` | No | JWT claim for group IDs. Defaults to `groups`. | +| `attributeRoleMappings` | No | Ordered list of attribute→role mappings. | +| `attributeTeamMappings` | No | Attribute→team mappings (all matches apply). | +| `attributeBusinessUnitMappings` | No | Attribute→business-unit mappings (all matches apply). | + +--- + +## Testing the Integration + +1. Open the Bifrost dashboard in an incognito window. +2. You'll be redirected to Keycloak's login page. +3. Sign in with a Keycloak user that has one of the roles you mapped. +4. Verify the user appears under **Governance → Users** with the expected role and teams. +5. From **Governance → User Provisioning → Import Users**, verify the service account can list users. + +--- + +## Troubleshooting + +### `serverUrl must not include /realms/{realm}` + +The `serverUrl` field is the base Keycloak URL. Set the realm in the separate **Realm** field. Example: `https://keycloak.company.com` + realm `bifrost-prod` — **not** `https://keycloak.company.com/realms/bifrost-prod`. + +### Users redirected back to login + +- Confirm the client's **Valid redirect URIs** exactly match your Bifrost login URL (trailing slash matters). +- Verify the client is **Enabled** in Keycloak. + +### Roles not appearing in the token + +- Check that the **User Realm Role** mapper adds to both ID and Access tokens. +- Use `Evaluate` on the client scope to preview the token a user would receive. + +### Service account cannot list users + +- Confirm `realm-management → view-users` is assigned under **Service accounts roles**. +- If you enabled **Authorization** on the client, service account tokens may not work — disable Authorization (fine-grained authz) for this client. + +### `jwks keys not found` + +- Make sure the server URL is reachable from Bifrost. The JWKS endpoint is `{serverUrl}/realms/{realm}/protocol/openid-connect/certs`. 
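+
+A quick reachability check you can run from the Bifrost host, as a sketch (`jq` is assumed only for readability):
+
+```bash
+# A healthy realm returns a JSON document with a non-empty "keys" array
+curl -s "https://keycloak.company.com/realms/bifrost-prod/protocol/openid-connect/certs" | jq '.keys | length'
+```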
+
+---
+
+## Next Steps
+
+- [User Provisioning overview](./user-provisioning) — capabilities, attribute mappings, bulk import
+- [Role-Based Access Control](./rbac) — configure custom roles before mapping
+- [Audit Logs](./audit-logs) — track authentication events
diff --git a/docs/enterprise/setting-up-okta.mdx b/docs/enterprise/setting-up-okta.mdx
index 7b6f25f0bc..26ac5e8225 100644
--- a/docs/enterprise/setting-up-okta.mdx
+++ b/docs/enterprise/setting-up-okta.mdx
@@ -13,6 +13,7 @@ This guide walks you through configuring Okta as your identity provider for Bifr
- An Okta organization with admin access
- Bifrost Enterprise deployed and accessible
- The redirect URI for your Bifrost instance (e.g., `https://your-bifrost-domain.com/login`)
+- Ensure you have created all the [roles in Bifrost](/enterprise/rbac) that you plan to map Okta attributes to.

---

@@ -47,20 +48,25 @@ Configure the following settings for your application:

**General Settings:**
+
- **App integration name**: `Bifrost Enterprise`
- **Logo** (optional): You can upload the Bifrost logo from [https://www.getmaxim.ai/bifrost/bifrost-logo-only.png](https://www.getmaxim.ai/bifrost/bifrost-logo-only.png)

**Grant type:**
+
- Enable **Authorization Code**
- Enable **Refresh Token**

**Sign-in redirect URIs:**
+
- Add your Bifrost login callback URL: `https://your-bifrost-domain.com/login`

**Sign-out redirect URIs (Optional):**
+
- Add your Bifrost base URL: `https://your-bifrost-domain.com`

**Assignments:**
+
- Choose **Skip group assignment for now** (we'll configure this later)

6. Click **Save** to create the application

@@ -71,39 +77,13 @@ Configure the following settings for your application:

---

-## Step 3: Configure Authorization Server (optional)
+## Step 3: Create Custom Role Attribute (Optional)

-The default authorization server (`/oauth2/default`) is available to all Okta plans and **supports custom claims**, including role claims. The API Access Management paid add-on is only required to create additional custom authorization servers beyond the default.
+ You can map any attribute (including custom roles/groups) to assign roles to users. Learn more in the
+ [RBAC](/enterprise/rbac) docs.

-Bifrost uses Okta's Authorization Server to issue tokens. You have three options:
-
-1. **Use `/oauth2/default` with role claims (recommended)** — Complete Steps 4-7 to configure custom role claims on the default authorization server. This enables automatic RBAC synchronization.
-
-2. **Use `/oauth2/default` without role claims** — Skip Steps 4-7. The first user to sign in automatically receives the Admin role and can manage RBAC for all subsequent users through the Bifrost dashboard.
-
-3. **Skip Step 3 entirely** — Authorization is not configured through Okta. You'll need an alternative authentication mechanism.
-
-### Configuring the Authorization Server
-
-1. Navigate to **Security** → **API**
-2. Click on **Authorization Servers**
-
-
- Okta Authorization Servers
-
-
-3. Note the **Issuer URI** for your authorization server (e.g., `https://your-domain.okta.com/oauth2/default`)
-
-
-The Issuer URI is used as the `issuerUrl` in your Bifrost configuration. Make sure to use the full URL including `/oauth2/default` (or your custom authorization server path).
-
-
----
-
-## Step 4: Create Custom Role Attribute
-
To map Okta users to Bifrost roles (Admin, Developer, Viewer), you need to create a custom attribute.

1.
Navigate to **Directory** → **Profile Editor** @@ -120,20 +100,20 @@ To map Okta users to Bifrost roles (Admin, Developer, Viewer), you need to creat Add custom attribute for bifrostRole -| Field | Value | -|-------|-------| -| **Data type** | string | -| **Display name** | bifrostRole | -| **Variable name** | bifrostRole | -| **Enum** | Check "Define enumerated list of values" | +| Field | Value | +| --------------------- | ----------------------------------------------------------- | +| **Data type** | string | +| **Display name** | bifrostRole | +| **Variable name** | bifrostRole | +| **Enum** | Check "Define enumerated list of values" | | **Attribute members** | Admin → `admin`, Developer → `developer`, Viewer → `viewer` | -| **Attribute type** | Personal | +| **Attribute type** | Personal | 5. Click **Save** --- -## Step 5: Add Role Claim to Tokens +## Step 4: Add Role Claim to Tokens (If you have added custom role attribute) Configure the authorization server to include the role in the access token. @@ -148,27 +128,29 @@ Configure the authorization server to include the role in the access token. Configure the claim: -| Field | Value | -|-------|-------| -| **Name** | `role` | +| Field | Value | +| ------------------------- | -------------------- | +| **Name** | `role` | | **Include in token type** | Access Token, Always | -| **Value type** | Expression | -| **Value** | `user.bifrostRole` | -| **Include in** | Any scope | +| **Value type** | Expression | +| **Value** | `user.bifrostRole` | +| **Include in** | Any scope | 5. Click **Create** -If you named your custom attribute differently, update the Value expression accordingly (e.g., `user.yourAttributeName`). + If you named your custom attribute differently, update the Value expression accordingly (e.g., + `user.yourAttributeName`). --- -## Step 6: Configure Groups for Team and Role Synchronization +## Step 5: Configure Groups Bifrost can automatically sync Okta groups for two purposes: + - **Team synchronization** — Groups are synced as Bifrost teams -- **Role mapping** — Groups can be mapped to Bifrost roles (Admin, Developer, Viewer) using Group-to-Role Mappings in the Bifrost UI +- **Role mapping** — Groups can be mapped to Bifrost roles (Admin, Developer, Viewer) using Group-to-Role Mappings in the Bifrost UI. ### Create Groups in Okta @@ -186,36 +168,12 @@ Bifrost can automatically sync Okta groups for two purposes: -Use a consistent naming convention for your groups. This makes it easier to configure group filters and role mappings later. + Use a consistent naming convention for your groups. This makes it easier to configure group filters and role mappings + later. ### Add Groups Claim to Tokens -You have two options for configuring the groups claim. Choose the one that best fits your Okta plan and requirements. - -#### Option A: Using App-Level Groups Claim (All Okta Plans) - -This approach configures the groups claim directly in your application's settings and works with all Okta plans, including free tiers. - -1. Navigate to your application's **Sign On** tab -2. Scroll down to the **OpenID Connect ID Token** section -3. Click **Edit** to modify the settings -4. Configure the **Groups claim filter**: - - **Groups claim type**: Filter - - **Groups claim filter**: Set a claim name (e.g., `groups`) and filter condition (e.g., "Starts with" `bifrost-staging`) - - - Application Groups claim configuration - - -5. Click **Save** - - -The filter ensures only relevant groups are included in the token. 
Adjust the filter condition based on your group naming convention. - - -#### Option B: Using Authorization Server Groups Claim - This approach adds the groups claim through your authorization server, providing more flexibility for complex configurations. 1. Navigate to **Security** → **API** → **Authorization Servers** @@ -225,35 +183,19 @@ This approach adds the groups claim through your authorization server, providing Configure the groups claim: -| Field | Value | -|-------|-------| -| **Name** | `groups` | -| **Include in token type** | ID Token, Always | -| **Value type** | Groups | -| **Filter** | Matches regex: `.*` (or specify a prefix like `bifrost-.*`) | -| **Include in** | Any scope | +| Field | Value | +| ------------------------- | ----------------------------------------------------------- | +| **Name** | `groups` | +| **Include in token type** | ID Token, Always | +| **Value type** | Groups | +| **Filter** | Matches regex: `.*` (or specify a prefix like `bifrost-.*`) | +| **Include in** | Any scope | 5. Click **Create** -You can also configure an additional groups claim in the application's Sign On settings: - -1. Navigate to your application's **Sign On** tab - - - Application Sign On configuration - - -2. Under **OpenID Connect ID Token**, configure: - - **Groups claim type**: Expression - - **Groups claim expression**: `Arrays.flatten(Groups.startsWith("OKTA", "bifrost", 100))` - - -Adjust the group filter expression based on your naming convention. The example above includes groups starting with "bifrost". - - --- -## Step 7: Assign Users to the Application +## Step 6: Assign Users to the Application 1. Navigate to your application's **Assignments** tab @@ -263,7 +205,9 @@ Adjust the group filter expression based on your naming convention. The example 2. Click **Assign** → **Assign to People** or **Assign to Groups** -3. For each user, set their **bifrostRole**: +### For Assigning Roles (If step 3 and step 4 are followed) + +For each user, set their **bifrostRole** (if you are planning to do role-level mapping): Assign custom role to user @@ -271,72 +215,121 @@ Adjust the group filter expression based on your naming convention. The example 4. Click **Save and Go Back** - -Role claims are available only when you configure custom claims on your authorization server. Ensure you add role claims to your chosen authorization server (for example, `/oauth2/default`) to enable RBAC. If you skipped Steps 4-7, the first user to sign in automatically receives the **Admin** role and can manage RBAC for all subsequent users through the Bifrost dashboard. - - --- +## Step 7: Create API token for bulk user and team sync + +To create an API token, navigate to **Security** → **API** → **Tokens**. + + + Okta API tokens screen + + +1. Click on "Create token" + + + Create token dialog in Okta + + +2. Copy token to be used in the next step. + ## Step 8: Configure Bifrost Now configure Bifrost to use Okta as the identity provider. ### Using the Bifrost UI + + Create token dialog in Okta + + 1. Navigate to **Governance** → **User Provisioning** in your Bifrost dashboard 2. Select **Okta** as the SCIM Provider 3. 
Enter the following configuration:

-| Field | Value |
-|-------|-------|
-| **Client ID** | Your Okta application Client ID |
-| **Issuer URL** | Issuer URL |
-| **Audience** | Your API audience (e.g., `api://default` or custom) |
+| Field | Value |
+| ----------------- | -------------------------------------------------------------------- |
+| **Client ID** | Your Okta application Client ID |
+| **Issuer URL** | Issuer URL |
+| **Audience** | Your API audience (e.g., `api://default` or custom) |
| **Client Secret** | Your Okta application Client Secret (optional, for token revocation) |

-4. Toggle **Enabled** to activate the provider
-5. Click **Save Configuration**
+4. **Verify** the configuration and confirm there are no errors or warnings.
+5. Toggle **Enabled** to activate the provider
+6. Click **Save Configuration**
+
+
+After saving, you'll need to restart your Bifrost server for the changes to take effect.
+
+
+### Attribute Mappings
-### Group-to-Role Mappings (Optional)
+Attribute mappings let you translate Okta claim values into Bifrost roles, teams, or business units without restructuring your Okta claims. Bifrost supports three mapping types:

-If you configured groups in Okta (Step 6), you can map Okta group names directly to Bifrost roles. This is an alternative to using custom role claims (Steps 4-5) and works with all Okta plans.
+- **`attributeRoleMappings`**: map a claim value to a Bifrost role (Admin, Developer, Viewer, or a custom role)
+- **`attributeTeamMappings`**: map a claim value to a Bifrost team
+- **`attributeBusinessUnitMappings`**: map a claim value to a Bifrost business unit

-1. In the User Provisioning configuration, scroll down to **Group-to-Role Mappings**
-2. Click **Add Mapping**
-3. Enter the **Group Claim Name** exactly as it appears in your Okta groups (e.g., `bifrost-staging-admins`)
-4. Select the corresponding **Role** (Admin, Developer, or Viewer)
-5. Repeat for each group you want to map
+These mappings work with any Okta claim — the `groups` claim from Step 5, the custom `role` claim from Step 4, or any other claim your authorization server includes in the token (e.g., `department`, `organization`).
+
+To configure attribute mappings:
+
+1. In the User Provisioning configuration, scroll down to **Attribute Mappings**
+2. Click **Add Mapping** under the relevant mapping type (Role, Team, or Business Unit)
+3. Enter the **Attribute** (the claim name from the token), the **Value** to match, and the target **Role**, **Team**, or **Business Unit**
+4. Repeat for each rule you need

- Group-to-Role Mappings configuration in Bifrost
+ Attribute Mappings configuration in Bifrost

-| Group Claim Name | Role |
-|------------------|------|
-| `bifrost-staging-admins` | Admin |
-| `bifrost-staging-viewers` | Viewer |
+
+
+ When you set the value to "*", the claim value is mapped as-is to the entity name. Value comparisons are case-insensitive.
+
+
+### Custom attribute mapping
+
+You can also map any custom attribute to any entity (role, team, or business unit). Make sure these attributes are configured to be sent back to Bifrost in the token configuration.
+
+
+ Attribute Mappings configuration in Bifrost
+
+
+
+#### Evaluation rules
+
+- **Role mappings**: Ordered, first match wins. If no rule matches, users are not allowed to log in to the system.
+- **Team and business unit mappings**: All matching rules apply — users can be placed on multiple teams and business units simultaneously.
+- **Claim values**: Can be strings, arrays, or nested objects.
Bifrost resolves dotted paths (e.g., `realm_access.roles`). -If a user belongs to multiple groups with different role mappings, the highest privilege role is assigned. If no mapping matches, the first user to sign in receives the **Admin** role, and subsequent users receive the **Viewer** role. + If a user matches multiple role mapping rules, the highest privilege role is assigned. If no + mapping matches, the first user to sign in receives the **Admin** role, and subsequent users receive the **Viewer** + role. -6. Click **Save Configuration** +5. Click **Save Configuration** - -After saving, you'll need to restart your Bifrost server for the changes to take effect. - +After saving, you'll need to restart your Bifrost server for the changes to take effect. ### Configuration Reference -| Field | Required | Description | -|-------|----------|-------------| -| `issuerUrl` | Yes | Okta authorization server URL (e.g., `https://your-domain.okta.com/oauth2/default`) | -| `clientId` | Yes | Application Client ID from Okta | -| `clientSecret` | No | Application Client Secret (enables token revocation) | -| `audience` | Yes | API audience identifier from your authorization server | -| `userIdField` | No | JWT claim for user ID (default: `uid`) | -| `rolesField` | No | JWT claim for roles (default: `roles`) | -| `teamIdsField` | No | JWT claim for group/team IDs (default: `groups`) | +| Field | Required | Description | +| ------------------------------- | -------- | ----------------------------------------------------------------------------------- | +| `issuerUrl` | Yes | Okta authorization server URL (e.g., `https://your-domain.okta.com/oauth2/default`) | +| `clientId` | Yes | Application Client ID from Okta | +| `clientSecret` | Yes | Application Client Secret (enables token revocation) | +| `audience` | Yes | API audience identifier from your authorization server | +| `attributeRoleMappings` | Yes | Ordered list of attribute→role mappings. First match wins. | +| `attributeTeamMappings` | No | Attribute→team mappings (all matches apply). | +| `attributeBusinessUnitMappings` | No | Attribute→business-unit mappings (all matches apply). | + --- @@ -358,22 +351,10 @@ After saving, you'll need to restart your Bifrost server for the changes to take - Check that the Bifrost server was restarted after configuration - Ensure the Issuer URL is correct and accessible -### "Invalid audience" error - -- Verify the `audience` field matches your Okta authorization server's audience -- Check if you're using a custom authorization server and update the issuer URL accordingly - -### Roles not syncing -- Ensure the `role` claim is configured in your authorization server -- Verify users have the `bifrostRole` attribute set -- Check that the claim is included in the access token (use Okta's Token Preview feature) +### Attribute mapping is not working -### Groups not appearing as teams - -- Verify the `groups` claim is configured in your authorization server -- Ensure users are assigned to the relevant groups -- Check that groups are assigned to the application +- Verify that token configuration includes all the attributes used for mapping. 
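+
+To see exactly which claims a token carries, decode its payload segment, as a sketch (assumes `python3`; Okta's Token Preview feature shows the same information in the dashboard):
+
+```bash
+# JWTs are three base64url segments; the second holds the claims
+echo "$ACCESS_TOKEN" | python3 -c 'import sys, base64, json; p = sys.stdin.read().strip().split(".")[1]; print(json.dumps(json.loads(base64.urlsafe_b64decode(p + "=" * (-len(p) % 4))), indent=2))'
+```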
### Token refresh failing @@ -384,7 +365,7 @@ After saving, you'll need to restart your Bifrost server for the changes to take ## Next Steps +- **[User Provisioning (SCIM)](./user-provisioning)** - Overview of SCIM in Bifrost and alternative identity providers - **[Advanced Governance](./advanced-governance)** - Learn about user budgets and compliance features - **[Role-Based Access Control](./advanced-governance#role-hierarchy)** - Understand the Admin, Developer, Viewer hierarchy - **[Audit Logs](./audit-logs)** - Monitor user authentication and activity - diff --git a/docs/enterprise/setting-up-zitadel.mdx b/docs/enterprise/setting-up-zitadel.mdx new file mode 100644 index 0000000000..03fcffe81e --- /dev/null +++ b/docs/enterprise/setting-up-zitadel.mdx @@ -0,0 +1,246 @@ +--- +title: "Setting up Zitadel" +description: "Step-by-step guide to configure Zitadel (cloud or self-hosted) as your identity provider for Bifrost Enterprise SSO and user provisioning." +icon: "cloud" +--- + +## Overview + +This guide walks you through configuring [Zitadel](https://zitadel.com) as your identity provider for Bifrost Enterprise. Zitadel uses standard OIDC with JWKS-based JWT validation, plus a separate **service account** for user provisioning (Zitadel web applications cannot perform the `client_credentials` grant — a dedicated service account is required for directory-level reads). + +After completing this guide users will sign in to Bifrost with their Zitadel credentials, and admins can bulk-import users and teams from the Zitadel Management API. + +## Prerequisites + +- A Zitadel instance (cloud at `*.zitadel.cloud` or self-hosted) with admin access +- An existing Zitadel **Project** in the organization you want to connect +- Bifrost Enterprise deployed and accessible +- The redirect URI for your Bifrost instance (e.g. `https://your-bifrost-domain.com/login`) +- Bifrost [roles](./rbac) created for the roles you plan to map (Admin, Developer, Viewer, or custom) + +--- + +## Step 1: Create a Web Application + +The Web Application is what end users log in through. + +1. Open the Zitadel Console and choose **Projects → your project → New Application**. + + + Creating a new application in Zitadel + + + +2. Configure the application: + +| Field | Value | +| --- | --- | +| **Type** | Web | +| **Authentication method** | PKCE (recommended) or Basic | +| **Redirect URI** | `https://your-bifrost-domain.com/login` | +| **Post logout URI** | `https://your-bifrost-domain.com` | + +3. After creating the app, note the **Client ID** from the application detail page. + + + Zitadel application Client ID + + +4. If you chose a confidential authentication method, also copy the **Client Secret** (shown once). + +--- + +## Step 2: Enable role claims on the project (Optional) + + + In Bifrost, you can map any attribute to role. If you decide to map Zitadel project roles, then follow step 2 and step 3. + + +Zitadel only emits role claims in access tokens when the project is configured to assert them. + +1. Open **Projects → your project → General**. +2. Enable **Assert Roles on Authentication**. +3. Enable **Check Authorization on Authentication** if you want to enforce that every user has at least one project role. + + + Assert Roles on Authentication toggle in Zitadel + + +4. Note the **Project ID** — you'll need it for the Bifrost config so Bifrost can resolve the correct project roles. 
+
+
+ Without **Assert Roles on Authentication**, the token will not contain role claims and every user will fall back to the default role (Viewer, or Admin for the first sign-in).
+
+
+---
+
+## Step 3: Create project roles (Optional)
+
+
+ Create project roles in Zitadel
+
+
+1. In the same project, open the **Roles** tab and create a role for each Bifrost role you plan to map — a common pattern is one Zitadel role per Bifrost role (e.g. `admin`, `developer`, `viewer`).
+2. Authorize users to the relevant roles via **Users → Roles**.
+
+
+ Assigning project roles to users in Zitadel
+
+
+---
+
+## Step 4: Create a service account for provisioning
+
+Web apps in Zitadel cannot use the `client_credentials` grant. Bifrost needs a dedicated service account to list users via the Management API.
+
+1. Navigate to **Users → Service Accounts → New**.
+
+
+ Creating a service account in Zitadel
+
+
+2. Name it (e.g. `bifrost-provisioning`) and create it.
+3. Open the service account → **Actions → Generate Client Secret**.
+
+
+ Generating a client secret for the service account
+
+
+4. **Copy the Client ID and Client Secret immediately** — the secret is shown only once.
+
+
+ Store the service account Client Secret in your password manager. It cannot be retrieved after this screen.
+
+
+5. Grant the service account **IAM_USER_READ** (or **Org Owner** if you want broader visibility):
+   - Organization → **Managers → Add Manager** → select the service account → role `IAM_USER_READ`.
+
+
+ Assigning IAM_USER_READ to the service account
+
+
+---
+
+## Step 5: App token settings
+
+1. Change **Auth Token Type** to JWT.
+2. Enable the options that include user roles and profile info in the ID token.
+
+
+ Token settings for the Zitadel application
+
+
+---
+
+## Step 6: Configure Bifrost
+
+### Using the Bifrost dashboard
+
+1. In Bifrost, go to **Governance → User Provisioning**.
+2. Select **Zitadel** as the SCIM Provider.
+3. Fill in the fields:
+
+| Field | Value |
+| --- | --- |
+| **Domain** | Your Zitadel host, e.g. `my-instance.zitadel.cloud` or `auth.company.com` (no scheme, no path) |
+| **Project ID** | The project ID from Step 2 |
+| **Client ID** | Web Application Client ID from Step 1 |
+| **Client Secret** | Web Application Client Secret from Step 1 (optional for PKCE) |
+| **Audience** | Optional access-token audience override |
+| **Service Account Client ID** | From Step 4 |
+| **Service Account Client Secret** | From Step 4 |
+
+4. Click **Verify** — Bifrost connects to Zitadel's JWKS and service account token endpoints to confirm the credentials.
+5. Configure **Attribute → Role / Team / Business Unit** mappings if you need to translate project roles or metadata into Bifrost roles.
+6. Toggle **Enabled** and click **Save Configuration**.
+
+
+ Zitadel provider form in the Bifrost dashboard
+
+
+### Using `config.json`
+
+```json
+{
+  "scim_config": {
+    "enabled": true,
+    "provider": "zitadel",
+    "config": {
+      "domain": "my-instance.zitadel.cloud",
+      "projectId": "123456789012345678",
+      "clientId": "123456789012345678@my-project",
+      "clientSecret": "${ZITADEL_CLIENT_SECRET}",
+      "serviceAccountClientId": "987654321098765432@my-project",
+      "serviceAccountClientSecret": "${ZITADEL_SA_CLIENT_SECRET}",
+      "teamIdsField": "groups"
+    }
+  }
+}
+```
+
+### Custom attribute mapping
+
+You can also map any custom attribute to any entity (role, team, or business unit). Make sure your token settings are configured to include these attributes in the tokens sent to Bifrost.
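+
+As an illustration, the mapping rules below (inside `scim_config.config`) would translate a `roles` claim value into a Bifrost role and a `department` claim value into a team. The claim names and values are placeholders for whatever your tokens actually carry:
+
+```json
+{
+  "attributeRoleMappings": [
+    { "attribute": "roles", "value": "admin", "role": "admin" }
+  ],
+  "attributeTeamMappings": [
+    { "attribute": "department", "value": "Platform", "team": "platform-team" }
+  ]
+}
+```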
+
+
+ Attribute Mappings configuration in Bifrost
+
+
+### Configuration reference
+
+| Field | Required | Description |
+| --- | --- | --- |
+| `domain` | Yes | Zitadel instance host (no scheme). Examples: `my-instance.zitadel.cloud`, `auth.company.com`. |
+| `clientId` | Yes | Client ID of the Web Application used for user login. |
+| `clientSecret` | No | Web Application secret. Required for confidential clients; omit for PKCE-only flows. |
+| `projectId` | Yes | Required to resolve project-scoped role claims and sync role grants. |
+| `audience` | No | Override the expected JWT `aud` claim. |
+| `serviceAccountClientId` | Yes | Service account used to list users via the Management API. |
+| `serviceAccountClientSecret` | Yes | Service account secret (shown once in Zitadel). |
+| `attributeRoleMappings` | Yes | Ordered list of attribute→role mappings. |
+| `attributeTeamMappings` | No | Attribute→team mappings (all matches apply). |
+| `attributeBusinessUnitMappings` | No | Attribute→business-unit mappings (all matches apply). |
+
+---
+
+## Testing the Integration
+
+1. Open the Bifrost dashboard in an incognito window.
+2. You'll be redirected to Zitadel. Sign in with a user who has a project authorization.
+3. On successful login you return to Bifrost and appear in **Governance → Users** with the correct role.
+4. From **Governance → User Provisioning → Import Users**, verify you can preview and import additional users via the service account.
+
+---
+
+## Troubleshooting
+
+### `role claims missing in token`
+
+- Enable **Assert Roles on Authentication** on the project (Step 2).
+- Ensure the user has an active authorization for the project.
+
+### `invalid audience` when validating the JWT
+
+- Check the `audience` field in the Bifrost config. It must match the `aud` claim issued by Zitadel. Leaving it empty uses the default (the project's resource owner).
+
+### Service account cannot list users
+
+- Confirm the service account has **IAM_USER_READ** or **Org Owner** role in the organization.
+- Regenerate the client secret if you've lost it — the original secret cannot be retrieved.
+
+### Redirect URI mismatch
+
+- Zitadel requires an exact string match. Check for trailing slashes and `http` vs `https`.
+
+---
+
+## Next Steps
+
+- [User Provisioning overview](./user-provisioning) — capabilities, attribute mappings, bulk import
+- [Role-Based Access Control](./rbac) — configure custom roles before mapping
+- [Audit Logs](./audit-logs) — track authentication events
diff --git a/docs/enterprise/user-provisioning.mdx b/docs/enterprise/user-provisioning.mdx
new file mode 100644
index 0000000000..14a7e182b8
--- /dev/null
+++ b/docs/enterprise/user-provisioning.mdx
@@ -0,0 +1,197 @@
+---
+title: "User Provisioning (SCIM)"
+description: "Authenticate users, sync teams, and provision roles and business units from your identity provider using SCIM-backed OAuth 2.0 / OIDC flows."
+icon: "users-gear"
+---
+
+## Overview
+
+Bifrost Enterprise uses **SCIM-backed identity provisioning** to connect your organization's identity provider to Bifrost.
A single configuration gives you: + +- **Single sign-on (SSO)** via OAuth 2.0 / OIDC with JWKS-based JWT validation +- **Automatic role assignment** using custom claims, app roles, or group-to-role mappings +- **Team synchronization** from IdP groups into Bifrost teams +- **Business unit mapping** from IdP attributes to Bifrost business units +- **Bulk user provisioning** with filter-preview before import +- **Silent token refresh** using server-stored refresh tokens + +Once configured, users sign in to Bifrost with their corporate credentials and inherit the right [role and permissions](./rbac) immediately — no manual account creation. + + + User Provisioning overview in Bifrost dashboard + + +--- + +## Supported Identity Providers + +Pick your IdP to follow a step-by-step setup guide. All providers share the same Bifrost configuration surface — the only difference is how the OAuth client and role/group claims are created on the provider side. + + + + OIDC with Org or Custom Authorization Servers, plus group-to-role mapping and API tokens for bulk user sync. + + + Entra ID (Azure AD) with app roles, group claims, and v1.0 / v2.0 token support. + + + Cloud or self-hosted Zitadel with project-scoped role claims and service-account-based provisioning. + + + Google Workspace domains with OAuth login plus optional Directory API sync via a service account. + + + +--- + +## How it works + + + SCIM authentication and provisioning flow + + +1. **Login** — Bifrost redirects unauthenticated users to the provider's authorization endpoint (Authorization Code flow). +2. **Token exchange** — on callback, Bifrost exchanges the code for an access token and refresh token, stores them in an `HttpOnly` cookie / server session, and validates the JWT against the provider's JWKS. +3. **Identity extraction** — configurable JWT claims (`userIdField`, `rolesField`, `teamIdsField`) are mapped to a Bifrost user, role, and teams. Provider-specific app roles or custom attributes override claim lookup. +4. **Attribute mapping** — optional `attributeRoleMappings`, `attributeTeamMappings`, and `attributeBusinessUnitMappings` translate arbitrary claim values (e.g., a department string or Okta group name) into Bifrost roles, teams, or business units. +5. **Bulk import** — admins can preview users matching a filter and bulk-import them via the dashboard, which calls the provider's user directory API. +6. **Silent refresh** — when the access token expires, Bifrost uses the stored refresh token to mint a new one without requiring re-login. + +--- + +## Capabilities + +| Capability | Description | +| --- | --- | +| **OAuth 2.0 / OIDC SSO** | Authorization Code + PKCE with configurable scopes (`openid profile email offline_access`). | +| **JWKS validation** | JWTs are validated against the provider's published JWKS keys; configuration is cached and auto-refreshed. | +| **Role mapping** | Map from a claim value (string or array) to Admin / Developer / Viewer or a custom role. Highest-privilege wins when multiple match. | +| **Team mapping** | Map multiple claim values to Bifrost teams in a single pass (a user can belong to many teams). | +| **Business unit mapping** | Same as team mapping but scoped to business units. | +| **Provisioning preview** | Preview up to 50 users matching filters (groups, roles, departments) before importing. | +| **Bulk import** | Import matched users into Bifrost with role + team + BU assignments applied. | +| **Team sync** | Sync IdP groups as Bifrost teams with a single action. 
| **Business unit sync** | Sync IdP organizational units as Bifrost business units. |
+| **Deprovisioning** | Re-running import reconciles removed users and updates role / team assignments. |
+| **API key pass-through** | Requests using Bifrost API keys (`bfst-*`) bypass SCIM middleware so inference traffic is not affected. |
+
+---
+
+## Configuration reference
+
+All providers share the same outer config shape in `config.json`:
+
+```json
+{
+  "scim_config": {
+    "enabled": true,
+    "provider": "okta | entra | zitadel | keycloak | google",
+    "config": {
+      "...": "provider-specific fields — see each IdP guide"
+    }
+  }
+}
+```
+
+Shared fields across providers:
+
+| Field | Required | Description |
+| --- | --- | --- |
+| `clientId` | Yes | OAuth client ID from the identity provider. |
+| `clientSecret` | Usually | Client secret. Required for confidential clients and (where applicable) token revocation. |
+| `audience` | Optional | JWT audience to validate against. Defaults vary per provider. |
+| `attributeRoleMappings` | Optional | Ordered list of `{ attribute, value, role }` rules evaluated top-to-bottom. |
+| `attributeTeamMappings` | Optional | List of `{ attribute, value, team }` rules (all matches apply). |
+| `attributeBusinessUnitMappings` | Optional | List of `{ attribute, value, businessUnit }` rules (all matches apply). |
+
+Provider-specific fields (domain, tenant ID, server URL, service-account credentials) are documented in each IdP's setup guide.
+
+
+ Changes to `scim_config` made through the UI are applied as soon as you save. For file-based configuration, restart the Bifrost server to pick up changes.
+
+
+---
+
+## Configuring from the dashboard
+
+1. Navigate to **Governance → User Provisioning** in the Bifrost dashboard.
+2. Select your identity provider from the **SCIM Provider** dropdown.
+3. Fill in the provider-specific fields. Required fields are marked and validated on **Verify**.
+
+
+ Selecting a SCIM provider in the Bifrost dashboard
+
+
+4. Click **Verify** to test credentials end-to-end. Bifrost will reach the provider's JWKS / directory endpoint and report any failures.
+5. Configure **Attribute → Role / Team / Business Unit** mappings as needed.
+6. Toggle **Enabled** and click **Save Configuration**.
+
+
+ After enabling a new provider, the next dashboard load redirects to your IdP for login. Test in an incognito window first to avoid being locked out of your current session.
+
+
+---
+
+## Attribute mappings
+
+Attribute mappings let you translate claim values into Bifrost roles, teams, or business units without forcing your IdP admins to restructure claim names.
+
+
+ Attribute mapping configuration in Bifrost
+
+
+Each mapping is an ordered rule:
+
+```json
+{
+  "attribute": "department",
+  "value": "Engineering",
+  "role": "developer"
+}
+```
+
+Rules are evaluated top-to-bottom:
+- **Role mappings** — first match wins. Set a fallback with `"attribute": "*"` at the end.
+- **Team mappings** and **business unit mappings** — all matching rules apply, so a user with `department=Platform` and `group=sre` can be placed on multiple teams.
+
+Claim values can be strings, arrays, or nested objects — Bifrost resolves dotted paths (e.g., `realm_access.roles`).
+
+---
+
+## Bulk user provisioning
+
+Once SCIM is enabled, import users in bulk from your IdP:
+
+1. Go to **Governance → User Provisioning → Import Users**.
+2. Select a filter — groups, roles, departments, or a custom query depending on provider support.
+3.
Click **Preview** to see up to 50 matching users.
+4. Click **Import** to create them in Bifrost with role / team / BU assignments applied.
+
+
+ Preview of users matching an import filter
+
+
+Re-running an import reconciles existing users — role and team changes in the IdP are reflected on the next import.
+
+---
+
+## Troubleshooting
+
+| Symptom | Likely cause |
+| --- | --- |
+| Access denied: no application role or group mapping is assigned to this user. | Make sure the user is assigned to the Bifrost IdP application and has a valid group/attribute-to-role mapping in Bifrost. |
+| Redirect loop on login | Make sure you have restarted the Bifrost pods/instance after changing the SCIM configuration, or check for a redirect URI mismatch. An exact string match is required — check trailing slashes and `http` vs `https`. |
+| `invalid audience` | The `audience` field does not match the access token's `aud` claim. Use the same value your IdP issues. |
+| Empty roles / teams | Claim mapping is off. Verify the JWT at [jwt.io](https://jwt.io) and check `rolesField` / `teamIdsField`. |
+| Token refresh failing | `offline_access` scope missing or refresh token revoked. Re-enable the scope and re-authenticate. |
+| First user gets Admin | By design — if no matching role mapping applies, the first user is promoted to Admin so they can finish configuration. Subsequent users default to Viewer. |
+
+Provider-specific troubleshooting lives in each IdP's guide.
+
+---
+
+## Related
+
+- [Role-Based Access Control](./rbac) — permissions model and custom roles
+- [Advanced Governance](./advanced-governance) — budgets, limits, and compliance
+- [Audit Logs](./audit-logs) — track authentication events and role changes
diff --git a/docs/features/governance/budget-and-limits.mdx b/docs/features/governance/budget-and-limits.mdx
index 1a012b7904..b295565e72 100644
--- a/docs/features/governance/budget-and-limits.mdx
+++ b/docs/features/governance/budget-and-limits.mdx
@@ -345,59 +345,78 @@ Configure provider-level governance through Bifrost's configuration file for dec
 "governance": {
 "virtual_keys": [
 {
+ "id": "vk-dev-001",
 "name": "development-team-vk",
 "description": "Development team with multi-provider setup",
+ "is_active": true,
+ "rate_limit_id": "rl-vk-dev",
 "provider_configs": [
 {
+ "id": 1,
 "provider": "openai",
 "weight": 0.6,
 "allowed_models": ["gpt-4", "gpt-3.5-turbo"],
- "budget": {
- "max_limit": 1000.00,
- "reset_duration": "1M"
- },
- "rate_limit": {
- "token_max_limit": 2000000,
- "token_reset_duration": "1h",
- "request_max_limit": 2000,
- "request_reset_duration": "1h"
- }
+ "rate_limit_id": "rl-pc-openai"
 },
 {
+ "id": 2,
 "provider": "anthropic",
 "weight": 0.4,
 "allowed_models": ["claude-3-opus", "claude-3-sonnet"],
- "budget": {
- "max_limit": 500.00,
- "reset_duration": "1M"
- },
- "rate_limit": {
- "token_max_limit": 1000000,
- "token_reset_duration": "1h",
- "request_max_limit": 1000,
- "request_reset_duration": "1h"
- }
+ "rate_limit_id": "rl-pc-anthropic"
 }
- ],
- "budget": {
- "max_limit": 2000.00,
- "reset_duration": "1M",
- "calendar_aligned": true
- },
- "rate_limit": {
- "token_max_limit": 5000000,
- "token_reset_duration": "1h",
- "request_max_limit": 3000,
- "request_reset_duration": "1h"
- },
- "is_active": true
+ ]
+ }
+ ],
+ "budgets": [
+ {
+ "id": "budget-vk-dev",
+ "virtual_key_id": "vk-dev-001",
+ "max_limit": 2000.00,
+ "reset_duration": "1M",
+ "calendar_aligned": true
+ },
+ {
+ "id": "budget-pc-openai",
+ "provider_config_id": 1,
+ "max_limit": 1000.00,
+ "reset_duration":
"1M" + }, + { + "id": "budget-pc-anthropic", + "provider_config_id": 2, + "max_limit": 500.00, + "reset_duration": "1M" + } + ], + "rate_limits": [ + { + "id": "rl-vk-dev", + "token_max_limit": 5000000, + "token_reset_duration": "1h", + "request_max_limit": 3000, + "request_reset_duration": "1h" + }, + { + "id": "rl-pc-openai", + "token_max_limit": 2000000, + "token_reset_duration": "1h", + "request_max_limit": 2000, + "request_reset_duration": "1h" + }, + { + "id": "rl-pc-anthropic", + "token_max_limit": 1000000, + "token_reset_duration": "1h", + "request_max_limit": 1000, + "request_reset_duration": "1h" } ] } } ``` -Optional `calendar_aligned` on each `budget` matches the HTTP API and [calendar-aligned behavior](#calendar-aligned-budgets). +Budgets and rate limits live as **separate top-level arrays** inside `governance`. Virtual keys and provider configs reference them by id (`rate_limit_id`) or are referenced back (`virtual_key_id` / `provider_config_id` on each `budgets[]` entry). Optional `calendar_aligned` on each `budget` matches the HTTP API and [calendar-aligned behavior](#calendar-aligned-budgets). ### Advanced Configuration Examples @@ -407,34 +426,21 @@ Optional `calendar_aligned` on each `budget` matches the HTTP API and [calendar- "governance": { "virtual_keys": [ { + "id": "vk-cost-opt", "name": "cost-optimized-vk", "provider_configs": [ - { - "provider": "openai-gpt-3.5", - "weight": 0.8, - "budget": { - "max_limit": 50.00, - "reset_duration": "1d" - }, - "rate_limit": { - "request_max_limit": 1000, - "request_reset_duration": "1h" - } - }, - { - "provider": "openai-gpt-4", - "weight": 0.2, - "budget": { - "max_limit": 200.00, - "reset_duration": "1d" - }, - "rate_limit": { - "request_max_limit": 100, - "request_reset_duration": "1h" - } - } + {"id": 10, "provider": "openai-gpt-3.5", "weight": 0.8, "rate_limit_id": "rl-cheap"}, + {"id": 11, "provider": "openai-gpt-4", "weight": 0.2, "rate_limit_id": "rl-premium"} ] } + ], + "budgets": [ + {"id": "b-cheap", "provider_config_id": 10, "max_limit": 50.00, "reset_duration": "1d"}, + {"id": "b-premium", "provider_config_id": 11, "max_limit": 200.00, "reset_duration": "1d"} + ], + "rate_limits": [ + {"id": "rl-cheap", "request_max_limit": 1000, "request_reset_duration": "1h"}, + {"id": "rl-premium", "request_max_limit": 100, "request_reset_duration": "1h"} ] } } @@ -446,52 +452,24 @@ Optional `calendar_aligned` on each `budget` matches the HTTP API and [calendar- "governance": { "virtual_keys": [ { + "id": "vk-prod-hv", "name": "production-high-volume-vk", "provider_configs": [ - { - "provider": "openai", - "weight": 0.5, - "budget": { - "max_limit": 5000.00, - "reset_duration": "1M" - }, - "rate_limit": { - "token_max_limit": 10000000, - "token_reset_duration": "1h", - "request_max_limit": 10000, - "request_reset_duration": "1h" - } - }, - { - "provider": "anthropic", - "weight": 0.3, - "budget": { - "max_limit": 3000.00, - "reset_duration": "1M" - }, - "rate_limit": { - "token_max_limit": 6000000, - "token_reset_duration": "1h", - "request_max_limit": 6000, - "request_reset_duration": "1h" - } - }, - { - "provider": "azure-openai", - "weight": 0.2, - "budget": { - "max_limit": 2000.00, - "reset_duration": "1M" - }, - "rate_limit": { - "token_max_limit": 4000000, - "token_reset_duration": "1h", - "request_max_limit": 4000, - "request_reset_duration": "1h" - } - } + {"id": 20, "provider": "openai", "weight": 0.5, "rate_limit_id": "rl-openai"}, + {"id": 21, "provider": "anthropic", "weight": 0.3, "rate_limit_id": "rl-anthropic"}, 
+ {"id": 22, "provider": "azure-openai", "weight": 0.2, "rate_limit_id": "rl-azure"} ] } + ], + "budgets": [ + {"id": "b-openai", "provider_config_id": 20, "max_limit": 5000.00, "reset_duration": "1M"}, + {"id": "b-anthropic", "provider_config_id": 21, "max_limit": 3000.00, "reset_duration": "1M"}, + {"id": "b-azure", "provider_config_id": 22, "max_limit": 2000.00, "reset_duration": "1M"} + ], + "rate_limits": [ + {"id": "rl-openai", "token_max_limit": 10000000, "token_reset_duration": "1h", "request_max_limit": 10000, "request_reset_duration": "1h"}, + {"id": "rl-anthropic", "token_max_limit": 6000000, "token_reset_duration": "1h", "request_max_limit": 6000, "request_reset_duration": "1h"}, + {"id": "rl-azure", "token_max_limit": 4000000, "token_reset_duration": "1h", "request_max_limit": 4000, "request_reset_duration": "1h"} ] } } @@ -514,20 +492,23 @@ A virtual key configured with multiple providers and different budget allocation ```json { - "name": "marketing-team-vk", - "budget": { "max_limit": 100, "reset_duration": "1M" }, - "provider_configs": [ - { - "provider": "openai", - "weight": 0.7, - "budget": { "max_limit": 50, "reset_duration": "1M" } - }, - { - "provider": "anthropic", - "weight": 0.3, - "budget": { "max_limit": 30, "reset_duration": "1M" } - } - ] + "governance": { + "virtual_keys": [ + { + "id": "vk-mkt", + "name": "marketing-team-vk", + "provider_configs": [ + {"id": 30, "provider": "openai", "weight": 0.7}, + {"id": 31, "provider": "anthropic", "weight": 0.3} + ] + } + ], + "budgets": [ + {"id": "b-vk-mkt", "virtual_key_id": "vk-mkt", "max_limit": 100, "reset_duration": "1M"}, + {"id": "b-openai", "provider_config_id": 30, "max_limit": 50, "reset_duration": "1M"}, + {"id": "b-anth", "provider_config_id": 31, "max_limit": 30, "reset_duration": "1M"} + ] + } } ``` @@ -542,27 +523,22 @@ Different rate limits based on provider capabilities: ```json { - "name": "high-volume-vk", - "provider_configs": [ - { - "provider": "openai", - "rate_limit": { - "request_max_limit": 1000, - "request_reset_duration": "1h", - "token_max_limit": 1000000, - "token_reset_duration": "1h" - } - }, - { - "provider": "anthropic", - "rate_limit": { - "request_max_limit": 500, - "request_reset_duration": "1h", - "token_max_limit": 500000, - "token_reset_duration": "1h" + "governance": { + "virtual_keys": [ + { + "id": "vk-hv", + "name": "high-volume-vk", + "provider_configs": [ + {"id": 40, "provider": "openai", "rate_limit_id": "rl-openai"}, + {"id": 41, "provider": "anthropic", "rate_limit_id": "rl-anthropic"} + ] } - } - ] + ], + "rate_limits": [ + {"id": "rl-openai", "request_max_limit": 1000, "request_reset_duration": "1h", "token_max_limit": 1000000, "token_reset_duration": "1h"}, + {"id": "rl-anthropic", "request_max_limit": 500, "request_reset_duration": "1h", "token_max_limit": 500000, "token_reset_duration": "1h"} + ] + } } ``` @@ -577,25 +553,25 @@ Provider configurations with budget-based failover: ```json { - "name": "cost-optimized-vk", - "provider_configs": [ - { - "provider": "openai-cheap", - "weight": 1.0, - "budget": { "max_limit": 10, "reset_duration": "1d" } - }, - { - "provider": "openai-premium", - "weight": 0.0, - "budget": { "max_limit": 50, "reset_duration": "1d" }, - "rate_limit": { - "request_max_limit": 100, - "request_reset_duration": "1h", - "token_max_limit": 50000, - "token_reset_duration": "1h" + "governance": { + "virtual_keys": [ + { + "id": "vk-cost", + "name": "cost-optimized-vk", + "provider_configs": [ + {"id": 50, "provider": "openai-cheap", "weight": 
1.0},
+ {"id": 51, "provider": "openai-premium", "weight": 0.0, "rate_limit_id": "rl-premium"}
+ ]
 }
- }
- ]
+ ],
+ "budgets": [
+ {"id": "b-cheap", "provider_config_id": 50, "max_limit": 10, "reset_duration": "1d"},
+ {"id": "b-premium", "provider_config_id": 51, "max_limit": 50, "reset_duration": "1d"}
+ ],
+ "rate_limits": [
+ {"id": "rl-premium", "request_max_limit": 100, "request_reset_duration": "1h", "token_max_limit": 50000, "token_reset_duration": "1h"}
+ ]
+ }
}
```
diff --git a/docs/features/governance/virtual-keys.mdx b/docs/features/governance/virtual-keys.mdx
index 8ae28e42ce..6c8cff9320 100644
--- a/docs/features/governance/virtual-keys.mdx
+++ b/docs/features/governance/virtual-keys.mdx
@@ -169,7 +169,8 @@ curl -X DELETE http://localhost:8080/api/governance/virtual-keys/{vk_id}
 {
 "provider": "openai",
 "weight": 0.5,
- "allowed_models": ["gpt-4o-mini"]
+ "allowed_models": ["gpt-4o-mini"],
+ "key_ids": ["openai-primary"]
 },
 {
 "provider": "anthropic",
@@ -178,11 +179,7 @@ curl -X DELETE http://localhost:8080/api/governance/virtual-keys/{vk_id}
 }
 ],
 "team_id": "team-eng-001",
- "budget_id": "budget-eng-vk",
- "rate_limit_id": "rate-limit-eng-vk",
- "keys": [
- {"key_id": "8c52039e-38c6-48b2-8016-0bd884b7befb"}
- ]
+ "rate_limit_id": "rate-limit-eng-vk"
 },
 {
 "id": "vk-002",
@@ -202,16 +199,13 @@ curl -X DELETE http://localhost:8080/api/governance/virtual-keys/{vk_id}
 "allowed_models": ["claude-3-opus-20240229"]
 }
 ],
- "customer_id": "customer-acme-corp",
- "budget_id": "budget-exec-vk",
- "keys": [
- {"key_id": "8c52039e-38c6-48b2-8016-0bd884b7befb"}
- ]
+ "customer_id": "customer-acme-corp"
 }
 ],
 "budgets": [
 {
 "id": "budget-eng-vk",
+ "virtual_key_id": "vk-001",
 "max_limit": 100.00,
 "reset_duration": "1M",
 "current_usage": 0.0,
@@ -219,6 +213,7 @@ curl -X DELETE http://localhost:8080/api/governance/virtual-keys/{vk_id}
 },
 {
 "id": "budget-exec-vk",
+ "virtual_key_id": "vk-002",
 "max_limit": 500.00,
 "reset_duration": "1M",
 "current_usage": 0.0,
diff --git a/docs/features/litellm-compat.mdx b/docs/features/litellm-compat.mdx
index 51cd26dcd9..490a37efa4 100644
--- a/docs/features/litellm-compat.mdx
+++ b/docs/features/litellm-compat.mdx
@@ -9,8 +9,10 @@ icon: "train"
-The LiteLLM compatibility plugin provides two transformations:
+The LiteLLM compatibility plugin provides three transformations:
 1. **Text-to-Chat Conversion** - Automatically converts text completion requests to chat completion format for models that only support chat APIs
+2. **Chat-to-Responses Conversion** - Automatically converts chat completion requests to the responses format for models that only support the responses API
+3. **Drop Unsupported Params** - Automatically drops request parameters that the target model doesn't support
-When either transformation is applied, responses include `extra_fields.litellm_compat: true`.
+When a conversion is applied, responses include `extra_fields.converted_request_type` set to the request type the request was converted to (`chat_completion` or `responses`). If request parameters are dropped, the dropped keys are listed in `extra_fields.dropped_compat_plugin_params`.

---

@@ -55,6 +57,36 @@ F --> G
- `object: "chat.completion"` → `object: "text_completion"`
- Usage statistics and metadata are preserved

+## 2. Chat-to-Responses Conversion
+
+Some AI models (like OpenAI o1-pro) only support the responses API and don't support native chat completion endpoints. LiteLLM compatibility mode automatically handles this by:
+
+1. Checking if the model supports chat completion natively (using the model catalog)
+2. If not supported, converting your chat messages to the responses API format
+3. Calling the responses endpoint internally
+4.
Transforming the response back to chat completion format
+
+
+**Smart Conversion**: The conversion only happens when the model doesn't support chat completions natively. If a model has native chat completion support (like OpenAI's gpt-4 models), Bifrost uses the chat completion endpoint directly without any conversion.
+
+
+This allows you to use a unified chat completion interface across all providers, even those that only support the responses API.
+
+## How It Works
+
+When LiteLLM compatibility is enabled and you make a chat completion request, Bifrost first checks if the model supports chat completion:
+
+```mermaid
+flowchart LR
+A[Chat Completion Request] --> B{Model Supports Chat Completion?}
+B -->|Yes| C[Call Chat Completion API]
+B -->|No| D[Convert to Responses Message]
+D --> E[Call Responses API]
+E --> F[Transform Response]
+C --> G[Chat Completion Response]
+F --> G
+```
+
## Enabling LiteLLM Compatibility

@@ -63,7 +95,10 @@ F --> G
 1. Open the Bifrost dashboard
 2. Navigate to **Settings** → **Client Configuration**
-3. Enable **LiteLLM Fallbacks**
+3. Expand **LiteLLM Compat** and enable the features you need:
+   - **Convert Text to Chat** — converts text completion requests to chat for models that only support chat
+   - **Convert Chat to Responses** — converts chat completion requests to responses for models that only support responses
+   - **Drop Unsupported Params** — drops unsupported parameters based on model catalog allowlist
 4. Save your configuration

@@ -73,7 +108,11 @@ F --> G
```json
{
 "client_config": {
- "enable_litellm_fallbacks": true
+ "compat": {
+ "convert_text_to_chat": true,
+ "convert_chat_to_responses": true,
+ "should_drop_params": true
+ }
 }
}
```

@@ -84,9 +123,9 @@ F --> G

## Supported Providers

-LiteLLM compatibility mode works with any provider that supports chat completions but lacks native text completion support:
+Text completion to chat completion conversion works with any provider that supports chat completions but lacks native text completion support:

-| Provider | Native Text Completion | LiteLLM Fallback |
+| Provider | Native Text Completion | With Fallback |
 |----------|----------------------|------------------|
 | OpenAI (GPT-4, GPT-3.5-turbo) | No | Yes |
 | Anthropic (Claude) | No | Yes |
@@ -95,6 +134,12 @@ LiteLLM compatibility mode works with any provider that supports chat completion
 | Mistral | No | Yes |
 | Bedrock | Varies by model | Yes |

+Chat completion to responses conversion works with any provider that supports responses but lacks native chat completion support:
+
+| Provider | Native Chat Completion | With Fallback |
+|----------|----------------------|------------------|
+| OpenAI (o1-pro) | No | Yes |
+
## Behavior Details

**Model Capability Detection:**
@@ -117,13 +162,19 @@ LiteLLM compatibility mode works with any provider that supports chat completion
 | Response | `choices[0].message.content` | `choices[0].text` |
 | Response | `object: "chat.completion"` | `object: "text_completion"` |

+### Transformation 2: Chat-to-Responses Conversion
+
+**Applies to:** Chat completion requests on responses-only models
+
+| Phase | Original | Transformed |
+|-------|----------|-------------|
+| Request | Chat message with `role: "user"` | Responses input with `role: "user"` |
+| Request | `chat_completion` request type | `responses` request type |

### Metadata Set on Transformed Responses

When either transformation is applied:
-- `extra_fields.litellm_compat`: Set to `true`
-- `extra_fields.provider`: The provider that handled the request
- `extra_fields.request_type`: Reflects the original request type
- `extra_fields.original_model_requested`: The originally requested model
- `extra_fields.resolved_model_used`: The actual provider API identifier used (equals original_model_requested when no alias mapping exists)

@@ -131,8 +182,11 @@ When either transformation is applied:

### Error Handling

When errors occur on transformed requests:
-- `extra_fields.litellm_compat` is set to `true`
- Original request type and model are preserved in error metadata
+- `extra_fields.converted_request_type`: Set to the request type the request was converted to (`chat_completion` or `responses`)
+- `extra_fields.provider`: The provider that handled the request
+- `extra_fields.original_model_requested`: The originally requested model
+- `extra_fields.dropped_compat_plugin_params`: If any unsupported parameters were dropped, the dropped keys are listed here

## What's Preserved

@@ -145,7 +199,7 @@ When errors occur on transformed requests:

**Good Use Cases:**
- Migrating from LiteLLM to Bifrost without code changes
-- Maintaining backward compatibility with text completion interfaces
+- Maintaining backward compatibility with text or chat completion interfaces
- Using a unified API across providers with different capabilities

**Consider Alternatives When:**
@@ -157,4 +211,4 @@ When errors occur on transformed requests:

- [Fallbacks](/features/fallbacks) - Automatic provider failover
- [Drop-in Replacement](/features/drop-in-replacement) - Use existing SDKs with Bifrost
-- [LiteLLM Integration](/integrations/litellm-sdk) - Using LiteLLM SDK with Bifrost
+- [LiteLLM Integration](/integrations/litellm-sdk) - Using LiteLLM SDK with Bifrost
\ No newline at end of file
diff --git a/docs/features/observability/prometheus.mdx b/docs/features/observability/prometheus.mdx
index ed2a491759..6c6df6a24f 100644
--- a/docs/features/observability/prometheus.mdx
+++ b/docs/features/observability/prometheus.mdx
@@ -105,12 +105,15 @@ For multi-node cluster deployments, the Prometheus plugin pushes metrics to a [P
 {
 "plugins": [
 {
- "name": "prometheus",
+ "name": "telemetry",
 "enabled": true,
 "config": {
- "push_gateway_url": "http://pushgateway:9091",
- "job_name": "bifrost",
- "push_interval": 15
+ "push_gateway": {
+ "enabled": true,
+ "push_gateway_url": "http://pushgateway:9091",
+ "job_name": "bifrost",
+ "push_interval": 15
+ }
 }
 }
 ]
@@ -123,16 +126,19 @@ For multi-node cluster deployments, the Prometheus plugin pushes metrics to a [P
 {
 "plugins": [
 {
- "name": "prometheus",
+ "name": "telemetry",
 "enabled": true,
 "config": {
- "push_gateway_url": "http://pushgateway:9091",
- "job_name": "bifrost",
- "push_interval": 15,
- "instance_id": "bifrost-node-1",
- "basic_auth": {
- "username": "admin",
- "password": "secret"
+ "push_gateway": {
+ "enabled": true,
+ "push_gateway_url": "http://pushgateway:9091",
+ "job_name": "bifrost",
+ "push_interval": 15,
+ "instance_id": "bifrost-node-1",
+ "basic_auth": {
+ "username": "admin",
+ "password": "secret"
+ }
 }
 }
 }
diff --git a/docs/integrations/guardrails/azure-content-safety.mdx b/docs/integrations/guardrails/azure-content-safety.mdx
index 0592145f63..d636ecd6f3 100644
--- a/docs/integrations/guardrails/azure-content-safety.mdx
+++ b/docs/integrations/guardrails/azure-content-safety.mdx
@@ -19,25 +19,36 @@ Bifrost integrates with **Azure AI Content Safety** to provide multi-modal conte

## Configuration Fields

-| Field | Type | Required | Default | Description |
-|-------|------|----------|---------|-------------|
-| `endpoint` | string | Yes | - | Azure Content Safety endpoint URL |
-| `api_key` | string | Yes | - | Azure subscription key |
-| `analyze_enabled` | boolean | No | true | Enable content analysis for Hate, Sexual, Violence, SelfHarm |
-| `analyze_severity_threshold` | enum | No | "medium" | Severity level to trigger: `low`, `medium`, or `high` |
-| `jailbreak_shield_enabled` | boolean | No | false | Enable jailbreak detection (input only) |
-| `indirect_attack_shield_enabled` | boolean | No | false | Enable indirect prompt attack detection (input only) |
-| `copyright_enabled` | boolean | No | false | Enable copyrighted content detection (output only) |
-| `text_blocklist_enabled` | boolean | No | false | Enable custom blocklist filtering |
-| `blocklist_names` | array | No | - | List of Azure blocklist names to apply |
+| Field                             | Type    | Required | Default  | Description                                                   |
+| --------------------------------- | ------- | -------- | -------- | ------------------------------------------------------------- |
+| `endpoint`                        | string  | Yes      | -        | Azure Content Safety endpoint URL                             |
+| `api_key`                         | string  | Yes      | -        | Azure subscription key                                        |
+| `analyze_enabled`                 | boolean | No       | true     | Enable content analysis for Hate, Sexual, Violence, SelfHarm  |
+| `analyze_severity_threshold`      | enum    | No       | "medium" | Severity level to trigger: `low`, `medium`, or `high`         |
+| `jailbreak_shield_enabled`        | boolean | No       | false    | Enable jailbreak detection (input only)                       |
+| `indirect_attack_shield_enabled`  | boolean | No       | false    | Enable indirect prompt attack detection (input only)          |
+| `copyright_enabled`               | boolean | No       | false    | Enable copyrighted content detection (output only)            |
+| `text_blocklist_enabled`          | boolean | No       | false    | Enable custom blocklist filtering                             |
+| `blocklist_names`                 | array   | No       | -        | List of Azure blocklist names to apply                        |
+
+## Collecting your API key and URL
+
+Navigate to the Azure AI Foundry dashboard:
+
+
+ Azure AI Foundry dashboard
+
+
+- Copy the API key and paste it into the Azure content moderation config form.
+- Copy the project endpoint and use its base URL as the `endpoint` in the form (e.g. `https://xxx-resource.services.ai.azure.com`).

## Severity Threshold Levels

-| Threshold | Numeric Value | Behavior |
-|-----------|---------------|----------|
-| `low` | 2 | Most strict - blocks severity 2 and above |
-| `medium` | 4 | Balanced - blocks severity 4 and above |
-| `high` | 6 | Least strict - blocks only severity 6 |
+| Threshold | Numeric Value | Behavior                                   |
+| --------- | ------------- | ------------------------------------------ |
+| `low`     | 2             | Most strict - blocks severity 2 and above  |
+| `medium`  | 4             | Balanced - blocks severity 4 and above     |
+| `high`    | 6             | Least strict - blocks only severity 6      |

## Detection Categories

@@ -47,23 +58,23 @@ Bifrost integrates with **Azure AI Content Safety** to provide multi-modal conte
- Self-harm

-**Input-only features:** Jailbreak Shield and Indirect Attack Shield only apply to input validation.
-**Output-only features:** Copyright detection only applies to output validation.
+ **Input-only features:** Jailbreak Shield and Indirect Attack Shield only apply to input validation. **Output-only
+ features:** Copyright detection only applies to output validation.
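+
+Putting the fields together, a hypothetical provider configuration could look like the following. The field names and defaults come from the table above, while the endpoint and blocklist name are placeholders; the surrounding guardrail rule/profile structure is covered in [Guardrails](/enterprise/guardrails):
+
+```json
+{
+  "endpoint": "https://xxx-resource.services.ai.azure.com",
+  "api_key": "${AZURE_CONTENT_SAFETY_KEY}",
+  "analyze_enabled": true,
+  "analyze_severity_threshold": "medium",
+  "jailbreak_shield_enabled": true,
+  "indirect_attack_shield_enabled": false,
+  "copyright_enabled": false,
+  "text_blocklist_enabled": true,
+  "blocklist_names": ["banned-terms"]
+}
+```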
## Provider Capabilities Comparison -| Capability | AWS Bedrock | Azure Content Safety | GraySwan | Patronus AI | -|------------|-------------|---------------------|----------|-------------| -| PII Detection | Yes | No | No | Yes | -| Content Filtering | Yes | Yes | Yes | Yes | -| Prompt Injection | Yes | Yes | Yes | Yes | -| Hallucination Detection | No | No | No | Yes | -| Toxicity Screening | Yes | Yes | Yes | Yes | -| Custom Policies | Yes | Yes | Yes | Yes | -| Custom Natural Language Rules | No | No | Yes | No | -| Image Support | Yes | No | No | No | -| IPI Detection | No | Yes | Yes | No | -| Mutation Detection | No | No | Yes | No | +| Capability | AWS Bedrock | Azure Content Safety | GraySwan | Patronus AI | +| ----------------------------- | ----------- | -------------------- | -------- | ----------- | +| PII Detection | Yes | No | No | Yes | +| Content Filtering | Yes | Yes | Yes | Yes | +| Prompt Injection | Yes | Yes | Yes | Yes | +| Hallucination Detection | No | No | No | Yes | +| Toxicity Screening | Yes | Yes | Yes | Yes | +| Custom Policies | Yes | Yes | Yes | Yes | +| Custom Natural Language Rules | No | No | Yes | No | +| Image Support | Yes | No | No | No | +| IPI Detection | No | Yes | Yes | No | +| Mutation Detection | No | No | Yes | No | For information on configuring guardrail rules and profiles, see [Guardrails](/enterprise/guardrails). diff --git a/docs/media/compatibility-settings.png b/docs/media/compatibility-settings.png new file mode 100644 index 0000000000..69b62a60d9 Binary files /dev/null and b/docs/media/compatibility-settings.png differ diff --git a/docs/media/custom-base-url.mp4 b/docs/media/custom-base-url.mp4 new file mode 100644 index 0000000000..2b32e8e9a3 Binary files /dev/null and b/docs/media/custom-base-url.mp4 differ diff --git a/docs/media/guardrails/azure-api-key.png b/docs/media/guardrails/azure-api-key.png index 3a8d86f398..daad86a7be 100644 Binary files a/docs/media/guardrails/azure-api-key.png and b/docs/media/guardrails/azure-api-key.png differ diff --git a/docs/media/guardrails/microsoft-guardrails-url.png b/docs/media/guardrails/microsoft-guardrails-url.png new file mode 100644 index 0000000000..f56a220ddd Binary files /dev/null and b/docs/media/guardrails/microsoft-guardrails-url.png differ diff --git a/docs/media/setting-up-dashboard-auth.png b/docs/media/setting-up-dashboard-auth.png index 3fb3ccb253..512273e4a6 100644 Binary files a/docs/media/setting-up-dashboard-auth.png and b/docs/media/setting-up-dashboard-auth.png differ diff --git a/docs/media/ui-config.png b/docs/media/ui-config.png index 8ea6ffe88f..7cb29c9bb0 100644 Binary files a/docs/media/ui-config.png and b/docs/media/ui-config.png differ diff --git a/docs/media/ui-multi-key-for-models.png b/docs/media/ui-multi-key-for-models.png index 2a049ca4b6..95759fff81 100644 Binary files a/docs/media/ui-multi-key-for-models.png and b/docs/media/ui-multi-key-for-models.png differ diff --git a/docs/media/ui-provider-configs.png b/docs/media/ui-provider-configs.png index 20f46c2378..b3c4d3a71a 100644 Binary files a/docs/media/ui-provider-configs.png and b/docs/media/ui-provider-configs.png differ diff --git a/docs/media/user-provisioning/.custom-attribute-mapping.png-TkS4 b/docs/media/user-provisioning/.custom-attribute-mapping.png-TkS4 new file mode 100644 index 0000000000..e69de29bb2 diff --git a/docs/media/user-provisioning/.custom-attribute-mapping.png-e0eo b/docs/media/user-provisioning/.custom-attribute-mapping.png-e0eo new file mode 100644 index 0000000000..e69de29bb2 
diff --git a/docs/media/user-provisioning/.scim-import-preview.png-sIys b/docs/media/user-provisioning/.scim-import-preview.png-sIys new file mode 100644 index 0000000000..e69de29bb2 diff --git a/docs/media/user-provisioning/attribute-to-entity-mapping.png b/docs/media/user-provisioning/attribute-to-entity-mapping.png new file mode 100644 index 0000000000..cad10c4dca Binary files /dev/null and b/docs/media/user-provisioning/attribute-to-entity-mapping.png differ diff --git a/docs/media/user-provisioning/custom-attribute-mapping.png b/docs/media/user-provisioning/custom-attribute-mapping.png new file mode 100644 index 0000000000..1a51ebaa63 Binary files /dev/null and b/docs/media/user-provisioning/custom-attribute-mapping.png differ diff --git a/docs/media/user-provisioning/entra-app-manifest.png b/docs/media/user-provisioning/entra-app-manifest.png new file mode 100644 index 0000000000..fa8b3696af Binary files /dev/null and b/docs/media/user-provisioning/entra-app-manifest.png differ diff --git a/docs/media/user-provisioning/entra-form.png b/docs/media/user-provisioning/entra-form.png new file mode 100644 index 0000000000..72df177f1c Binary files /dev/null and b/docs/media/user-provisioning/entra-form.png differ diff --git a/docs/media/user-provisioning/gws-apis-and-services.png b/docs/media/user-provisioning/gws-apis-and-services.png new file mode 100644 index 0000000000..89c8101f0d Binary files /dev/null and b/docs/media/user-provisioning/gws-apis-and-services.png differ diff --git a/docs/media/user-provisioning/gws-form.png b/docs/media/user-provisioning/gws-form.png new file mode 100644 index 0000000000..8cae2fb0d4 Binary files /dev/null and b/docs/media/user-provisioning/gws-form.png differ diff --git a/docs/media/user-provisioning/okta-form.png b/docs/media/user-provisioning/okta-form.png new file mode 100644 index 0000000000..4089a3239f Binary files /dev/null and b/docs/media/user-provisioning/okta-form.png differ diff --git a/docs/media/user-provisioning/scim-attribute-mapping.png b/docs/media/user-provisioning/scim-attribute-mapping.png new file mode 100644 index 0000000000..6d48d5e7ba Binary files /dev/null and b/docs/media/user-provisioning/scim-attribute-mapping.png differ diff --git a/docs/media/user-provisioning/scim-flow.png b/docs/media/user-provisioning/scim-flow.png new file mode 100644 index 0000000000..10f1950674 Binary files /dev/null and b/docs/media/user-provisioning/scim-flow.png differ diff --git a/docs/media/user-provisioning/scim-import-preview.png b/docs/media/user-provisioning/scim-import-preview.png new file mode 100644 index 0000000000..928fa67825 Binary files /dev/null and b/docs/media/user-provisioning/scim-import-preview.png differ diff --git a/docs/media/user-provisioning/scim-overview.png b/docs/media/user-provisioning/scim-overview.png new file mode 100644 index 0000000000..d9f6fcd1f7 Binary files /dev/null and b/docs/media/user-provisioning/scim-overview.png differ diff --git a/docs/media/user-provisioning/scim-provider-select.png b/docs/media/user-provisioning/scim-provider-select.png new file mode 100644 index 0000000000..9d2a0854bf Binary files /dev/null and b/docs/media/user-provisioning/scim-provider-select.png differ diff --git a/docs/media/user-provisioning/zitadel-assert-roles.png b/docs/media/user-provisioning/zitadel-assert-roles.png new file mode 100644 index 0000000000..bfbbf5d176 Binary files /dev/null and b/docs/media/user-provisioning/zitadel-assert-roles.png differ diff --git a/docs/media/user-provisioning/zitadel-client-id.png 
b/docs/media/user-provisioning/zitadel-client-id.png new file mode 100644 index 0000000000..15da97d447 Binary files /dev/null and b/docs/media/user-provisioning/zitadel-client-id.png differ diff --git a/docs/media/user-provisioning/zitadel-form.png b/docs/media/user-provisioning/zitadel-form.png new file mode 100644 index 0000000000..6fa7884cdb Binary files /dev/null and b/docs/media/user-provisioning/zitadel-form.png differ diff --git a/docs/media/user-provisioning/zitadel-project-roles.png b/docs/media/user-provisioning/zitadel-project-roles.png new file mode 100644 index 0000000000..bdd1b2f363 Binary files /dev/null and b/docs/media/user-provisioning/zitadel-project-roles.png differ diff --git a/docs/media/user-provisioning/zitadel-refresh-token.png b/docs/media/user-provisioning/zitadel-refresh-token.png new file mode 100644 index 0000000000..dba6d447f8 Binary files /dev/null and b/docs/media/user-provisioning/zitadel-refresh-token.png differ diff --git a/docs/media/user-provisioning/zitadel-service-account-create.png b/docs/media/user-provisioning/zitadel-service-account-create.png new file mode 100644 index 0000000000..f540ac60ef Binary files /dev/null and b/docs/media/user-provisioning/zitadel-service-account-create.png differ diff --git a/docs/media/user-provisioning/zitadel-service-account-key.png b/docs/media/user-provisioning/zitadel-service-account-key.png new file mode 100644 index 0000000000..679e08d1f0 Binary files /dev/null and b/docs/media/user-provisioning/zitadel-service-account-key.png differ diff --git a/docs/media/user-provisioning/zitadel-service-account-role.png b/docs/media/user-provisioning/zitadel-service-account-role.png new file mode 100644 index 0000000000..49ebe761f4 Binary files /dev/null and b/docs/media/user-provisioning/zitadel-service-account-role.png differ diff --git a/docs/media/user-provisioning/zitadel-token-settings.png b/docs/media/user-provisioning/zitadel-token-settings.png new file mode 100644 index 0000000000..29705c831c Binary files /dev/null and b/docs/media/user-provisioning/zitadel-token-settings.png differ diff --git a/docs/media/user-provisioning/zitadel-user-role-assignment.png b/docs/media/user-provisioning/zitadel-user-role-assignment.png new file mode 100644 index 0000000000..807acadd9d Binary files /dev/null and b/docs/media/user-provisioning/zitadel-user-role-assignment.png differ diff --git a/docs/migration-guides/v1.5.0.mdx b/docs/migration-guides/v1.5.0.mdx index de384bfeca..7d1933aaac 100644 --- a/docs/migration-guides/v1.5.0.mdx +++ b/docs/migration-guides/v1.5.0.mdx @@ -319,15 +319,6 @@ The database migration runs automatically on startup, migrating existing deploym "gpt-4o-mini": "my-mini-deployment" } }] - "ollama": { - "keys": [ - { - "id": "ollama-local", - "models": ["*"], - "weight": 1.0, - "ollama_key_config": { "url": "http://localhost:11434" } - } - ] } } } @@ -471,6 +462,42 @@ result.ResolvedModel // actual model identifier used by the provider --- +## Opting Out: `version: 1` Compatibility Mode + +If you are not ready to adopt the new deny-by-default semantics, you can add a single field to `config.json` to restore v1.4.x behavior for all allow-list fields loaded from that file: + +```json +{ + "version": 1, + "providers": { ... 
} +} +``` + +| Value | Behavior | +|---|---| +| `2` (default, omitted) | v1.5.0 semantics — empty = deny all, `["*"]` = allow all | +| `1` | v1.4.x semantics — empty = allow all | + +**What `version: 1` normalizes at startup** (before any other processing): + +| Field | Without `version: 1` | With `version: 1` | +|---|---|---| +| Provider key `models: []` | Deny all models | Allow all models (→ `["*"]`) | +| VK `provider_configs: []` | No providers allowed | All configured providers added with `allowed_models: ["*"]` | +| VK provider config `allowed_models: []` | Deny all models | Allow all models (→ `["*"]`) | +| VK provider config `key_ids: []` | No keys allowed | All keys allowed (→ `key_ids: ["*"]`) | +| VK `mcp_configs: []` | No MCP tools allowed | All configured MCP clients added with `tools_to_execute: ["*"]` | + + +`version: 1` only applies to configuration loaded from `config.json`. Virtual Keys created or updated via the REST API always use v1.5.0 semantics regardless of this setting. The automatic database migration that runs on startup is also unaffected. + + + +`version: 1` is a temporary compatibility shim. Plan to migrate your `config.json` to explicit `["*"]` wildcards and remove the `version` field before the next major release. + + +--- + ## Complete Migration Checklist diff --git a/docs/openapi/openapi.json b/docs/openapi/openapi.json index da486a8e81..2a8e0d7a89 100644 --- a/docs/openapi/openapi.json +++ b/docs/openapi/openapi.json @@ -33438,6 +33438,85 @@ } } }, + "/api/governance/virtual-keys/quota": { + "get": { + "operationId": "getVirtualKeyQuota", + "summary": "Get virtual key quota", + "description": "Returns the budget and rate limit quota for the authenticated virtual key.\nThis is a self-service endpoint — no admin authentication required.\nThe virtual key value itself (provided via header) is the credential.\n", + "tags": [ + "Governance" + ], + "security": [ + { + "VirtualKeyAuth": [] + }, + { + "BearerAuth": [] + }, + { + "ApiKeyAuth": [] + } + ], + "responses": { + "200": { + "description": "Successful response", + "content": { + "application/json": { + "schema": { + "type": "object", + "description": "Virtual key quota response (self-service, no admin auth required)", + "properties": { + "virtual_key_name": { + "type": "string", + "description": "Name of the virtual key" + }, + "is_active": { + "type": "boolean", + "description": "Whether the virtual key is active" + }, + "budgets": { + "type": "array", + "description": "Budget quotas assigned to this virtual key", + "items": { + "$ref": "#/components/schemas/Budget" + } + }, + "rate_limit": { + "$ref": "#/components/schemas/RateLimit" + } + } + } + } + } + }, + "401": { + "description": "Missing or invalid virtual key", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "error": { + "type": "string" + } + } + } + } + } + }, + "500": { + "description": "Internal server error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/BifrostError" + } + } + } + } + } + } + }, "/api/governance/virtual-keys/{vk_id}": { "get": { "operationId": "getVirtualKey", @@ -51547,9 +51626,29 @@ "type": "integer", "description": "Maximum request body size in MB" }, - "enable_litellm_fallbacks": { - "type": "boolean", - "description": "Whether LiteLLM fallbacks are enabled" + "compat": { + "type": "object", + "description": "Compat plugin configuration", + "properties": { + "convert_text_to_chat": { + "type": "boolean", + "description": "Convert text 
completion requests to chat" + }, + "convert_chat_to_responses": { + "type": "boolean", + "description": "Convert chat completion requests to responses" + }, + "should_drop_params": { + "type": "boolean", + "description": "Drop unsupported parameters based on model catalog" + }, + "should_convert_params": { + "type": "boolean", + "default": false, + "description": "Converts model parameter values that are not supported by the model" + } + }, + "additionalProperties": false }, "log_retention_days": { "type": "integer", @@ -51762,9 +51861,29 @@ "type": "integer", "description": "Maximum request body size in MB" }, - "enable_litellm_fallbacks": { - "type": "boolean", - "description": "Whether LiteLLM fallbacks are enabled" + "compat": { + "type": "object", + "description": "Compat plugin configuration", + "properties": { + "convert_text_to_chat": { + "type": "boolean", + "description": "Convert text completion requests to chat" + }, + "convert_chat_to_responses": { + "type": "boolean", + "description": "Convert chat completion requests to responses" + }, + "should_drop_params": { + "type": "boolean", + "description": "Drop unsupported parameters based on model catalog" + }, + "should_convert_params": { + "type": "boolean", + "default": false, + "description": "Converts model parameter values that are not supported by the model" + } + }, + "additionalProperties": false }, "log_retention_days": { "type": "integer", diff --git a/docs/openapi/openapi.yaml b/docs/openapi/openapi.yaml index 7142399a8b..a4a6722dbf 100644 --- a/docs/openapi/openapi.yaml +++ b/docs/openapi/openapi.yaml @@ -668,6 +668,8 @@ paths: # Governance - Virtual Keys /api/governance/virtual-keys: $ref: './paths/management/governance.yaml#/virtual-keys' + /api/governance/virtual-keys/quota: + $ref: './paths/management/governance.yaml#/virtual-keys-quota' /api/governance/virtual-keys/{vk_id}: $ref: './paths/management/governance.yaml#/virtual-keys-by-id' diff --git a/docs/openapi/paths/management/governance.yaml b/docs/openapi/paths/management/governance.yaml index 35e38e1b99..a49d28656e 100644 --- a/docs/openapi/paths/management/governance.yaml +++ b/docs/openapi/paths/management/governance.yaml @@ -48,6 +48,39 @@ virtual-keys: '500': $ref: '../../openapi.yaml#/components/responses/InternalError' +virtual-keys-quota: + get: + operationId: getVirtualKeyQuota + summary: Get virtual key quota + description: | + Returns the budget and rate limit quota for the authenticated virtual key. + This is a self-service endpoint — no admin authentication required. + The virtual key value itself (provided via header) is the credential. 
+ tags: + - Governance + security: + - VirtualKeyAuth: [] + - BearerAuth: [] + - ApiKeyAuth: [] + responses: + '200': + description: Successful response + content: + application/json: + schema: + $ref: '../../schemas/management/governance.yaml#/VirtualKeyQuotaResponse' + '401': + description: Missing or invalid virtual key + content: + application/json: + schema: + type: object + properties: + error: + type: string + '500': + $ref: '../../openapi.yaml#/components/responses/InternalError' + virtual-keys-by-id: get: operationId: getVirtualKey diff --git a/docs/openapi/schemas/management/config.yaml b/docs/openapi/schemas/management/config.yaml index 2c54b3979d..eaafb3821f 100644 --- a/docs/openapi/schemas/management/config.yaml +++ b/docs/openapi/schemas/management/config.yaml @@ -44,9 +44,24 @@ ClientConfig: max_request_body_size_mb: type: integer description: Maximum request body size in MB - enable_litellm_fallbacks: - type: boolean - description: Whether LiteLLM fallbacks are enabled + compat: + type: object + description: Compat plugin configuration + properties: + convert_text_to_chat: + type: boolean + description: Convert text completion requests to chat + convert_chat_to_responses: + type: boolean + description: Convert chat completion requests to responses + should_drop_params: + type: boolean + description: Drop unsupported parameters based on model catalog + should_convert_params: + type: boolean + default: false + description: Converts model parameter values that are not supported by the model + additionalProperties: false log_retention_days: type: integer description: Number of days to retain logs diff --git a/docs/openapi/schemas/management/governance.yaml b/docs/openapi/schemas/management/governance.yaml index c195faeb94..bd04aecee7 100644 --- a/docs/openapi/schemas/management/governance.yaml +++ b/docs/openapi/schemas/management/governance.yaml @@ -331,6 +331,24 @@ ListVirtualKeysResponse: count: type: integer +VirtualKeyQuotaResponse: + type: object + description: Virtual key quota response (self-service, no admin auth required) + properties: + virtual_key_name: + type: string + description: Name of the virtual key + is_active: + type: boolean + description: Whether the virtual key is active + budgets: + type: array + description: Budget quotas assigned to this virtual key + items: + $ref: '#/Budget' + rate_limit: + $ref: '#/RateLimit' + VirtualKeyResponse: type: object description: Virtual key operation response diff --git a/docs/overview.mdx b/docs/overview.mdx index 2960604cf3..e1d7cd33ad 100644 --- a/docs/overview.mdx +++ b/docs/overview.mdx @@ -122,9 +122,6 @@ Advanced capabilities for teams running production AI systems at scale. Enterpri Transform existing enterprise APIs into MCP tools using federated authentication — no code required. - - Secure key management with HashiCorp Vault, AWS Secrets Manager, Google Secret Manager, and Azure Key Vault. - Deploy within your private cloud infrastructure with VPC isolation and enhanced security controls. diff --git a/docs/plugins/building-dynamic-binary.mdx b/docs/plugins/building-dynamic-binary.mdx index 1c46d8e749..9d1329e118 100644 --- a/docs/plugins/building-dynamic-binary.mdx +++ b/docs/plugins/building-dynamic-binary.mdx @@ -98,7 +98,7 @@ Use this for Alpine-based deployments or when you want minimal image size. 
```dockerfile -# --- UI Build Stage: Build the Next.js frontend --- +# --- UI Build Stage: Build the React + Vite frontend --- FROM node:25-alpine3.23 AS ui-builder WORKDIR /app @@ -110,8 +110,7 @@ RUN npm ci COPY ui/ ./ # Build UI (skip the copy-build step) -RUN npx next build -RUN node scripts/fix-paths.js +RUN npm run build-enterprise # --- Go Build Stage: Compile the Go binary --- FROM golang:1.26.1-alpine3.23 AS builder @@ -215,7 +214,7 @@ Use this for Debian/Ubuntu-based deployments or when deploying to glibc-based sy ```dockerfile -# --- UI Build Stage: Build the Next.js frontend --- +# --- UI Build Stage: Build the React + Vite frontend --- FROM node:25-bookworm AS ui-builder WORKDIR /app @@ -227,8 +226,7 @@ RUN npm ci COPY ui/ ./ # Build UI -RUN npx next build -RUN node scripts/fix-paths.js +RUN npm run build-enterprise # --- Go Build Stage: Compile the Go binary --- FROM golang:1.26.1-bookworm AS builder diff --git a/docs/plugins/writing-go-plugin.mdx b/docs/plugins/writing-go-plugin.mdx index 146bdd8e28..0f159478f8 100644 --- a/docs/plugins/writing-go-plugin.mdx +++ b/docs/plugins/writing-go-plugin.mdx @@ -1138,7 +1138,9 @@ plugin was built with a different version of package github.com/maximhq/bifrost/ ```json { - "log_level": "debug", + "client": { + "enable_logging": true + }, "plugins": [ { "enabled": true, @@ -1150,6 +1152,8 @@ plugin was built with a different version of package github.com/maximhq/bifrost/ } ``` +For verbose plugin-loader logs, set `BIFROST_LOG_LEVEL=debug` in the environment. + **Check plugin symbols:** ```bash diff --git a/docs/providers/custom-pricing.mdx b/docs/providers/custom-pricing.mdx index 13c883773f..43931fc93a 100644 --- a/docs/providers/custom-pricing.mdx +++ b/docs/providers/custom-pricing.mdx @@ -268,6 +268,8 @@ Only fields with non-zero values are applied. All values are cost **per unit** i | `output_cost_per_token_batches` | Output token cost for batch requests | | `input_cost_per_token_priority` | Input token cost for priority requests | | `output_cost_per_token_priority` | Output token cost for priority requests | +| `input_cost_per_token_flex` | Input token cost for flex requests | +| `output_cost_per_token_flex` | Output token cost for flex requests | | `input_cost_per_character` | Input cost per character (character-billed models) | ### Token tier costs @@ -277,7 +279,13 @@ Only fields with non-zero values are applied. All values are cost **per unit** i | `input_cost_per_token_above_128k_tokens` | Input cost above 128k context | | `output_cost_per_token_above_128k_tokens` | Output cost above 128k context | | `input_cost_per_token_above_200k_tokens` | Input cost above 200k context | +| `input_cost_per_token_above_200k_tokens_priority` | Input cost above 200k context for priority requests | | `output_cost_per_token_above_200k_tokens` | Output cost above 200k context | +| `output_cost_per_token_above_200k_tokens_priority` | Output cost above 200k context for priority requests | +| `input_cost_per_token_above_272k_tokens` | Input cost above 272k context | +| `input_cost_per_token_above_272k_tokens_priority` | Input cost above 272k context for priority requests | +| `output_cost_per_token_above_272k_tokens` | Output cost above 272k context | +| `output_cost_per_token_above_272k_tokens_priority` | Output cost above 272k context for priority requests | ### Cache costs @@ -287,7 +295,11 @@ Only fields with non-zero values are applied. 
All values are cost **per unit** i | `cache_read_input_token_cost` | Cost to read a cached input token | | `cache_creation_input_token_cost_above_200k_tokens` | Cache creation above 200k context | | `cache_read_input_token_cost_above_200k_tokens` | Cache read above 200k context | +| `cache_read_input_token_cost_above_200k_tokens_priority` | Cache read above 200k context for priority requests | | `cache_read_input_token_cost_priority` | Priority cache read cost | +| `cache_read_input_token_cost_flex` | Flex cache read cost | +| `cache_read_input_token_cost_above_272k_tokens` | Cache read above 272k context | +| `cache_read_input_token_cost_above_272k_tokens_priority` | Cache read above 272k context for priority requests | | `cache_read_input_image_token_cost` | Cache read cost for image tokens | | `cache_creation_input_audio_token_cost` | Cache creation cost for audio tokens | diff --git a/docs/providers/request-options.mdx b/docs/providers/request-options.mdx index b542e702fa..55d91481a7 100644 --- a/docs/providers/request-options.mdx +++ b/docs/providers/request-options.mdx @@ -16,7 +16,9 @@ Bifrost provides request options that control behavior, enable features, and pas | `BifrostContextKeySessionID` | `x-bf-session-id` | `string` | Session ID for key stickiness (requires KV store) | | `BifrostContextKeySessionTTL` | `x-bf-session-ttl` | `time.Duration` | Session-to-key cache TTL (duration string or seconds) | | `BifrostContextKeyRequestID` | `x-request-id` | `string` | Custom request ID for tracking | -| `BifrostContextKeySendBackRawResponse` | `x-bf-send-back-raw-response` | `bool` | Include raw provider response | +| `BifrostContextKeySendBackRawRequest` | `x-bf-send-back-raw-request` | `bool` | Include raw provider request in the response | +| `BifrostContextKeySendBackRawResponse` | `x-bf-send-back-raw-response` | `bool` | Include raw provider response in the response | +| `BifrostContextKeyStoreRawRequestResponse` | `x-bf-store-raw-request-response` | `bool` | Persist raw request/response in log records | | `BifrostContextKeyPassthroughExtraParams` | `x-bf-passthrough-extra-params` | `bool` | Enable passthrough for extra parameters | | `BifrostContextKeyExtraHeaders` | `x-bf-eh-*` | `map[string][]string` | Custom headers forwarded to provider | | `BifrostContextKeyDirectKey` | `-` | `schemas.Key` | Direct key credentials (Go SDK only) | @@ -269,14 +271,69 @@ response, err := client.ChatCompletionRequest(schemas.NewBifrostContext(ctx, sch +### Send Back Raw Request + +**Context Key:** `BifrostContextKeySendBackRawRequest` +**Header:** `x-bf-send-back-raw-request` +**Type:** `bool` (header values: `"true"` or `"false"`) +**Required:** No + +Include the exact JSON body sent to the provider alongside Bifrost's standardized response. Accepts `"true"` or `"false"` — either value fully overrides the provider-level `send_back_raw_request` config for this request. 
+ + + +```bash +curl --location 'http://localhost:8080/v1/chat/completions' \ +--header 'x-bf-send-back-raw-request: true' \ +--header 'Content-Type: application/json' \ +--data '{ + "model": "openai/gpt-4o-mini", + "messages": [{"role": "user", "content": "Hello!"}] +}' +``` + + +```go +ctx := context.Background() +ctx = context.WithValue(ctx, schemas.BifrostContextKeySendBackRawRequest, true) + +response, err := client.ChatCompletionRequest(schemas.NewBifrostContext(ctx, schemas.NoDeadline), &schemas.BifrostChatRequest{ + Provider: schemas.OpenAI, + Model: "gpt-4o-mini", + Input: messages, +}) + +// Access raw request +if response.ChatResponse != nil { + rawReq := response.ChatResponse.ExtraFields.RawRequest +} +``` + + + +The raw request appears in `extra_fields.raw_request`: + +```json +{ + "choices": [...], + "usage": {...}, + "extra_fields": { + "provider": "openai", + "raw_request": { + // Exact JSON sent to the provider + } + } +} +``` + ### Send Back Raw Response **Context Key:** `BifrostContextKeySendBackRawResponse` **Header:** `x-bf-send-back-raw-response` -**Type:** `bool` (header value: `"true"`) +**Type:** `bool` (header values: `"true"` or `"false"`) **Required:** No -Include the original provider response alongside Bifrost's standardized response format. +Include the original provider response alongside Bifrost's standardized response format. Accepts `"true"` or `"false"` — either value fully overrides the provider-level `send_back_raw_response` config for this request. @@ -324,6 +381,51 @@ The raw response appears in `extra_fields.raw_response`: } ``` +### Store Raw Request/Response + +**Context Key:** `BifrostContextKeyStoreRawRequestResponse` +**Header:** `x-bf-store-raw-request-response` +**Type:** `bool` (header values: `"true"` or `"false"`) +**Required:** No + +Persist the raw provider request and response in the log record. Accepts `"true"` or `"false"` — either value fully overrides the provider-level `store_raw_request_response` config for this request. + +This is orthogonal to the send-back flags: enabling this does not affect whether raw data appears in the API response, and enabling send-back does not automatically store raw data in logs. Use this when you want observability into provider payloads without necessarily exposing them to the caller, or combine it with `x-bf-send-back-raw-*` to do both. + + + +```bash +curl --location 'http://localhost:8080/v1/chat/completions' \ +--header 'x-bf-store-raw-request-response: true' \ +--header 'Content-Type: application/json' \ +--data '{ + "model": "openai/gpt-4o-mini", + "messages": [{"role": "user", "content": "Hello!"}] +}' +``` + + +```go +ctx := context.Background() +ctx = context.WithValue(ctx, schemas.BifrostContextKeyStoreRawRequestResponse, true) + +response, err := client.ChatCompletionRequest(schemas.NewBifrostContext(ctx, schemas.NoDeadline), &schemas.BifrostChatRequest{ + Provider: schemas.OpenAI, + Model: "gpt-4o-mini", + Input: messages, +}) +// Raw data is persisted in the log record. +// ExtraFields.RawRequest/RawResponse are nil unless send-back flags are also enabled. +``` + + + + +`x-bf-store-raw-request-response` only has effect when the logging plugin is active — raw data is written to the log record by the logging plugin. Without it, enabling this flag captures the data but nothing persists it. + +`x-bf-store-raw-request-response` and `x-bf-send-back-raw-*` are orthogonal — you can enable any combination. 
Enabling store does not send data back to the caller; enabling send-back does not persist data in logs. Enable both to do both. + + ### Passthrough Extra Parameters **Context Key:** `BifrostContextKeyPassthroughExtraParams` diff --git a/docs/providers/supported-providers/overview.mdx b/docs/providers/supported-providers/overview.mdx index b3ae42f62f..98d13ffa73 100644 --- a/docs/providers/supported-providers/overview.mdx +++ b/docs/providers/supported-providers/overview.mdx @@ -48,7 +48,7 @@ The following table summarizes which operations are supported by each provider v Some operations are not supported by the downstream provider, and their internal implementation in Bifrost is optional. 🟡 -Like Text completions are not supported by Groq, but Bifrost can emulate them internally using the Chat Completions API. This feature is disabled by default, but it can be enabled by setting the `enable_litellm_fallbacks` flag to `true` in the client configuration. +For example, text completions are not supported by Groq, but Bifrost can emulate them internally using the Chat Completions API. This feature is disabled by default, but it can be enabled by setting `compat.convert_text_to_chat` to `true` in the client configuration. We do not promote using such fallbacks, since text completions and chat completions are fundamentally different. However, this option is available to help users migrating from LiteLLM (which does support these fallbacks). diff --git a/docs/quickstart/gateway/provider-configuration.mdx b/docs/quickstart/gateway/provider-configuration.mdx index 5986ed87f6..597d3b2a1e 100644 --- a/docs/quickstart/gateway/provider-configuration.mdx +++ b/docs/quickstart/gateway/provider-configuration.mdx @@ -337,7 +337,12 @@ Override the default API endpoint for a provider. This is useful for connecting -![Base URL Configuration Interface](../../media/ui-base-url.png) + + + + 1. Navigate to **"Model Providers"** → **"Configurations"** → **"OpenAI"** → **"Provider level configuration"** → **"Network config"** 2. Set **Base URL**: `http://localhost:8000/v1` @@ -860,6 +865,10 @@ curl --location 'http://localhost:8080/api/providers' \ Include the original provider response alongside Bifrost's standardized response format. Useful for debugging and accessing provider-specific metadata. + +You can override this per request using the `x-bf-send-back-raw-response` header (`"true"` or `"false"`), regardless of the provider-level config. See [Request Options](../../providers/request-options#send-back-raw-response) for details. + + @@ -936,6 +945,10 @@ When enabled, the raw provider response appears in `extra_fields.raw_response`: Include the original request sent to the provider alongside Bifrost's response. Useful for debugging request transformations and verifying what was actually sent to the provider. + +You can override this per request using the `x-bf-send-back-raw-request` header (`"true"` or `"false"`), regardless of the provider-level config. See [Request Options](../../providers/request-options#send-back-raw-request) for details. + + @@ -1012,6 +1025,71 @@ When enabled, the raw provider request appears in `extra_fields.raw_request`: You can enable both `send_back_raw_request` and `send_back_raw_response` together to see the complete request-response cycle for debugging purposes. +### Store Raw Request/Response + +Persist the raw provider request and response in the log record.
This is orthogonal to `send_back_raw_request` and `send_back_raw_response` — enabling this does not affect whether raw data appears in the API response, and enabling send-back does not automatically store raw data in logs. Enable both to do both. + + + + + +1. Navigate to **"Model Providers"** → **"Configurations"** → **{Provider}** → **"Provider level configuration"** → **"Performance tuning"** +2. Toggle **"Store Raw Request/Response"** to enabled +3. Save configuration + + + + + +```bash +curl --location 'http://localhost:8080/api/providers' \ +--header 'Content-Type: application/json' \ +--data '{ + "provider": "openai", + "keys": [ + { + "name": "openai-key-1", + "value": "env.OPENAI_API_KEY", + "models": ["*"], + "weight": 1.0 + } + ], + "store_raw_request_response": true +}' +``` + + + + + +```json +{ + "providers": { + "openai": { + "keys": [ + { + "name": "openai-key-1", + "value": "env.OPENAI_API_KEY", + "models": ["*"], + "weight": 1.0 + } + ], + "store_raw_request_response": true + } + } +} +``` + + + + + + +`store_raw_request_response` only has effect when the logging plugin is active — raw data is written to the log record by the logging plugin. Without it, enabling this flag captures the data but nothing persists it. + +You can override this per request using the `x-bf-store-raw-request-response` header (`"true"` or `"false"`), regardless of the provider-level config. See [Request Options](../../providers/request-options#store-raw-requestresponse) for details. + + ### Passthrough Extra Parameters Enable passthrough mode for extra parameters. When enabled, any parameters in the `extra_params` field (or provider-specific extra parameter fields) will be merged directly into the request sent to the provider, bypassing Bifrost's parameter filtering. diff --git a/docs/quickstart/gateway/setting-up-auth.mdx b/docs/quickstart/gateway/setting-up-auth.mdx index 3e01340ff1..bf29677feb 100644 --- a/docs/quickstart/gateway/setting-up-auth.mdx +++ b/docs/quickstart/gateway/setting-up-auth.mdx @@ -4,6 +4,8 @@ description: "Learn how to enable basic authentication for the Bifrost dashboard icon: "lock" --- +This feature is only available in OSS builds. For enterprise builds, you can set up [SCIM](/enterprise/scim). + ## Overview Bifrost provides built-in authentication to protect your dashboard and admin API endpoints. When enabled, users must log in with credentials before accessing the dashboard or making admin API calls. This feature helps secure your Bifrost instance, especially when deployed in production environments. @@ -25,7 +27,8 @@ Bifrost provides built-in authentication to protect your dashboard and admin API 3. Enter your **Password** in the admin password field -The username and password fields are only enabled when the authentication toggle is turned on. Make sure to use a strong password for security. + The username and password fields are only enabled when the authentication toggle is turned on. Make sure to use a + strong password for security. ### Step 3: Configure Inference Call Authentication (Optional) @@ -39,7 +42,8 @@ By default, when authentication is enabled, all API calls (including inference c - MCP tool execution calls will still require authentication -This option is useful if you want to protect your dashboard and admin functions while allowing public access to inference endpoints. + This option is useful if you want to protect your dashboard and admin functions while allowing public access to + inference endpoints.
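As a concrete sketch of the resulting split (credentials are placeholders; Basic auth is one of the accepted methods listed further below):

```bash
# Admin API: still requires credentials while auth is enabled
curl -u admin:my-strong-password http://localhost:8080/api/providers

# Inference: passes without credentials once the
# "Disable authentication on inference calls" toggle is on
curl http://localhost:8080/v1/chat/completions \
  --header 'Content-Type: application/json' \
  --data '{"model": "openai/gpt-4o-mini", "messages": [{"role": "user", "content": "Hello!"}]}'
```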
### Step 4: Configure Whitelisted Routes (Optional) @@ -54,12 +58,15 @@ You can configure specific routes that bypass the authentication middleware enti **Wildcard support:** Routes ending with `*` are treated as prefix matches. For example, `/api/webhook*` will match `/api/webhook`, `/api/webhook/v1`, `/api/webhook/github`, etc. **Example values:** + ``` /api/custom-webhook, /api/public-endpoint, /api/webhook* ``` -System routes like `/health`, `/api/session/login`, `/api/session/is-auth-enabled`, `/api/oauth/callback`, and `/api/info` are always whitelisted regardless of this setting. Whitelisted routes only apply to dashboard and admin API endpoints — inference endpoints have their own toggle (see Step 3). + System routes like `/health`, `/api/session/login`, `/api/session/is-auth-enabled`, `/api/oauth/callback`, and + `/api/info` are always whitelisted regardless of this setting. Whitelisted routes only apply to dashboard and admin + API endpoints — inference endpoints have their own toggle (see Step 3). ### Step 5: Save Changes @@ -99,6 +106,7 @@ When authentication is enabled for inference calls (i.e., the "Disable authentic - **Basic Authentication**: Username and Password in Basic auth - **Bearer Token**: base64 string of username:password as bearer token + ### Whitelisted Routes When a route is added to the whitelisted routes list in Security settings, requests to that path bypass authentication entirely — no Basic Auth or Bearer Token is required. This applies only to dashboard and admin API endpoints. Inference endpoints are controlled separately via the "Disable authentication on inference calls" toggle. @@ -132,4 +140,4 @@ To disable authentication: 2. Toggle off the **Password protect the dashboard** switch 3. Click **Save Changes** -After disabling, the dashboard will be accessible without authentication immediately. \ No newline at end of file +After disabling, the dashboard will be accessible without authentication immediately. diff --git a/docs/quickstart/gateway/setting-up.mdx b/docs/quickstart/gateway/setting-up.mdx index d5195e2023..7dd58228a9 100644 --- a/docs/quickstart/gateway/setting-up.mdx +++ b/docs/quickstart/gateway/setting-up.mdx @@ -48,15 +48,15 @@ docker pull maximhq/bifrost:v1.3.9-arm64 # For configuration persistence across restarts docker run -p 8080:8080 -v $(pwd)/data:/app/data maximhq/bifrost ``` -### 2. Configuration Flags -| Flag | Default | NPX | Docker | Description | -|------|---------|-----|--------|-------------| -| port | 8080 | `-port 8080` | `-e APP_PORT=8080 -p 8080:8080` | HTTP server port | -| host | localhost | `-host 0.0.0.0` | `-e APP_HOST=0.0.0.0` | Host to bind server to | -| log-level | info | `-log-level info` | `-e LOG_LEVEL=info` | Log level (debug, info, warn, error) | -| log-style | json | `-log-style json` | `-e LOG_STYLE=json` | Log style (pretty, json) | +### 2. 
Configuration Flags +| Flag | Default | NPX | Docker | Description | +| --------- | --------- | ----------------- | ------------------------------- | ------------------------------------ | +| port | 8080 | `-port 8080` | `-e APP_PORT=8080 -p 8080:8080` | HTTP server port | +| host | localhost | `-host 0.0.0.0` | `-e APP_HOST=0.0.0.0` | Host to bind server to | +| log-level | info | `-log-level info` | `-e LOG_LEVEL=info` | Log level (debug, info, warn, error) | +| log-style | json | `-log-style json` | `-e LOG_STYLE=json` | Log style (pretty, json) | **Understanding App Directory** @@ -72,8 +72,9 @@ npx -y @maximhq/bifrost -app-dir ./my-bifrost-data ``` **What's stored in app-dir:** + - `config.json` - Configuration file (optional) -- `config.db` - SQLite database for UI configuration +- `config.db` - SQLite database for UI configuration - `logs.db` - Request logs database **Note:** When using Bifrost via Docker, the volume you mount will be used as the app-dir. @@ -86,7 +87,7 @@ Navigate to **http://localhost:8080** in your browser: # macOS open http://localhost:8080 -# Linux +# Linux xdg-open http://localhost:8080 # Windows @@ -94,6 +95,7 @@ start http://localhost:8080 ``` 🖥️ **The Web UI provides:** + - **Visual provider setup** - Add API keys with clicks, not code - **Real-time configuration** - Changes apply immediately - **Live monitoring** - Request logs, metrics, and analytics @@ -131,17 +133,21 @@ Bifrost supports **two configuration approaches** - you cannot use both simultan ![Configuration via UI](../../media/ui-config.png) **When the UI is available:** + - No `config.json` file exists (Bifrost auto-creates SQLite database) - `config.json` exists with `config_store` configured ### Mode 2: File-based Configuration +You can view the entire config schema [here](https://www.getbifrost.ai/schema). + **When to use:** Advanced setups, GitOps workflows, or when UI is not needed Create `config.json` in your app directory: ```json { + "$schema": "https://www.getbifrost.ai/schema", "client": { "drop_excess_requests": false }, @@ -168,12 +174,14 @@ Create `config.json` in your app directory: ``` **Without `config_store` in `config.json`:** + - **UI is disabled** - no real-time configuration possible - **Read-only mode** - `config.json` is never modified - **Memory-only** - all configurations loaded into memory at startup - **Restart required** - changes to `config.json` only apply after restart **With `config_store` in `config.json`:** + - **UI is enabled** - full real-time configuration via web interface - **Database check** - Bifrost checks if config store database exists and has data - **Empty DB**: Bootstraps database with `config.json` settings, then uses DB exclusively @@ -184,12 +192,22 @@ Create `config.json` in your app directory: If you want database persistence but prefer not to use the UI, note that modifying `config.json` after initial bootstrap has no effect when `config_store` is enabled. Use the public HTTP APIs to make configuration changes instead. **The Three Stores Explained:** + - **Config Store**: Stores provider configs, API keys, MCP settings - Required for UI functionality -- **Logs Store**: Stores request logs shown in UI - Optional, can be disabled +- **Logs Store**: Stores request logs shown in UI - Optional, can be disabled - **Vector Store**: Used for semantic caching - Optional, can be disabled ## PostgreSQL UTF8 Requirement + + PostgreSQL 16 or later is required.
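A minimal provisioning sketch for the requirements in this section (database, user, and locale names are placeholders; the `template0` guidance is spelled out just below):

```bash
# PostgreSQL 16+ assumed; names are placeholders
createdb bifrost \
  --template=template0 \
  --encoding=UTF8 \
  --lc-collate=en_US.UTF-8 \
  --lc-ctype=en_US.UTF-8

# The log store creates materialized views for analytics, so grant the
# application user CREATE on its target schema
psql -d bifrost -c 'GRANT CREATE ON SCHEMA public TO bifrost_app;'
```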
+ + + + For the log store, Bifrost creates materialized views to improve analytics performance. Ensure that the PostgreSQL user + has the necessary permissions to perform these operations on the target schema. + + If you use PostgreSQL for `config_store` or `logs_store`, the target database must use `UTF8` encoding. Use `template0` when creating the database so PostgreSQL applies UTF8 and locale settings explicitly: diff --git a/docs/quickstart/go-sdk/provider-configuration.mdx b/docs/quickstart/go-sdk/provider-configuration.mdx index 87c2901ece..234551522f 100644 --- a/docs/quickstart/go-sdk/provider-configuration.mdx +++ b/docs/quickstart/go-sdk/provider-configuration.mdx @@ -328,16 +328,35 @@ func (a *MyAccount) GetConfigForProvider(provider schemas.ModelProvider) (*schem Include the original provider response alongside Bifrost's standardized response format. Useful for debugging and accessing provider-specific metadata. +**Provider-level default** (applies to all requests for this provider): + ```go -func (a *MyAccount) GetConfigForProvider(ctx *context.Context, provider schemas.ModelProvider) (*schemas.ProviderConfig, error) { +func (a *MyAccount) GetConfigForProvider(provider schemas.ModelProvider) (*schemas.ProviderConfig, error) { return &schemas.ProviderConfig{ NetworkConfig: schemas.DefaultNetworkConfig, ConcurrencyAndBufferSize: schemas.DefaultConcurrencyAndBufferSize, - SendBackRawResponse: true, // Include raw provider response + SendBackRawResponse: true, }, nil } ``` +**Per-request override** (overrides the provider default for a single request): + +```go +ctx := context.Background() +ctx = context.WithValue(ctx, schemas.BifrostContextKeySendBackRawResponse, true) // or false to suppress + +response, err := client.ChatCompletionRequest(schemas.NewBifrostContext(ctx, schemas.NoDeadline), &schemas.BifrostChatRequest{ + Provider: schemas.OpenAI, + Model: "gpt-4o-mini", + Input: messages, +}) + +if response.ChatResponse != nil { + rawResp := response.ChatResponse.ExtraFields.RawResponse // original provider JSON +} +``` + When enabled, the raw provider response appears in `ExtraFields.RawResponse`: ```go @@ -368,16 +387,35 @@ type BifrostResponseExtraFields struct { Include the original request sent to the provider alongside Bifrost's response. Useful for debugging request transformations and verifying what was actually sent to the provider. 
+**Provider-level default** (applies to all requests for this provider): + ```go -func (a *MyAccount) GetConfigForProvider(ctx *context.Context, provider schemas.ModelProvider) (*schemas.ProviderConfig, error) { +func (a *MyAccount) GetConfigForProvider(provider schemas.ModelProvider) (*schemas.ProviderConfig, error) { return &schemas.ProviderConfig{ NetworkConfig: schemas.DefaultNetworkConfig, ConcurrencyAndBufferSize: schemas.DefaultConcurrencyAndBufferSize, - SendBackRawRequest: true, // Include raw provider request + SendBackRawRequest: true, }, nil } ``` +**Per-request override** (overrides the provider default for a single request): + +```go +ctx := context.Background() +ctx = context.WithValue(ctx, schemas.BifrostContextKeySendBackRawRequest, true) // or false to suppress + +response, err := client.ChatCompletionRequest(schemas.NewBifrostContext(ctx, schemas.NoDeadline), &schemas.BifrostChatRequest{ + Provider: schemas.OpenAI, + Model: "gpt-4o-mini", + Input: messages, +}) + +if response.ChatResponse != nil { + rawReq := response.ChatResponse.ExtraFields.RawRequest // exact JSON sent to the provider +} +``` + When enabled, the raw provider request appears in `ExtraFields.RawRequest`: ```go @@ -388,9 +426,42 @@ type BifrostResponseExtraFields struct { } ``` - -You can enable both `SendBackRawRequest` and `SendBackRawResponse` together to see the complete request-response cycle for debugging purposes. - +### Store Raw Request/Response + +Persist the raw provider request and response in the log record without necessarily returning them in the API response. This is orthogonal to the send-back flags — enabling this does not affect what the caller receives, and enabling send-back does not automatically store data in logs. Enable both to do both. + +**Provider-level default** (applies to all requests for this provider): + +```go +func (a *MyAccount) GetConfigForProvider(provider schemas.ModelProvider) (*schemas.ProviderConfig, error) { + return &schemas.ProviderConfig{ + NetworkConfig: schemas.DefaultNetworkConfig, + ConcurrencyAndBufferSize: schemas.DefaultConcurrencyAndBufferSize, + StoreRawRequestResponse: true, + }, nil +} +``` + +**Per-request override** (overrides the provider default for a single request): + +```go +ctx := context.Background() +ctx = context.WithValue(ctx, schemas.BifrostContextKeyStoreRawRequestResponse, true) // or false to disable + +response, err := client.ChatCompletionRequest(schemas.NewBifrostContext(ctx, schemas.NoDeadline), &schemas.BifrostChatRequest{ + Provider: schemas.OpenAI, + Model: "gpt-4o-mini", + Input: messages, +}) +// Raw data is persisted in the log record. +// ExtraFields.RawRequest/RawResponse are nil unless send-back flags are also enabled. +``` + + +`StoreRawRequestResponse` only has effect when the logging plugin is active — raw data is written to the log record by the logging plugin. Without it, enabling this flag captures the data but nothing persists it. + +`StoreRawRequestResponse`, `SendBackRawRequest`, and `SendBackRawResponse` are orthogonal controls — enabling any one does not imply the others. Enable any combination depending on whether you need raw data in logs, in the response, or both. 
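Because the three flags are independent, a single request can both persist raw payloads and send them back by stacking the per-request overrides shown above (a sketch reusing the same client and messages as the surrounding examples):

```go
ctx := context.Background()
ctx = context.WithValue(ctx, schemas.BifrostContextKeyStoreRawRequestResponse, true) // persist raw payloads in the log record
ctx = context.WithValue(ctx, schemas.BifrostContextKeySendBackRawRequest, true)     // return the raw request to the caller
ctx = context.WithValue(ctx, schemas.BifrostContextKeySendBackRawResponse, true)    // return the raw response to the caller

response, err := client.ChatCompletionRequest(schemas.NewBifrostContext(ctx, schemas.NoDeadline), &schemas.BifrostChatRequest{
	Provider: schemas.OpenAI,
	Model:    "gpt-4o-mini",
	Input:    messages,
})

// Raw payloads now land in the log record and are also available on
// response.ChatResponse.ExtraFields.RawRequest / ExtraFields.RawResponse.
```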
+ ## Provider-Specific Authentication diff --git a/examples/mcps/edge-case-server/package-lock.json b/examples/mcps/edge-case-server/package-lock.json index 0ca421d7dd..fcbc151bee 100644 --- a/examples/mcps/edge-case-server/package-lock.json +++ b/examples/mcps/edge-case-server/package-lock.json @@ -578,9 +578,9 @@ } }, "node_modules/hono": { - "version": "4.11.4", - "resolved": "https://registry.npmjs.org/hono/-/hono-4.11.4.tgz", - "integrity": "sha512-U7tt8JsyrxSRKspfhtLET79pU8K+tInj5QZXs1jSugO1Vq5dFj3kmZsRldo29mTBfcjDRVRXrEZ6LS63Cog9ZA==", + "version": "4.12.14", + "resolved": "https://registry.npmjs.org/hono/-/hono-4.12.14.tgz", + "integrity": "sha512-am5zfg3yu6sqn5yjKBNqhnTX7Cv+m00ox+7jbaKkrLMRJ4rAdldd1xPd/JzbBWspqaQv6RSTrgFN95EsfhC+7w==", "license": "MIT", "peer": true, "engines": { diff --git a/examples/mcps/edge-case-server/package.json b/examples/mcps/edge-case-server/package.json index 7479037933..af04926e32 100644 --- a/examples/mcps/edge-case-server/package.json +++ b/examples/mcps/edge-case-server/package.json @@ -17,5 +17,8 @@ "devDependencies": { "@types/node": "^20.10.0", "typescript": "^5.3.3" + }, + "overrides": { + "hono": "4.12.14" } } diff --git a/examples/mcps/error-test-server/package-lock.json b/examples/mcps/error-test-server/package-lock.json index 4a7c4383a9..9c568e69a3 100644 --- a/examples/mcps/error-test-server/package-lock.json +++ b/examples/mcps/error-test-server/package-lock.json @@ -578,9 +578,9 @@ } }, "node_modules/hono": { - "version": "4.11.4", - "resolved": "https://registry.npmjs.org/hono/-/hono-4.11.4.tgz", - "integrity": "sha512-U7tt8JsyrxSRKspfhtLET79pU8K+tInj5QZXs1jSugO1Vq5dFj3kmZsRldo29mTBfcjDRVRXrEZ6LS63Cog9ZA==", + "version": "4.12.14", + "resolved": "https://registry.npmjs.org/hono/-/hono-4.12.14.tgz", + "integrity": "sha512-am5zfg3yu6sqn5yjKBNqhnTX7Cv+m00ox+7jbaKkrLMRJ4rAdldd1xPd/JzbBWspqaQv6RSTrgFN95EsfhC+7w==", "license": "MIT", "peer": true, "engines": { diff --git a/examples/mcps/error-test-server/package.json b/examples/mcps/error-test-server/package.json index 7e97c1f630..4ae64ddb18 100644 --- a/examples/mcps/error-test-server/package.json +++ b/examples/mcps/error-test-server/package.json @@ -17,5 +17,8 @@ "devDependencies": { "@types/node": "^20.10.0", "typescript": "^5.3.3" + }, + "overrides": { + "hono": "4.12.14" } } diff --git a/examples/mcps/parallel-test-server/package-lock.json b/examples/mcps/parallel-test-server/package-lock.json index f76282d09d..ccbec0af9c 100644 --- a/examples/mcps/parallel-test-server/package-lock.json +++ b/examples/mcps/parallel-test-server/package-lock.json @@ -578,9 +578,9 @@ } }, "node_modules/hono": { - "version": "4.11.4", - "resolved": "https://registry.npmjs.org/hono/-/hono-4.11.4.tgz", - "integrity": "sha512-U7tt8JsyrxSRKspfhtLET79pU8K+tInj5QZXs1jSugO1Vq5dFj3kmZsRldo29mTBfcjDRVRXrEZ6LS63Cog9ZA==", + "version": "4.12.14", + "resolved": "https://registry.npmjs.org/hono/-/hono-4.12.14.tgz", + "integrity": "sha512-am5zfg3yu6sqn5yjKBNqhnTX7Cv+m00ox+7jbaKkrLMRJ4rAdldd1xPd/JzbBWspqaQv6RSTrgFN95EsfhC+7w==", "license": "MIT", "peer": true, "engines": { diff --git a/examples/mcps/parallel-test-server/package.json b/examples/mcps/parallel-test-server/package.json index 9d6ccc7d86..01268715f4 100644 --- a/examples/mcps/parallel-test-server/package.json +++ b/examples/mcps/parallel-test-server/package.json @@ -17,5 +17,8 @@ "devDependencies": { "@types/node": "^20.10.0", "typescript": "^5.3.3" + }, + "overrides": { + "hono": "4.12.14" } } diff --git a/examples/mcps/temperature/package-lock.json 
b/examples/mcps/temperature/package-lock.json index f8d7ce5a98..90aab93369 100644 --- a/examples/mcps/temperature/package-lock.json +++ b/examples/mcps/temperature/package-lock.json @@ -575,9 +575,9 @@ } }, "node_modules/hono": { - "version": "4.11.4", - "resolved": "https://registry.npmjs.org/hono/-/hono-4.11.4.tgz", - "integrity": "sha512-U7tt8JsyrxSRKspfhtLET79pU8K+tInj5QZXs1jSugO1Vq5dFj3kmZsRldo29mTBfcjDRVRXrEZ6LS63Cog9ZA==", + "version": "4.12.14", + "resolved": "https://registry.npmjs.org/hono/-/hono-4.12.14.tgz", + "integrity": "sha512-am5zfg3yu6sqn5yjKBNqhnTX7Cv+m00ox+7jbaKkrLMRJ4rAdldd1xPd/JzbBWspqaQv6RSTrgFN95EsfhC+7w==", "license": "MIT", "peer": true, "engines": { diff --git a/examples/mcps/temperature/package.json b/examples/mcps/temperature/package.json index 1157c6de1c..d4a7082705 100644 --- a/examples/mcps/temperature/package.json +++ b/examples/mcps/temperature/package.json @@ -16,5 +16,8 @@ "devDependencies": { "@types/node": "^20.0.0", "typescript": "^5.0.0" + }, + "overrides": { + "hono": "4.12.14" } } diff --git a/examples/mcps/test-tools-server/package-lock.json b/examples/mcps/test-tools-server/package-lock.json index fe7e0c417a..c26a693907 100644 --- a/examples/mcps/test-tools-server/package-lock.json +++ b/examples/mcps/test-tools-server/package-lock.json @@ -578,9 +578,9 @@ } }, "node_modules/hono": { - "version": "4.11.4", - "resolved": "https://registry.npmjs.org/hono/-/hono-4.11.4.tgz", - "integrity": "sha512-U7tt8JsyrxSRKspfhtLET79pU8K+tInj5QZXs1jSugO1Vq5dFj3kmZsRldo29mTBfcjDRVRXrEZ6LS63Cog9ZA==", + "version": "4.12.14", + "resolved": "https://registry.npmjs.org/hono/-/hono-4.12.14.tgz", + "integrity": "sha512-am5zfg3yu6sqn5yjKBNqhnTX7Cv+m00ox+7jbaKkrLMRJ4rAdldd1xPd/JzbBWspqaQv6RSTrgFN95EsfhC+7w==", "license": "MIT", "peer": true, "engines": { diff --git a/examples/mcps/test-tools-server/package.json b/examples/mcps/test-tools-server/package.json index 9345be227a..bcb6a6398b 100644 --- a/examples/mcps/test-tools-server/package.json +++ b/examples/mcps/test-tools-server/package.json @@ -17,5 +17,8 @@ "devDependencies": { "@types/node": "^20.10.0", "typescript": "^5.3.3" + }, + "overrides": { + "hono": "4.12.14" } } diff --git a/examples/plugins/hello-world-wasm-go/go.mod b/examples/plugins/hello-world-wasm-go/go.mod index c8dbf07457..5bdf8f6b7a 100644 --- a/examples/plugins/hello-world-wasm-go/go.mod +++ b/examples/plugins/hello-world-wasm-go/go.mod @@ -2,9 +2,8 @@ module github.com/maximhq/bifrost/examples/plugins/hello-world-wasm go 1.26.1 -require github.com/maximhq/bifrost/core v0.0.0-00010101000000-000000000000 +require github.com/maximhq/bifrost/core v1.4.17 -replace github.com/maximhq/bifrost/core => ../../../core require ( github.com/andybalholm/brotli v1.2.0 // indirect diff --git a/examples/plugins/hello-world/go.mod b/examples/plugins/hello-world/go.mod index 2c895cdafc..49e5687e52 100644 --- a/examples/plugins/hello-world/go.mod +++ b/examples/plugins/hello-world/go.mod @@ -1,8 +1,8 @@ module github.com/maximhq/bifrost/examples/plugins/hello-world -go 1.26.2 +go 1.26.1 -require github.com/maximhq/bifrost/core v1.4.19 +require github.com/maximhq/bifrost/core v1.4.22 require ( github.com/andybalholm/brotli v1.2.0 // indirect diff --git a/examples/plugins/hello-world/go.sum b/examples/plugins/hello-world/go.sum index 2c21d7e547..31af030933 100644 --- a/examples/plugins/hello-world/go.sum +++ b/examples/plugins/hello-world/go.sum @@ -39,8 +39,8 @@ github.com/mailru/easyjson v0.9.1 h1:LbtsOm5WAswyWbvTEOqhypdPeZzHavpZx96/n553mR8 github.com/mailru/easyjson 
v0.9.1/go.mod h1:1+xMtQp2MRNVL/V1bOzuP3aP8VNwRW55fQUto+XFtTU= github.com/mark3labs/mcp-go v0.43.2 h1:21PUSlWWiSbUPQwXIJ5WKlETixpFpq+WBpbMGDSVy/I= github.com/mark3labs/mcp-go v0.43.2/go.mod h1:YnJfOL382MIWDx1kMY+2zsRHU/q78dBg9aFb8W6Thdw= -github.com/maximhq/bifrost/core v1.4.19 h1:RY6n8O1upUo/W05whsLO2OJtJpCpTFzplyq+6pk4NaY= -github.com/maximhq/bifrost/core v1.4.19/go.mod h1:iOh2p1qEKVolMG8l84LWcn1CseUL2Kl5c/iXFMiZI84= +github.com/maximhq/bifrost/core v1.4.22 h1:iK3OhfOFzrivnQgGjFBqAEkXYzn5BYSPkwoXH4o1tWM= +github.com/maximhq/bifrost/core v1.4.22/go.mod h1:IaGSRlCgvy6Nr7Xiyu44sJJo9JbK188EL4QPXIcvURA= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= diff --git a/flake.lock b/flake.lock index 089c1fdaa1..6eef0dc1c4 100644 --- a/flake.lock +++ b/flake.lock @@ -2,11 +2,11 @@ "nodes": { "nixpkgs": { "locked": { - "lastModified": 1773144721, - "narHash": "sha256-1fa382ppXYOqqFIECQ3A1qogn/QLwNFvpjx/WivuNBc=", + "lastModified": 1776062742, + "narHash": "sha256-CYncVXVsUzYK+JZldSuK08ibXrAIJh+T22V13Z4ySS0=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "fb30d84f085815771af9decacb4b41b841798601", + "rev": "1c742e001e98f5191a5586751e16311fe1481f61", "type": "github" }, "original": { diff --git a/framework/configstore/migrations.go b/framework/configstore/migrations.go index 0a7855fdc9..422e5d450a 100644 --- a/framework/configstore/migrations.go +++ b/framework/configstore/migrations.go @@ -338,6 +338,9 @@ func triggerMigrations(ctx context.Context, db *gorm.DB) error { if err := migrationAddFlexTierPricingColumns(ctx, db); err != nil { return err } + if err := migrationNormalizeOtelTraceType(ctx, db); err != nil { + return err + } return nil } @@ -5078,3 +5081,47 @@ func migrationAddWhitelistedRoutesJSONColumn(ctx context.Context, db *gorm.DB) e } return nil } + +// migrationNormalizeOtelTraceType rewrites the legacy OTEL plugin trace_type value "otel" to "genai_extension". +// No-op if the plugin row is missing or trace_type is already correct.
+func migrationNormalizeOtelTraceType(ctx context.Context, db *gorm.DB) error { + m := migrator.New(db, migrator.DefaultOptions, []*migrator.Migration{{ + ID: "normalize_otel_trace_type", + Migrate: func(tx *gorm.DB) error { + tx = tx.WithContext(ctx) + + var plugin tables.TablePlugin + err := tx.Where("name = ?", "otel").First(&plugin).Error + if err != nil { + if err == gorm.ErrRecordNotFound { + return nil + } + return fmt.Errorf("failed to load otel plugin row: %w", err) + } + + cfgMap, ok := plugin.Config.(map[string]any) + if !ok || len(cfgMap) == 0 { + return nil + } + if tt, _ := cfgMap["trace_type"].(string); tt != "otel" { + return nil + } + + cfgMap["trace_type"] = "genai_extension" + plugin.Config = cfgMap + plugin.ConfigJSON = "" + plugin.EncryptionStatus = tables.EncryptionStatusPlainText + + if err := tx.Save(&plugin).Error; err != nil { + return fmt.Errorf("failed to save normalized otel config: %w", err) + } + log.Printf("[Migration] Normalized otel trace_type 'otel' to 'genai_extension'") + return nil + }, + Rollback: func(tx *gorm.DB) error { return nil }, + }}) + if err := m.Migrate(); err != nil { + return fmt.Errorf("error running normalize_otel_trace_type migration: %s", err.Error()) + } + return nil +} diff --git a/framework/go.mod b/framework/go.mod index e872c33262..b076183c50 100644 --- a/framework/go.mod +++ b/framework/go.mod @@ -1,10 +1,10 @@ module github.com/maximhq/bifrost/framework -go 1.26.2 +go 1.26.1 require ( github.com/google/uuid v1.6.0 - github.com/maximhq/bifrost/core v1.4.19 + github.com/maximhq/bifrost/core v1.4.22 github.com/pinecone-io/go-pinecone/v5 v5.3.0 github.com/qdrant/go-client v1.16.2 github.com/redis/go-redis/v9 v9.17.2 diff --git a/framework/go.sum b/framework/go.sum index 7a1261009c..3a5d13eb9e 100644 --- a/framework/go.sum +++ b/framework/go.sum @@ -193,8 +193,8 @@ github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWE github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= github.com/mattn/go-sqlite3 v1.14.32 h1:JD12Ag3oLy1zQA+BNn74xRgaBbdhbNIDYvQUEuuErjs= github.com/mattn/go-sqlite3 v1.14.32/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y= -github.com/maximhq/bifrost/core v1.4.19 h1:RY6n8O1upUo/W05whsLO2OJtJpCpTFzplyq+6pk4NaY= -github.com/maximhq/bifrost/core v1.4.19/go.mod h1:iOh2p1qEKVolMG8l84LWcn1CseUL2Kl5c/iXFMiZI84= +github.com/maximhq/bifrost/core v1.4.22 h1:iK3OhfOFzrivnQgGjFBqAEkXYzn5BYSPkwoXH4o1tWM= +github.com/maximhq/bifrost/core v1.4.22/go.mod h1:IaGSRlCgvy6Nr7Xiyu44sJJo9JbK188EL4QPXIcvURA= github.com/oapi-codegen/runtime v1.1.1 h1:EXLHh0DXIJnWhdRPN2w4MXAzFyE4CskzhNLUmtpMYro= github.com/oapi-codegen/runtime v1.1.1/go.mod h1:SK9X900oXmPWilYR5/WKPzt3Kqxn/uS/+lbpREv+eCg= github.com/oklog/ulid v1.3.1 h1:EGfNDEx6MqHz8B3uNV6QAib1UR2Lm97sHi3ocA6ESJ4= diff --git a/framework/logstore/asyncjob.go b/framework/logstore/asyncjob.go index 38173c6840..0eb2e8c7de 100644 --- a/framework/logstore/asyncjob.go +++ b/framework/logstore/asyncjob.go @@ -9,6 +9,7 @@ import ( "github.com/bytedance/sonic" "github.com/google/uuid" + bifrost "github.com/maximhq/bifrost/core" "github.com/maximhq/bifrost/core/schemas" configstoreTables "github.com/maximhq/bifrost/framework/configstore/tables" "github.com/valyala/fasthttp" @@ -80,11 +81,13 @@ func (e *AsyncJobExecutor) RetrieveJob(ctx context.Context, jobID string, vkValu } // SubmitJob creates a pending job, starts background execution, and returns the job record. 
-func (e *AsyncJobExecutor) SubmitJob(virtualKeyValue *string, resultTTL int, operation AsyncOperation, operationType schemas.RequestType) (*AsyncJob, error) { +func (e *AsyncJobExecutor) SubmitJob(bifrostCtx *schemas.BifrostContext, resultTTL int, operation AsyncOperation, operationType schemas.RequestType) (*AsyncJob, error) { if resultTTL <= 0 { resultTTL = DefaultAsyncJobResultTTL } + virtualKeyValue := getVirtualKeyFromContext(bifrostCtx) + var virtualKeyID *string if virtualKeyValue != nil { vk, ok := e.governanceStore.GetVirtualKey(*virtualKeyValue) @@ -109,15 +112,24 @@ func (e *AsyncJobExecutor) SubmitJob(virtualKeyValue *string, resultTTL int, ope return nil, fmt.Errorf("failed to create async job: %w", err) } - go e.executeJob(job.ID, job.ResultTTL, operation) + go e.executeJob(job.ID, job.ResultTTL, operation, bifrostCtx.GetUserValues()) return job, nil } // executeJob runs the operation in the background and updates the job record. -func (e *AsyncJobExecutor) executeJob(jobID string, resultTTL int, operation AsyncOperation) { +func (e *AsyncJobExecutor) executeJob(jobID string, resultTTL int, operation AsyncOperation, contextValues map[any]any) { ctx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline) + // Restore original request context values (virtual key, tracing headers, etc.) + for k, v := range contextValues { + ctx.SetValue(k, v) + } + + ctx.ClearValue(schemas.BifrostContextKeyTraceID) + ctx.ClearValue(schemas.BifrostContextKeyParentSpanID) + ctx.ClearValue(schemas.BifrostContextKeySpanID) + markFailed := func(msg string) { now := time.Now().UTC() expiresAt := now.Add(time.Duration(resultTTL) * time.Second) @@ -284,3 +296,13 @@ func (c *AsyncJobCleaner) cleanupExpiredJobs(ctx context.Context) { c.logger.Warn("async job cleanup: deleted %d stale processing jobs (stuck > %dh)", staleDeleted, asyncJobStaleProcessingHours) } } + +// getVirtualKeyFromContext extracts the virtual key value from context. +// Returns nil if no VK is present (e.g., direct key mode or no governance). 
+func getVirtualKeyFromContext(ctx *schemas.BifrostContext) *string { + vkValue := bifrost.GetStringFromContext(ctx, schemas.BifrostContextKeyVirtualKey) + if vkValue == "" { + return nil + } + return &vkValue +} diff --git a/framework/modelcatalog/main.go b/framework/modelcatalog/main.go index 8972689637..6603d04db7 100644 --- a/framework/modelcatalog/main.go +++ b/framework/modelcatalog/main.go @@ -670,10 +670,23 @@ func (mc *ModelCatalog) GetProvidersForModel(model string) []schemas.ModelProvid // // Explicit allowedModels without prefix // mc.IsModelAllowedForProvider("openai", "gpt-4o", []string{"gpt-4o"}) // // Returns: true (direct match) -func (mc *ModelCatalog) IsModelAllowedForProvider(provider schemas.ModelProvider, model string, allowedModels []string) bool { - // Case 1: Empty allowedModels = use catalog to determine support +func (mc *ModelCatalog) IsModelAllowedForProvider(provider schemas.ModelProvider, model string, providerConfig *configstore.ProviderConfig, allowedModels []string) bool { + isCustomProvider := false + hasListModelsEndpointDisabled := false + if providerConfig != nil && providerConfig.CustomProviderConfig != nil { + isCustomProvider = true + hasListModelsEndpointDisabled = !providerConfig.CustomProviderConfig.IsOperationAllowed(schemas.ListModelsRequest) + } + + // Case 1: Unrestricted allowedModels (empty or ["*"]) = use catalog to determine support // This leverages GetProvidersForModel which already handles all cross-provider logic - if len(allowedModels) == 0 { + isUnrestricted := len(allowedModels) == 0 || (len(allowedModels) == 1 && allowedModels[0] == "*") + if isUnrestricted { + // Custom providers without a list-models endpoint can't be in the catalog, + // so allow any model through rather than blocking on missing catalog data + if isCustomProvider && hasListModelsEndpointDisabled { + return true + } supportedProviders := mc.GetProvidersForModel(model) return slices.Contains(supportedProviders, provider) } diff --git a/framework/modelcatalog/main_test.go b/framework/modelcatalog/main_test.go index 3b7e67e702..324b28c791 100644 --- a/framework/modelcatalog/main_test.go +++ b/framework/modelcatalog/main_test.go @@ -4,6 +4,7 @@ import ( "testing" "github.com/maximhq/bifrost/core/schemas" + "github.com/maximhq/bifrost/framework/configstore" configstoreTables "github.com/maximhq/bifrost/framework/configstore/tables" "github.com/stretchr/testify/assert" ) @@ -154,5 +155,56 @@ func TestIsModelAllowedForProvider_PrefixedAllowedModelInCatalog(t *testing.T) { nil, ) - assert.True(t, mc.IsModelAllowedForProvider(schemas.OpenRouter, "gpt-4o", []string{"openai/gpt-4o"})) + providerConfig := configstore.ProviderConfig{} + + assert.True(t, mc.IsModelAllowedForProvider(schemas.OpenRouter, "gpt-4o", &providerConfig, []string{"openai/gpt-4o"})) +} + +func TestIsModelAllowedForProvider_CustomProviderListModelsDisabled(t *testing.T) { + mc := newTestCatalog(nil, nil) + + // Custom provider with list-models disabled + ["*"] → should return true + providerConfig := configstore.ProviderConfig{ + CustomProviderConfig: &schemas.CustomProviderConfig{ + AllowedRequests: &schemas.AllowedRequests{ + ListModels: false, + }, + }, + } + assert.True(t, mc.IsModelAllowedForProvider("custom-provider", "any-model", &providerConfig, []string{"*"})) +} + +func TestIsModelAllowedForProvider_CustomProviderListModelsEnabled(t *testing.T) { + mc := newTestCatalog( + map[schemas.ModelProvider][]string{ + "custom-provider": {"model-a"}, + }, + nil, + ) + + // Custom provider with list-models 
enabled + ["*"] → should go through catalog + providerConfig := configstore.ProviderConfig{ + CustomProviderConfig: &schemas.CustomProviderConfig{ + AllowedRequests: &schemas.AllowedRequests{ + ListModels: true, + }, + }, + } + // model-a is in catalog → allowed + assert.True(t, mc.IsModelAllowedForProvider("custom-provider", "model-a", &providerConfig, []string{"*"})) + // model-b is NOT in catalog → denied + assert.False(t, mc.IsModelAllowedForProvider("custom-provider", "model-b", &providerConfig, []string{"*"})) +} + +func TestIsModelAllowedForProvider_NilProviderConfig(t *testing.T) { + mc := newTestCatalog( + map[schemas.ModelProvider][]string{ + "some-provider": {"model-x"}, + }, + nil, + ) + + // nil providerConfig + ["*"] → should go through catalog (not bypass) + assert.True(t, mc.IsModelAllowedForProvider("some-provider", "model-x", nil, []string{"*"})) + assert.False(t, mc.IsModelAllowedForProvider("some-provider", "model-y", nil, []string{"*"})) } diff --git a/framework/streaming/accumulator.go b/framework/streaming/accumulator.go index 0af217777d..66fe43339c 100644 --- a/framework/streaming/accumulator.go +++ b/framework/streaming/accumulator.go @@ -152,6 +152,7 @@ func (a *Accumulator) createStreamAccumulator(requestID string) *StreamAccumulat MaxResponsesChunkIndex: -1, MaxTranscriptionChunkIndex: -1, MaxAudioChunkIndex: -1, + TerminalErrorChunkIndex: -1, IsComplete: false, mu: sync.Mutex{}, Timestamp: now, @@ -186,6 +187,7 @@ func (a *Accumulator) getOrCreateStreamAccumulator(requestID string) *StreamAccu MaxResponsesChunkIndex: -1, MaxTranscriptionChunkIndex: -1, MaxAudioChunkIndex: -1, + TerminalErrorChunkIndex: -1, IsComplete: false, mu: sync.Mutex{}, Timestamp: now, @@ -378,16 +380,21 @@ func (a *Accumulator) cleanupStreamAccumulator(requestID string) { } } - // ProcessStreamingResponse processes a streaming response // It handles chat, audio, and responses streaming responses func (a *Accumulator) ProcessStreamingResponse(ctx *schemas.BifrostContext, result *schemas.BifrostResponse, bifrostErr *schemas.BifrostError) (*ProcessedStreamResponse, error) { - // Check if this is a streaming response - if result == nil { - return nil, fmt.Errorf("result is nil") + // Check if at least one of result or error is provided + if result == nil && bifrostErr == nil { + return nil, fmt.Errorf("result and error are nil") + } + + var requestType schemas.RequestType + if result != nil { + requestType = result.GetExtraFields().RequestType + } else if bifrostErr != nil { + requestType = bifrostErr.ExtraFields.RequestType } - extraFields := result.GetExtraFields() - requestType := extraFields.RequestType + isAudioStreaming := requestType == schemas.SpeechStreamRequest || requestType == schemas.TranscriptionStreamRequest isChatStreaming := requestType == schemas.ChatCompletionStreamRequest || requestType == schemas.TextCompletionStreamRequest isResponsesStreaming := requestType == schemas.ResponsesStreamRequest diff --git a/framework/streaming/responses.go b/framework/streaming/responses.go index 65c7635226..367c9a4fa9 100644 --- a/framework/streaming/responses.go +++ b/framework/streaming/responses.go @@ -6,6 +6,7 @@ import ( "strings" "time" + "github.com/bytedance/sonic" bifrost "github.com/maximhq/bifrost/core" "github.com/maximhq/bifrost/core/schemas" ) @@ -898,6 +899,22 @@ func (a *Accumulator) processResponsesStreamingResponse(ctx *schemas.BifrostCont if bifrostErr != nil { chunk.FinishReason = bifrost.Ptr("error") + if bifrostErr.ExtraFields.RawResponse != nil { + if rawBytes, 
marshalErr := sonic.Marshal(bifrostErr.ExtraFields.RawResponse); marshalErr == nil { + chunk.RawResponse = bifrost.Ptr(string(rawBytes)) + } + } + // Assign a stable trailing index; reuse on duplicate plugin calls so dedup fires correctly. + accumulator := a.getOrCreateStreamAccumulator(requestID) + accumulator.mu.Lock() + if accumulator.TerminalErrorChunkIndex >= 0 { + chunk.ChunkIndex = accumulator.TerminalErrorChunkIndex + } else { + accumulator.MaxResponsesChunkIndex++ + chunk.ChunkIndex = accumulator.MaxResponsesChunkIndex + accumulator.TerminalErrorChunkIndex = chunk.ChunkIndex + } + accumulator.mu.Unlock() } else if result != nil && result.ResponsesStreamResponse != nil { if result.ResponsesStreamResponse.ExtraFields.RawResponse != nil { chunk.RawResponse = bifrost.Ptr(fmt.Sprintf("%v", result.ResponsesStreamResponse.ExtraFields.RawResponse)) diff --git a/framework/streaming/types.go b/framework/streaming/types.go index 9d7cf0183f..96567d1944 100644 --- a/framework/streaming/types.go +++ b/framework/streaming/types.go @@ -135,6 +135,9 @@ type StreamAccumulator struct { MaxTranscriptionChunkIndex int MaxAudioChunkIndex int + // TerminalErrorChunkIndex holds the reserved chunk index for the terminal error (-1 = unset); reused across plugin calls for correct dedup. + TerminalErrorChunkIndex int + IsComplete bool FinalTimestamp time.Time mu sync.Mutex diff --git a/framework/version b/framework/version index 1f8d37f959..e4e352ba24 100644 --- a/framework/version +++ b/framework/version @@ -1 +1 @@ -1.2.38 +1.2.39 diff --git a/helm-charts/bifrost/Chart.yaml b/helm-charts/bifrost/Chart.yaml index b4613d3d66..5608a5bb48 100644 --- a/helm-charts/bifrost/Chart.yaml +++ b/helm-charts/bifrost/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: bifrost description: A Helm chart for deploying Bifrost - AI Gateway with unified interface for multiple providers type: application -version: 2.0.17 +version: 2.1.1 appVersion: "1.4.11" keywords: - ai @@ -16,5 +16,4 @@ sources: maintainers: - name: Bifrost Team email: support@getbifrost.ai -icon: https://www.getbifrost.ai/favicon.png - +icon: https://www.getbifrost.ai/favicon.png \ No newline at end of file diff --git a/helm-charts/bifrost/README.md b/helm-charts/bifrost/README.md index 86b14a1357..b0c4435491 100644 --- a/helm-charts/bifrost/README.md +++ b/helm-charts/bifrost/README.md @@ -4,18 +4,49 @@ Official Helm charts for deploying [Bifrost](https://github.com/maximhq/bifrost) - a high-performance AI gateway with unified interface for multiple providers. 
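As the v2.1.1 entry below notes, `bifrost.governance.virtualKeys[].value` is now optional. A hedged `values.yaml` sketch (ids and names are placeholders):

```yaml
bifrost:
  governance:
    virtualKeys:
      - id: "vk-dev"         # placeholder
        name: "dev-team-key" # placeholder
        # "value" omitted: the backend auto-generates the key value and the
        # rendered config.json leaves the field out entirely
```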
-**Latest Version:** 2.0.17 +**Latest Version:** 2.1.1 ## Changelog +### v2.1.1 + +- Made `bifrost.governance.virtualKeys[].value` optional — template no longer fails when the field is omitted, allowing the backend to auto-generate the virtual key value +- When `value` is absent, the rendered `config.json` omits the field entirely (consistent with other optional VK fields) + +### v2.1.0-prerelease2 (prerelease) + +- Synced helm `values.schema.json` with transport `config.schema.json` — fixed virtual key and budget drift: + - Removed `required: [mcp_client_id]` constraint on `virtualKeys[].mcp_configs[]` items — canonical schema accepts either `mcp_client_id` (DB form) or `mcp_client_name` (config-file form, resolved to ID at startup) + - Added `mcp_client_name` as an allowed property on `virtualKeys[].mcp_configs[]` items + - Added `calendar_aligned` (boolean) on `virtualKeys[]` — field now lives on the virtual key, applies uniformly to all budgets under it + - Removed stale `budget_id` from `virtualKeys[]` — `TableVirtualKey` has no `BudgetID`; budgets link via foreign key from the budget table + - Removed stale `calendar_aligned` from `budgets[]` — moved to virtual key level + +### v2.0.17 + +- Added object storage support (S3/GCS) for offloading log payloads from the database +- Added `storage.logsStore.objectStorage` configuration with S3 and GCS backend support +- Added object storage credential injection from Kubernetes secrets (`existingSecret`) +- Added `object_storage` schema to `config.schema.json` under `logs_store` +- Updated deployment and stateful templates with object storage secret env vars + ### v2.0.16 - Fixed disabled custom plugins being completely removed from rendered config.json instead of being kept with `enabled: false` ### v2.0.15 -- Added `whitelistedRoutes` client config property for routes that bypass auth middleware -- Added `whitelistedRoutes` to Helm schema, values, and template rendering +- Synced helm schema with transport `config.schema.json` — added missing properties: + - `client.mcpDisableAutoToolInject` — disable automatic MCP tool injection + - `governance.budgets[].calendar_aligned` — snap budget resets to calendar boundaries + - `governance.pricingOverrides` — scoped pricing overrides for the model catalog + - `mcp.clientConfigs[].allowedExtraHeaders` — header allowlist per MCP client + - `mcp.clientConfigs[].allowOnAllVirtualKeys` — make MCP server accessible to all virtual keys + - `mcp.toolManagerConfig.disableAutoToolInject` — disable auto tool injection at manager level + - `networkConfig.beta_header_overrides` — override Anthropic beta header support per provider + - `websocket` — full WebSocket gateway tuning (connections, pool, transcript buffer) +- Fixed SSE `connectionString` not being rendered in `_helpers.tpl` for MCP clients +- Added template rendering for all new properties in `_helpers.tpl` ### v2.0.14 @@ -443,6 +474,43 @@ autoscaling: targetMemoryUtilizationPercentage: 80 ``` +### Referencing Secrets in MCP Headers + +`bifrost.mcp.clientConfigs[].headers` is a free-form `map` +whose values can contain auth tokens. The chart does not wrap this map with +a bespoke `secretRef` — a per-header dict would explode the values surface. +Instead, use the standard pattern: + +1. Write `env.MY_HEADER_VAR` as the header value in `values.yaml`: + ```yaml + bifrost: + mcp: + clientConfigs: + - name: "my-mcp" + connectionType: "http" + headers: + Authorization: "env.MY_MCP_AUTH" + ``` +2. 
+
+### v2.1.0-prerelease2 (prerelease)
+
+- Synced helm `values.schema.json` with transport `config.schema.json` — fixed virtual key and budget drift:
+  - Removed `required: [mcp_client_id]` constraint on `virtualKeys[].mcp_configs[]` items — canonical schema accepts either `mcp_client_id` (DB form) or `mcp_client_name` (config-file form, resolved to ID at startup)
+  - Added `mcp_client_name` as an allowed property on `virtualKeys[].mcp_configs[]` items
+  - Added `calendar_aligned` (boolean) on `virtualKeys[]` — field now lives on the virtual key, applies uniformly to all budgets under it
+  - Removed stale `budget_id` from `virtualKeys[]` — `TableVirtualKey` has no `BudgetID`; budgets link via foreign key from the budget table
+  - Removed stale `calendar_aligned` from `budgets[]` — moved to virtual key level
+
+### v2.0.17
+
+- Added object storage support (S3/GCS) for offloading log payloads from the database
+- Added `storage.logsStore.objectStorage` configuration with S3 and GCS backend support
+- Added object storage credential injection from Kubernetes secrets (`existingSecret`)
+- Added `object_storage` schema to `config.schema.json` under `logs_store`
+- Updated deployment and stateful templates with object storage secret env vars
+
 ### v2.0.16
 
 - Fixed disabled custom plugins being completely removed from rendered config.json instead of being kept with `enabled: false`
 
 ### v2.0.15
 
-- Added `whitelistedRoutes` client config property for routes that bypass auth middleware
-- Added `whitelistedRoutes` to Helm schema, values, and template rendering
+- Synced helm schema with transport `config.schema.json` — added missing properties:
+  - `client.mcpDisableAutoToolInject` — disable automatic MCP tool injection
+  - `governance.budgets[].calendar_aligned` — snap budget resets to calendar boundaries
+  - `governance.pricingOverrides` — scoped pricing overrides for the model catalog
+  - `mcp.clientConfigs[].allowedExtraHeaders` — header allowlist per MCP client
+  - `mcp.clientConfigs[].allowOnAllVirtualKeys` — make MCP server accessible to all virtual keys
+  - `mcp.toolManagerConfig.disableAutoToolInject` — disable auto tool injection at manager level
+  - `networkConfig.beta_header_overrides` — override Anthropic beta header support per provider
+  - `websocket` — full WebSocket gateway tuning (connections, pool, transcript buffer)
+- Fixed SSE `connectionString` not being rendered in `_helpers.tpl` for MCP clients
+- Added template rendering for all new properties in `_helpers.tpl`
 
 ### v2.0.14
 
@@ -443,6 +474,43 @@ autoscaling:
   targetMemoryUtilizationPercentage: 80
 ```
 
+### Referencing Secrets in MCP Headers
+
+`bifrost.mcp.clientConfigs[].headers` is a free-form `map`
+whose values can contain auth tokens. The chart does not wrap this map with
+a bespoke `secretRef` — a per-header dict would explode the values surface.
+Instead, use the standard pattern:
+
+1. Write an `env.<VAR_NAME>` reference as the header value in `values.yaml`:
+   ```yaml
+   bifrost:
+     mcp:
+       clientConfigs:
+         - name: "my-mcp"
+           connectionType: "http"
+           headers:
+             Authorization: "env.MY_MCP_AUTH"
+   ```
+2. Inject the env var into the pod via the chart's top-level `envFrom:` or
+   `env:` pass-through — e.g., in `values.yaml`:
+   ```yaml
+   envFrom:
+     - secretRef:
+         name: my-mcp-auth-secret
+   # OR:
+   env:
+     - name: MY_MCP_AUTH
+       valueFrom:
+         secretKeyRef:
+           name: my-mcp-auth-secret
+           key: authorization
+   ```
+
+For `bifrost.mcp.clientConfigs[].connectionString` itself, prefer the
+chart-native `secretRef` (`name` + `connectionStringKey`) instead — the
+chart will inject `BIFROST_MCP_<CLIENT_NAME>_CONNECTION_STRING` and rewrite
+the config automatically.
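+
+For example, a minimal sketch (client and secret names are illustrative):
+
+```yaml
+bifrost:
+  mcp:
+    clientConfigs:
+      - name: "my-mcp"
+        connectionType: "http"
+        secretRef:
+          name: "my-mcp-conn-secret"                # k8s secret holding the URL
+          connectionStringKey: "connection-string"  # key within that secret
+```
+
+With this, the chart injects `BIFROST_MCP_MY_MCP_CONNECTION_STRING` into the
+pod from the secret and rewrites `connection_string` in the rendered
+`config.json` to `env.BIFROST_MCP_MY_MCP_CONNECTION_STRING`.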
"is_active" }}{{- $_ := set $vk "is_active" .is_active }}{{- end }} {{- if .team_id }}{{- $_ := set $vk "team_id" .team_id }}{{- end }} @@ -357,6 +377,9 @@ false {{- if .Values.bifrost.governance.providers }} {{- $_ := set $governance "providers" .Values.bifrost.governance.providers }} {{- end }} +{{- if .Values.bifrost.governance.pricingOverrides }} +{{- $_ := set $governance "pricing_overrides" .Values.bifrost.governance.pricingOverrides }} +{{- end }} {{- if .Values.bifrost.governance.authConfig }} {{- $authConfig := dict }} {{- if and .Values.bifrost.governance.authConfig.existingSecret .Values.bifrost.governance.authConfig.usernameKey }} @@ -379,7 +402,7 @@ false {{- $_ := set $governance "auth_config" $authConfig }} {{- end }} {{- end }} -{{- if or $governance.budgets $governance.rate_limits $governance.customers $governance.teams $governance.virtual_keys $governance.routing_rules $governance.model_configs $governance.providers $governance.auth_config }} +{{- if or $governance.budgets $governance.rate_limits $governance.customers $governance.teams $governance.virtual_keys $governance.routing_rules $governance.model_configs $governance.providers $governance.pricing_overrides $governance.auth_config }} {{- $_ := set $config "governance" $governance }} {{- end }} {{- end }} @@ -466,16 +489,17 @@ false {{- end }} {{- $_ := set $config "cluster_config" $cluster }} {{- end }} -{{- /* SAML Config */ -}} -{{- if and .Values.bifrost.saml .Values.bifrost.saml.enabled }} -{{- $saml := dict "enabled" true }} -{{- if .Values.bifrost.saml.provider }} -{{- $_ := set $saml "provider" .Values.bifrost.saml.provider }} +{{- /* SCIM Config */ -}} +{{- $scimValues := .Values.bifrost.scim }} +{{- if and $scimValues $scimValues.enabled }} +{{- $scim := dict "enabled" true }} +{{- if $scimValues.provider }} +{{- $_ := set $scim "provider" $scimValues.provider }} {{- end }} -{{- if .Values.bifrost.saml.config }} -{{- $_ := set $saml "config" .Values.bifrost.saml.config }} +{{- if $scimValues.config }} +{{- $_ := set $scim "config" $scimValues.config }} {{- end }} -{{- $_ := set $config "saml_config" $saml }} +{{- $_ := set $config "scim_config" $scim }} {{- end }} {{- /* Load Balancer Config */ -}} {{- if and .Values.bifrost.loadBalancer .Values.bifrost.loadBalancer.enabled }} @@ -552,6 +576,64 @@ false {{- $sqliteLogsStore := dict "enabled" true "type" "sqlite" "config" (dict "path" (printf "%s/logs.db" .Values.bifrost.appDir)) }} {{- $_ := set $config "logs_store" $sqliteLogsStore }} {{- end }} +{{- /* Object Storage for log payloads */ -}} +{{- if and .Values.storage.logsStore.objectStorage .Values.storage.logsStore.objectStorage.enabled }} +{{- $os := .Values.storage.logsStore.objectStorage }} +{{- $osConfig := dict "type" $os.type "bucket" $os.bucket }} +{{- if $os.prefix }} +{{- $_ := set $osConfig "prefix" $os.prefix }} +{{- end }} +{{- if $os.compress }} +{{- $_ := set $osConfig "compress" true }} +{{- end }} +{{- if eq $os.type "s3" }} +{{- if $os.region }} +{{- $_ := set $osConfig "region" $os.region }} +{{- end }} +{{- if $os.endpoint }} +{{- $_ := set $osConfig "endpoint" $os.endpoint }} +{{- end }} +{{- if $os.existingSecret }} +{{- if $os.accessKeyIdKey }} +{{- $_ := set $osConfig "access_key_id" "env.BIFROST_OBJECT_STORAGE_ACCESS_KEY_ID" }} +{{- end }} +{{- if $os.secretAccessKeyKey }} +{{- $_ := set $osConfig "secret_access_key" "env.BIFROST_OBJECT_STORAGE_SECRET_ACCESS_KEY" }} +{{- end }} +{{- if $os.sessionTokenKey }} +{{- $_ := set $osConfig "session_token" 
"env.BIFROST_OBJECT_STORAGE_SESSION_TOKEN" }} +{{- end }} +{{- $_ := set $osConfig "role_arn" "env.BIFROST_OBJECT_STORAGE_ROLE_ARN" }} +{{- else }} +{{- if $os.accessKeyId }} +{{- $_ := set $osConfig "access_key_id" $os.accessKeyId }} +{{- end }} +{{- if $os.secretAccessKey }} +{{- $_ := set $osConfig "secret_access_key" $os.secretAccessKey }} +{{- end }} +{{- if $os.sessionToken }} +{{- $_ := set $osConfig "session_token" $os.sessionToken }} +{{- end }} +{{- if $os.roleArn }} +{{- $_ := set $osConfig "role_arn" $os.roleArn }} +{{- end }} +{{- end }} +{{- if $os.forcePathStyle }} +{{- $_ := set $osConfig "force_path_style" true }} +{{- end }} +{{- end }} +{{- if eq $os.type "gcs" }} +{{- if $os.projectId }} +{{- $_ := set $osConfig "project_id" $os.projectId }} +{{- end }} +{{- if $os.existingSecret }} +{{- $_ := set $osConfig "credentials_json" "env.BIFROST_OBJECT_STORAGE_CREDENTIALS_JSON" }} +{{- else if $os.credentialsJson }} +{{- $_ := set $osConfig "credentials_json" $os.credentialsJson }} +{{- end }} +{{- end }} +{{- $_ := set (index $config "logs_store") "object_storage" $osConfig }} +{{- end }} {{- end }} {{- /* Vector Store */ -}} {{- if and .Values.vectorStore.enabled (ne .Values.vectorStore.type "none") }} @@ -682,6 +764,10 @@ false {{- if and (eq $client.connectionType "websocket") $client.websocketConfig }} {{- $_ := set $cc "connection_string" $client.websocketConfig.url }} {{- end }} +{{- /* Map connectionString for SSE connections */ -}} +{{- if and (eq $client.connectionType "sse") $client.connectionString }} +{{- $_ := set $cc "connection_string" $client.connectionString }} +{{- end }} {{- /* Map stdioConfig -> stdio_config */ -}} {{- if $client.stdioConfig }} {{- $stdio := dict "command" $client.stdioConfig.command }} @@ -724,6 +810,17 @@ false {{- if $client.toolPricing }} {{- $_ := set $cc "tool_pricing" $client.toolPricing }} {{- end }} +{{- if $client.allowedExtraHeaders }} +{{- $_ := set $cc "allowed_extra_headers" $client.allowedExtraHeaders }} +{{- end }} +{{- if hasKey $client "allowOnAllVirtualKeys" }} +{{- $_ := set $cc "allow_on_all_virtual_keys" $client.allowOnAllVirtualKeys }} +{{- end }} +{{- /* Override connection_string with env var placeholder when secretRef is set */ -}} +{{- if and $client.secretRef $client.secretRef.name }} +{{- $envName := printf "BIFROST_MCP_%s_CONNECTION_STRING" (regexReplaceAll "[^A-Z0-9]+" (upper $client.name) "_") }} +{{- $_ := set $cc "connection_string" (printf "env.%s" $envName) }} +{{- end }} {{- $clientConfigs = append $clientConfigs $cc }} {{- end }} {{- $mcpConfig := dict "client_configs" $clientConfigs }} @@ -738,6 +835,9 @@ false {{- if .Values.bifrost.mcp.toolManagerConfig.codeModeBindingLevel }} {{- $_ := set $tmConfig "code_mode_binding_level" .Values.bifrost.mcp.toolManagerConfig.codeModeBindingLevel }} {{- end }} +{{- if hasKey .Values.bifrost.mcp.toolManagerConfig "disableAutoToolInject" }} +{{- $_ := set $tmConfig "disable_auto_tool_inject" .Values.bifrost.mcp.toolManagerConfig.disableAutoToolInject }} +{{- end }} {{- if $tmConfig }} {{- $_ := set $mcpConfig "tool_manager_config" $tmConfig }} {{- end }} @@ -913,6 +1013,62 @@ false {{- $_ := set $config "audit_logs" $auditLogs }} {{- end }} {{- end }} +{{- /* Large Payload Optimization */ -}} +{{- if .Values.bifrost.largePayloadOptimization }} +{{- $lpo := dict }} +{{- if hasKey .Values.bifrost.largePayloadOptimization "enabled" }} +{{- $_ := set $lpo "enabled" .Values.bifrost.largePayloadOptimization.enabled }} +{{- end }} +{{- if hasKey 
.Values.bifrost.largePayloadOptimization "requestThresholdBytes" }} +{{- $_ := set $lpo "request_threshold_bytes" .Values.bifrost.largePayloadOptimization.requestThresholdBytes }} +{{- end }} +{{- if hasKey .Values.bifrost.largePayloadOptimization "responseThresholdBytes" }} +{{- $_ := set $lpo "response_threshold_bytes" .Values.bifrost.largePayloadOptimization.responseThresholdBytes }} +{{- end }} +{{- if hasKey .Values.bifrost.largePayloadOptimization "prefetchSizeBytes" }} +{{- $_ := set $lpo "prefetch_size_bytes" .Values.bifrost.largePayloadOptimization.prefetchSizeBytes }} +{{- end }} +{{- if hasKey .Values.bifrost.largePayloadOptimization "maxPayloadBytes" }} +{{- $_ := set $lpo "max_payload_bytes" .Values.bifrost.largePayloadOptimization.maxPayloadBytes }} +{{- end }} +{{- if hasKey .Values.bifrost.largePayloadOptimization "truncatedLogBytes" }} +{{- $_ := set $lpo "truncated_log_bytes" .Values.bifrost.largePayloadOptimization.truncatedLogBytes }} +{{- end }} +{{- if $lpo }} +{{- $_ := set $config "large_payload_optimization" $lpo }} +{{- end }} +{{- end }} +{{- /* WebSocket Config */ -}} +{{- if .Values.bifrost.websocket }} +{{- $ws := dict }} +{{- if .Values.bifrost.websocket.maxConnectionsPerUser }} +{{- $_ := set $ws "max_connections_per_user" .Values.bifrost.websocket.maxConnectionsPerUser }} +{{- end }} +{{- if .Values.bifrost.websocket.transcriptBufferSize }} +{{- $_ := set $ws "transcript_buffer_size" .Values.bifrost.websocket.transcriptBufferSize }} +{{- end }} +{{- if .Values.bifrost.websocket.pool }} +{{- $pool := dict }} +{{- if .Values.bifrost.websocket.pool.maxIdlePerKey }} +{{- $_ := set $pool "max_idle_per_key" .Values.bifrost.websocket.pool.maxIdlePerKey }} +{{- end }} +{{- if .Values.bifrost.websocket.pool.maxTotalConnections }} +{{- $_ := set $pool "max_total_connections" .Values.bifrost.websocket.pool.maxTotalConnections }} +{{- end }} +{{- if .Values.bifrost.websocket.pool.idleTimeoutSeconds }} +{{- $_ := set $pool "idle_timeout_seconds" .Values.bifrost.websocket.pool.idleTimeoutSeconds }} +{{- end }} +{{- if .Values.bifrost.websocket.pool.maxConnectionLifetimeSeconds }} +{{- $_ := set $pool "max_connection_lifetime_seconds" .Values.bifrost.websocket.pool.maxConnectionLifetimeSeconds }} +{{- end }} +{{- if $pool }} +{{- $_ := set $ws "pool" $pool }} +{{- end }} +{{- end }} +{{- if $ws }} +{{- $_ := set $config "websocket" $ws }} +{{- end }} +{{- end }} {{- $config | toJson }} {{- end }} @@ -941,7 +1097,7 @@ Call this template at the beginning of deployment/stateful templates {{- fail "ERROR: bifrost.plugins.otel.config.collector_url is required when OTEL plugin is enabled. Provide the URL of your OpenTelemetry collector." }} {{- end }} {{- if not .Values.bifrost.plugins.otel.config.trace_type }} -{{- fail "ERROR: bifrost.plugins.otel.config.trace_type is required when OTEL plugin is enabled. Supported value: otel" }} +{{- fail "ERROR: bifrost.plugins.otel.config.trace_type is required when OTEL plugin is enabled. Supported values: genai_extension, vercel, open_inference" }} {{- end }} {{- if not .Values.bifrost.plugins.otel.config.protocol }} {{- fail "ERROR: bifrost.plugins.otel.config.protocol is required when OTEL plugin is enabled. 
Supported values: http, grpc" }} @@ -955,22 +1111,29 @@ Call this template at the beginning of deployment/stateful templates {{- end }} {{- end }} -{{/* Validate SAML/Okta config when enabled */}} -{{- if and .Values.bifrost.saml .Values.bifrost.saml.enabled }} -{{- if eq .Values.bifrost.saml.provider "okta" }} -{{- if not .Values.bifrost.saml.config.issuerUrl }} -{{- fail "ERROR: bifrost.saml.config.issuerUrl is required when SAML provider is Okta. Example: https://your-domain.okta.com/oauth2/default" }} +{{/* Validate SCIM/SSO config when enabled */}} +{{- $scimValidation := .Values.bifrost.scim }} +{{- if and $scimValidation $scimValidation.enabled }} +{{- if eq $scimValidation.provider "okta" }} +{{- if not $scimValidation.config.issuerUrl }} +{{- fail "ERROR: bifrost.scim.config.issuerUrl is required when SCIM provider is Okta. Example: https://your-domain.okta.com/oauth2/default" }} {{- end }} -{{- if not .Values.bifrost.saml.config.clientId }} -{{- fail "ERROR: bifrost.saml.config.clientId is required when SAML provider is Okta." }} +{{- if not $scimValidation.config.clientId }} +{{- fail "ERROR: bifrost.scim.config.clientId is required when SCIM provider is Okta." }} {{- end }} +{{- if not $scimValidation.config.clientSecret }} +{{- fail "ERROR: bifrost.scim.config.clientSecret is required when SCIM provider is Okta." }} {{- end }} -{{- if eq .Values.bifrost.saml.provider "entra" }} -{{- if not .Values.bifrost.saml.config.tenantId }} -{{- fail "ERROR: bifrost.saml.config.tenantId is required when SAML provider is Entra (Azure AD)." }} +{{- if not $scimValidation.config.apiToken }} +{{- fail "ERROR: bifrost.scim.config.apiToken is required when SCIM provider is Okta." }} {{- end }} -{{- if not .Values.bifrost.saml.config.clientId }} -{{- fail "ERROR: bifrost.saml.config.clientId is required when SAML provider is Entra (Azure AD)." }} +{{- end }} +{{- if eq $scimValidation.provider "entra" }} +{{- if not $scimValidation.config.tenantId }} +{{- fail "ERROR: bifrost.scim.config.tenantId is required when SCIM provider is Entra (Azure AD)." }} +{{- end }} +{{- if not $scimValidation.config.clientId }} +{{- fail "ERROR: bifrost.scim.config.clientId is required when SCIM provider is Entra (Azure AD)." }} {{- end }} {{- end }} {{- end }} @@ -1105,9 +1268,6 @@ Call this template at the beginning of deployment/stateful templates {{- if not $vk.name }} {{- fail (printf "ERROR: bifrost.governance.virtualKeys[%d].name is required for virtual key '%s'." $idx $vk.id) }} {{- end }} -{{- if not $vk.value }} -{{- fail (printf "ERROR: bifrost.governance.virtualKeys[%d].value is required for virtual key '%s'." 
$idx $vk.id) }} -{{- end }} {{- end }} {{- end }} diff --git a/helm-charts/bifrost/templates/deployment.yaml b/helm-charts/bifrost/templates/deployment.yaml index 9200f78548..74dc54e322 100644 --- a/helm-charts/bifrost/templates/deployment.yaml +++ b/helm-charts/bifrost/templates/deployment.yaml @@ -142,6 +142,42 @@ spec: name: {{ .Values.vectorStore.pinecone.external.existingSecret }} key: {{ .Values.vectorStore.pinecone.external.apiKeyKey | default "api-key" }} {{- end }} + {{- /* Object storage credentials from existing secret */ -}} + {{- if and .Values.storage.logsStore.enabled .Values.storage.logsStore.objectStorage .Values.storage.logsStore.objectStorage.enabled .Values.storage.logsStore.objectStorage.existingSecret }} + {{- if eq .Values.storage.logsStore.objectStorage.type "s3" }} + - name: BIFROST_OBJECT_STORAGE_ACCESS_KEY_ID + valueFrom: + secretKeyRef: + name: {{ .Values.storage.logsStore.objectStorage.existingSecret }} + key: {{ .Values.storage.logsStore.objectStorage.accessKeyIdKey | default "access-key-id" }} + optional: true + - name: BIFROST_OBJECT_STORAGE_SECRET_ACCESS_KEY + valueFrom: + secretKeyRef: + name: {{ .Values.storage.logsStore.objectStorage.existingSecret }} + key: {{ .Values.storage.logsStore.objectStorage.secretAccessKeyKey | default "secret-access-key" }} + optional: true + - name: BIFROST_OBJECT_STORAGE_SESSION_TOKEN + valueFrom: + secretKeyRef: + name: {{ .Values.storage.logsStore.objectStorage.existingSecret }} + key: {{ .Values.storage.logsStore.objectStorage.sessionTokenKey | default "session-token" }} + optional: true + - name: BIFROST_OBJECT_STORAGE_ROLE_ARN + valueFrom: + secretKeyRef: + name: {{ .Values.storage.logsStore.objectStorage.existingSecret }} + key: {{ .Values.storage.logsStore.objectStorage.roleArnKey | default "role-arn" }} + optional: true + {{- end }} + {{- if eq .Values.storage.logsStore.objectStorage.type "gcs" }} + - name: BIFROST_OBJECT_STORAGE_CREDENTIALS_JSON + valueFrom: + secretKeyRef: + name: {{ .Values.storage.logsStore.objectStorage.existingSecret }} + key: {{ .Values.storage.logsStore.objectStorage.credentialsJsonKey | default "credentials-json" }} + {{- end }} + {{- end }} {{- /* Maxim API key from existing secret */ -}} {{- if and .Values.bifrost.plugins.maxim.enabled .Values.bifrost.plugins.maxim.secretRef .Values.bifrost.plugins.maxim.secretRef.name }} - name: BIFROST_MAXIM_API_KEY @@ -150,6 +186,18 @@ spec: name: {{ .Values.bifrost.plugins.maxim.secretRef.name }} key: {{ .Values.bifrost.plugins.maxim.secretRef.key | default "api-key" }} {{- end }} + {{- /* MCP client connection strings from existing secrets (one per client with secretRef.name set) */ -}} + {{- if .Values.bifrost.mcp.enabled }} + {{- range $idx, $client := .Values.bifrost.mcp.clientConfigs }} + {{- if and $client.secretRef $client.secretRef.name }} + - name: BIFROST_MCP_{{ regexReplaceAll "[^A-Z0-9]+" (upper $client.name) "_" }}_CONNECTION_STRING + valueFrom: + secretKeyRef: + name: {{ $client.secretRef.name }} + key: {{ $client.secretRef.connectionStringKey | default "connection-string" }} + {{- end }} + {{- end }} + {{- end }} {{- /* Governance auth credentials from existing secret */ -}} {{- if and .Values.bifrost.governance .Values.bifrost.governance.authConfig .Values.bifrost.governance.authConfig.existingSecret }} - name: BIFROST_ADMIN_USERNAME diff --git a/helm-charts/bifrost/templates/stateful.yaml b/helm-charts/bifrost/templates/stateful.yaml index 4652480a6c..2443e9ffb4 100644 --- a/helm-charts/bifrost/templates/stateful.yaml +++ 
b/helm-charts/bifrost/templates/stateful.yaml @@ -142,6 +142,42 @@ spec: name: {{ .Values.vectorStore.pinecone.external.existingSecret }} key: {{ .Values.vectorStore.pinecone.external.apiKeyKey | default "api-key" }} {{- end }} + {{- /* Object storage credentials from existing secret */ -}} + {{- if and .Values.storage.logsStore.enabled .Values.storage.logsStore.objectStorage .Values.storage.logsStore.objectStorage.enabled .Values.storage.logsStore.objectStorage.existingSecret }} + {{- if eq .Values.storage.logsStore.objectStorage.type "s3" }} + - name: BIFROST_OBJECT_STORAGE_ACCESS_KEY_ID + valueFrom: + secretKeyRef: + name: {{ .Values.storage.logsStore.objectStorage.existingSecret }} + key: {{ .Values.storage.logsStore.objectStorage.accessKeyIdKey | default "access-key-id" }} + optional: true + - name: BIFROST_OBJECT_STORAGE_SECRET_ACCESS_KEY + valueFrom: + secretKeyRef: + name: {{ .Values.storage.logsStore.objectStorage.existingSecret }} + key: {{ .Values.storage.logsStore.objectStorage.secretAccessKeyKey | default "secret-access-key" }} + optional: true + - name: BIFROST_OBJECT_STORAGE_SESSION_TOKEN + valueFrom: + secretKeyRef: + name: {{ .Values.storage.logsStore.objectStorage.existingSecret }} + key: {{ .Values.storage.logsStore.objectStorage.sessionTokenKey | default "session-token" }} + optional: true + - name: BIFROST_OBJECT_STORAGE_ROLE_ARN + valueFrom: + secretKeyRef: + name: {{ .Values.storage.logsStore.objectStorage.existingSecret }} + key: {{ .Values.storage.logsStore.objectStorage.roleArnKey | default "role-arn" }} + optional: true + {{- end }} + {{- if eq .Values.storage.logsStore.objectStorage.type "gcs" }} + - name: BIFROST_OBJECT_STORAGE_CREDENTIALS_JSON + valueFrom: + secretKeyRef: + name: {{ .Values.storage.logsStore.objectStorage.existingSecret }} + key: {{ .Values.storage.logsStore.objectStorage.credentialsJsonKey | default "credentials-json" }} + {{- end }} + {{- end }} {{- /* Maxim API key from existing secret */ -}} {{- if and .Values.bifrost.plugins.maxim.enabled .Values.bifrost.plugins.maxim.secretRef .Values.bifrost.plugins.maxim.secretRef.name }} - name: BIFROST_MAXIM_API_KEY @@ -150,6 +186,18 @@ spec: name: {{ .Values.bifrost.plugins.maxim.secretRef.name }} key: {{ .Values.bifrost.plugins.maxim.secretRef.key | default "api-key" }} {{- end }} + {{- /* MCP client connection strings from existing secrets (one per client with secretRef.name set) */ -}} + {{- if .Values.bifrost.mcp.enabled }} + {{- range $idx, $client := .Values.bifrost.mcp.clientConfigs }} + {{- if and $client.secretRef $client.secretRef.name }} + - name: BIFROST_MCP_{{ regexReplaceAll "[^A-Z0-9]+" (upper $client.name) "_" }}_CONNECTION_STRING + valueFrom: + secretKeyRef: + name: {{ $client.secretRef.name }} + key: {{ $client.secretRef.connectionStringKey | default "connection-string" }} + {{- end }} + {{- end }} + {{- end }} {{- /* Governance auth credentials from existing secret */ -}} {{- if and .Values.bifrost.governance .Values.bifrost.governance.authConfig .Values.bifrost.governance.authConfig.existingSecret }} - name: BIFROST_ADMIN_USERNAME diff --git a/helm-charts/bifrost/values-examples/providers-and-virtual-keys.yaml b/helm-charts/bifrost/values-examples/providers-and-virtual-keys.yaml index 15fe5d08dd..9f57bdf5ab 100644 --- a/helm-charts/bifrost/values-examples/providers-and-virtual-keys.yaml +++ b/helm-charts/bifrost/values-examples/providers-and-virtual-keys.yaml @@ -74,15 +74,15 @@ bifrost: - name: "openai-primary" value: 
"sk-dummy-openai-key-1-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" weight: 2 # Gets 50% of traffic (2 out of 4 total weight) - models: + models: ["*"] - name: "openai-secondary" value: "sk-dummy-openai-key-2-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" weight: 1 # Gets 25% of traffic - models: + models: ["*"] - name: "openai-backup" value: "sk-dummy-openai-key-3-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" weight: 1 # Gets 25% of traffic - models: + models: ["*"] # Anthropic - 2 API keys anthropic: @@ -90,11 +90,11 @@ bifrost: - name: "anthropic-primary" value: "sk-ant-dummy-key-1-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" weight: 1 - models: + models: ["*"] - name: "anthropic-secondary" value: "sk-ant-dummy-key-2-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" weight: 1 - models: + models: ["*"] # Groq - 2 API keys groq: @@ -102,11 +102,11 @@ bifrost: - name: "groq-primary" value: "gsk_dummy_groq_key_1_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" weight: 1 - models: + models: ["*"] - name: "groq-secondary" value: "gsk_dummy_groq_key_2_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" weight: 1 - models: + models: ["*"] # ========================================================================== # GOVERNANCE CONFIGURATION diff --git a/helm-charts/bifrost/values-examples/secrets-from-k8s.yaml b/helm-charts/bifrost/values-examples/secrets-from-k8s.yaml index 6f5a988039..6111ff3c98 100644 --- a/helm-charts/bifrost/values-examples/secrets-from-k8s.yaml +++ b/helm-charts/bifrost/values-examples/secrets-from-k8s.yaml @@ -76,11 +76,13 @@ bifrost: providers: openai: keys: - - value: "env.OPENAI_API_KEY" + - name: "openai-primary" + value: "env.OPENAI_API_KEY" weight: 1 anthropic: keys: - - value: "env.ANTHROPIC_API_KEY" + - name: "anthropic-primary" + value: "env.ANTHROPIC_API_KEY" weight: 1 # Provider secrets - inject API keys from Kubernetes secrets as env vars diff --git a/helm-charts/bifrost/values.schema.json b/helm-charts/bifrost/values.schema.json index 1c60ca1f00..28074334d0 100644 --- a/helm-charts/bifrost/values.schema.json +++ b/helm-charts/bifrost/values.schema.json @@ -240,6 +240,15 @@ "type": "string", "description": "Encryption key for sensitive data" }, + "encryptionKeySecret": { + "type": "object", + "description": "Reference to an existing Kubernetes secret holding the encryption key. Takes precedence over `encryptionKey` when `name` is set.", + "properties": { + "name": { "type": "string" }, + "key": { "type": "string", "default": "encryption-key" } + }, + "additionalProperties": false + }, "authConfig": { "$ref": "#/$defs/authConfig" }, @@ -293,8 +302,15 @@ "type": "integer", "minimum": 1 }, - "enableLitellmFallbacks": { - "type": "boolean" + "compat": { + "type": "object", + "additionalProperties": false, + "properties": { + "convertTextToChat": { "type": "boolean" }, + "convertChatToResponses": { "type": "boolean" }, + "shouldDropParams": { "type": "boolean" }, + "shouldConvertParams": { "type": "boolean" } + } }, "prometheusLabels": { "type": "array", @@ -383,6 +399,16 @@ "hideDeletedVirtualKeysInFilters": { "type": "boolean", "description": "When true, deleted virtual keys are omitted from logs and MCP logs filter data" + }, + "mcpDisableAutoToolInject": { + "type": "boolean", + "description": "When true, MCP tools are not automatically injected into requests. Tools are only included when explicitly specified via request context filters or headers." 
+ }, + "routingChainMaxDepth": { + "type": "integer", + "minimum": 1, + "description": "Maximum depth for routing rule chain evaluation", + "default": 10 } }, "additionalProperties": false @@ -456,6 +482,11 @@ "type": "string", "enum": ["server", "tool"], "description": "How tools are exposed in VFS for code execution" + }, + "disableAutoToolInject": { + "type": "boolean", + "description": "When true, MCP tools are not automatically injected into requests. Tools are only included when explicitly specified.", + "default": false } } }, @@ -748,7 +779,9 @@ "trace_type": { "type": "string", "enum": [ - "otel" + "genai_extension", + "vercel", + "open_inference" ], "description": "Type of trace to use for the OTEL collector" }, @@ -876,7 +909,7 @@ }, "placement": { "type": "string", - "enum": ["pre_builtin", "post_builtin"], + "enum": ["pre_builtin", "post_builtin", "builtin"], "default": "post_builtin", "description": "Plugin execution placement relative to built-in plugins" }, @@ -1065,12 +1098,12 @@ "customer_id": { "type": "string" }, - "budget_id": { - "type": "string" - }, "rate_limit_id": { "type": "string" }, + "calendar_aligned": { + "type": "boolean" + }, "provider_configs": { "type": "array", "items": { @@ -1085,16 +1118,16 @@ "mcp_client_id": { "type": "integer" }, + "mcp_client_name": { + "type": "string" + }, "tools_to_execute": { "type": "array", "items": { "type": "string" } } - }, - "required": [ - "mcp_client_id" - ] + } } } }, @@ -1155,12 +1188,6 @@ "required": ["weight"] } }, - "provider": { - "type": "string" - }, - "model": { - "type": "string" - }, "fallbacks": { "type": "array", "items": { @@ -1220,6 +1247,40 @@ }, "required": ["name"] } + }, + "pricingOverrides": { + "type": "array", + "description": "Scoped pricing overrides applied at runtime by the model catalog", + "items": { + "type": "object", + "properties": { + "id": { "type": "string", "description": "Unique pricing override ID" }, + "name": { "type": "string", "description": "Human-readable name for this override" }, + "scope_kind": { + "type": "string", + "enum": ["global", "provider", "provider_key", "virtual_key", "virtual_key_provider", "virtual_key_provider_key"], + "description": "Scope level for this override" + }, + "virtual_key_id": { "type": "string", "description": "Virtual key ID (required for virtual_key* scopes)" }, + "provider_id": { "type": "string", "description": "Provider ID (required for provider* scopes)" }, + "provider_key_id": { "type": "string", "description": "Provider key ID (required for provider_key and virtual_key_provider_key scopes)" }, + "match_type": { + "type": "string", + "enum": ["exact", "wildcard"], + "description": "How the pattern is matched against model names" + }, + "pattern": { "type": "string", "description": "Model name pattern to match" }, + "request_types": { + "type": "array", + "minItems": 1, + "items": { "type": "string" }, + "description": "Request types this override applies to" + }, + "pricing_patch": { "type": "string", "description": "JSON-encoded pricing fields to override" }, + "config_hash": { "type": "string", "description": "Internal hash for change detection (auto-managed)" } + }, + "required": ["id", "name", "scope_kind", "match_type", "pattern", "request_types"] + } } }, "additionalProperties": false @@ -1359,7 +1420,7 @@ ] } }, - "saml": { + "scim": { "type": "object", "properties": { "enabled": { @@ -1372,7 +1433,7 @@ "okta", "entra" ], - "description": "SAML provider type (empty when not configured)" + "description": "SCIM/SSO provider type 
(empty when not configured)" }, "config": { "type": "object" @@ -1409,6 +1470,9 @@ "clientSecret": { "type": "string" }, + "apiToken": { + "type": "string" + }, "audience": { "type": "string" }, @@ -1424,7 +1488,9 @@ }, "required": [ "issuerUrl", - "clientId" + "clientId", + "clientSecret", + "apiToken" ] } } @@ -1606,6 +1672,89 @@ "type": "string" } } + }, + "largePayloadOptimization": { + "type": "object", + "description": "Large payload streaming optimization configuration", + "properties": { + "enabled": { + "type": "boolean", + "default": false + }, + "requestThresholdBytes": { + "type": "integer", + "minimum": 0, + "default": 10485760 + }, + "responseThresholdBytes": { + "type": "integer", + "minimum": 0, + "default": 10485760 + }, + "prefetchSizeBytes": { + "type": "integer", + "minimum": 0, + "default": 65536 + }, + "maxPayloadBytes": { + "type": "integer", + "minimum": 0, + "default": 524288000 + }, + "truncatedLogBytes": { + "type": "integer", + "minimum": 0, + "default": 1048576 + } + } + }, + "websocket": { + "type": "object", + "description": "Optional tuning for the WebSocket gateway (Responses API WebSocket Mode, Realtime API)", + "properties": { + "maxConnectionsPerUser": { + "type": "integer", + "minimum": 1, + "description": "Maximum concurrent WebSocket connections per user", + "default": 100 + }, + "transcriptBufferSize": { + "type": "integer", + "minimum": 1, + "description": "Number of transcript entries to buffer for Realtime API mid-session fallback", + "default": 100 + }, + "pool": { + "type": "object", + "description": "Upstream WebSocket connection pool configuration", + "properties": { + "maxIdlePerKey": { + "type": "integer", + "minimum": 1, + "description": "Maximum idle connections per provider/key combination", + "default": 50 + }, + "maxTotalConnections": { + "type": "integer", + "minimum": 1, + "description": "Maximum total idle connections across all providers", + "default": 1000 + }, + "idleTimeoutSeconds": { + "type": "integer", + "minimum": 1, + "description": "Seconds before an idle connection is evicted", + "default": 600 + }, + "maxConnectionLifetimeSeconds": { + "type": "integer", + "minimum": 1, + "description": "Maximum lifetime of a connection in seconds", + "default": 7200 + } + } + } + } } } }, @@ -1689,6 +1838,81 @@ "maxOpenConns": { "type": "integer", "minimum": 2 + }, + "objectStorage": { + "type": "object", + "properties": { + "enabled": { + "type": "boolean" + }, + "type": { + "type": "string", + "enum": ["s3", "gcs"] + }, + "bucket": { + "type": "string" + }, + "prefix": { + "type": "string" + }, + "compress": { + "type": "boolean" + }, + "region": { + "type": "string" + }, + "endpoint": { + "type": "string" + }, + "accessKeyId": { + "type": "string" + }, + "secretAccessKey": { + "type": "string" + }, + "sessionToken": { + "type": "string" + }, + "roleArn": { + "type": "string" + }, + "forcePathStyle": { + "type": "boolean" + }, + "projectId": { + "type": "string" + }, + "credentialsJson": { + "type": "string" + }, + "existingSecret": { + "type": "string" + }, + "accessKeyIdKey": { + "type": "string" + }, + "secretAccessKeyKey": { + "type": "string" + }, + "sessionTokenKey": { + "type": "string" + }, + "roleArnKey": { + "type": "string" + }, + "credentialsJsonKey": { + "type": "string" + } + }, + "if": { + "properties": { + "enabled": { "const": true } + }, + "required": ["enabled"] + }, + "then": { + "required": ["type", "bucket"] + } } } } @@ -2581,6 +2805,11 @@ "minimum": 1, "maximum": 10000, "description": "Maximum number of TCP 
connections per provider host. For HTTP/2 (e.g. Bedrock), each connection supports ~100 concurrent streams. Default: 5000."
+        },
+        "beta_header_overrides": {
+          "type": "object",
+          "additionalProperties": { "type": "boolean" },
+          "description": "Override default Anthropic beta header support per provider. Keys are header prefixes, values are true (supported) or false (unsupported)."
         }
       }
     },
@@ -2658,6 +2887,15 @@
           "type": "string",
           "description": "HTTP or SSE URL (required for HTTP or SSE connections)"
         },
+        "secretRef": {
+          "type": "object",
+          "description": "Reference to an existing Kubernetes secret holding the MCP connection_string. Chart injects BIFROST_MCP_<CLIENT_NAME>_CONNECTION_STRING and rewrites connection_string in config.json.",
+          "properties": {
+            "name": { "type": "string" },
+            "connectionStringKey": { "type": "string", "default": "connection-string" }
+          },
+          "additionalProperties": false
+        },
         "authType": {
           "type": "string",
           "enum": ["none", "headers", "oauth"],
@@ -2744,6 +2982,16 @@
             "type": "number",
             "minimum": 0
           }
+        },
+        "allowedExtraHeaders": {
+          "type": "array",
+          "items": { "type": "string" },
+          "description": "Allowlist of request-level headers that callers may forward to this MCP server. Use ['*'] to allow all headers."
+        },
+        "allowOnAllVirtualKeys": {
+          "type": "boolean",
+          "description": "When true, this MCP server is accessible to all virtual keys without requiring explicit per-key assignment.",
+          "default": false
         }
       },
       "required": [
@@ -2832,7 +3080,7 @@
         },
         "allowed_models": {
           "type": "array",
-          "description": "Allowed models for this provider config (empty means all models allowed)",
+          "description": "Allowed models for this provider config. Use [\"*\"] to allow all models; empty array denies all (deny-by-default).",
           "items": {
             "type": "string"
           }
@@ -2872,7 +3120,7 @@
           "items": {
             "type": "string"
           },
-          "description": "Supported models for this key"
+          "description": "Models this key can access. Use [\"*\"] to allow all models; empty array denies all (deny-by-default)."
         },
         "weight": {
           "type": "number",
@@ -2899,7 +3147,6 @@
             "description": "Azure API version"
           }
         },
-        "required": ["endpoint"],
         "additionalProperties": false
       },
       "vertex_key_config": {
@@ -2929,7 +3176,6 @@
             "description": "Model to deployment mappings"
          }
        },
-        "required": ["project_id", "region"],
        "additionalProperties": false
      },
      "bedrock_key_config": {
@@ -3009,10 +3255,6 @@
            "description": "Exact model name served on this VLLM instance"
          }
        },
-        "required": [
-          "url",
-          "model_name"
-        ],
        "additionalProperties": false
      }
    },
@@ -3067,8 +3309,7 @@
            ]
          }
        }
-      ],
-      "required": ["key_id", "name", "value"]
+      ]
    }
  }
},
@@ -3078,4 +3319,4 @@
    "additionalProperties": false
  }
}
-}
+}
\ No newline at end of file
diff --git a/helm-charts/bifrost/values.yaml b/helm-charts/bifrost/values.yaml
index fb468794c6..c9f06a9cc8 100644
--- a/helm-charts/bifrost/values.yaml
+++ b/helm-charts/bifrost/values.yaml
@@ -56,7 +56,7 @@ podSecurityContext:
 securityContext:
   capabilities:
     drop:
-      - ALL
+      - ALL
   readOnlyRootFilesystem: false
   runAsNonRoot: true
   runAsUser: 1000
@@ -163,6 +163,14 @@ bifrost:
   # Can be set as a secret or environment variable
   encryptionKey: ""
 
+  # Use an existing Kubernetes secret for the encryption key.
+  # When `name` is set, it takes precedence over `encryptionKey`: the chart
+  # injects BIFROST_ENCRYPTION_KEY into the pod via secretKeyRef and writes
+  # `encryption_key: "env.BIFROST_ENCRYPTION_KEY"` in the rendered config.json.
+ encryptionKeySecret: + name: "" + key: "encryption-key" + # Authentication configuration (top-level) # This controls authentication for Bifrost API and dashboard authConfig: @@ -188,7 +196,11 @@ bifrost: enforceGovernanceHeader: false allowDirectKeys: false maxRequestBodySizeMb: 100 - enableLitellmFallbacks: false + compat: + convertTextToChat: false + convertChatToResponses: false + shouldDropParams: false + shouldConvertParams: false prometheusLabels: [] # Header filtering configuration for x-bf-eh-* headers forwarded to LLM providers headerFilterConfig: @@ -204,6 +216,7 @@ bifrost: # mcpToolSyncInterval: 0 # Global tool sync interval in minutes (0 = disabled) # hideDeletedVirtualKeysInFilters: false # Omit deleted virtual keys from logs/MCP filter data # whitelistedRoutes: [] # Routes that bypass auth middleware + # routingChainMaxDepth: 10 # Maximum depth for routing rule chain evaluation # Framework configuration framework: @@ -255,46 +268,46 @@ bifrost: # ca_cert_pem: "" # PEM-encoded CA cert for SSL-intercepting proxies # send_back_raw_response: false # Include raw response in BifrostResponse # store_raw_request_response: false # Capture raw payloads for plugins only; not returned to client - # - # anthropic: - # keys: - # - name: "anthropic-key" - # value: "sk-ant-..." - # weight: 1 - # - # # Azure OpenAI example (requires azure_key_config) - # azure: - # keys: - # - name: "azure-key" - # value: "..." - # weight: 1 - # azure_key_config: - # endpoint: "https://your-resource.openai.azure.com" - # api_version: "2024-02-15-preview" - # deployments: - # gpt-4o: "my-gpt4o-deployment" - # - # # Google Vertex AI example (requires vertex_key_config) - # vertex: - # keys: - # - name: "vertex-key" - # value: "" - # weight: 1 - # vertex_key_config: - # project_id: "my-gcp-project" - # region: "us-central1" - # auth_credentials: "env.GOOGLE_CREDENTIALS" - # - # # AWS Bedrock example (requires bedrock_key_config) - # bedrock: - # keys: - # - name: "bedrock-key" - # value: "" - # weight: 1 - # bedrock_key_config: - # region: "us-east-1" - # access_key: "env.AWS_ACCESS_KEY_ID" - # secret_key: "env.AWS_SECRET_ACCESS_KEY" + # + # anthropic: + # keys: + # - name: "anthropic-key" + # value: "sk-ant-..." + # weight: 1 + # + # # Azure OpenAI example (requires azure_key_config) + # azure: + # keys: + # - name: "azure-key" + # value: "..." + # weight: 1 + # azure_key_config: + # endpoint: "https://your-resource.openai.azure.com" + # api_version: "2024-02-15-preview" + # deployments: + # gpt-4o: "my-gpt4o-deployment" + # + # # Google Vertex AI example (requires vertex_key_config) + # vertex: + # keys: + # - name: "vertex-key" + # value: "" + # weight: 1 + # vertex_key_config: + # project_id: "my-gcp-project" + # region: "us-central1" + # auth_credentials: "env.GOOGLE_CREDENTIALS" + # + # # AWS Bedrock example (requires bedrock_key_config) + # bedrock: + # keys: + # - name: "bedrock-key" + # value: "" + # weight: 1 + # bedrock_key_config: + # region: "us-east-1" + # access_key: "env.AWS_ACCESS_KEY_ID" + # secret_key: "env.AWS_SECRET_ACCESS_KEY" # Provider secrets - use existing Kubernetes secrets for provider API keys # These will be injected as environment variables that can be referenced in providers config @@ -318,6 +331,13 @@ bifrost: # command: "/path/to/mcp/server" # args: [] # envs: [] + # # Optional: source connection_string from a Kubernetes secret. 
+  #     # When set, the chart injects BIFROST_MCP_<CLIENT_NAME>_CONNECTION_STRING
+  #     # into the pod and rewrites connection_string in config.json
+  #     # to `env.BIFROST_MCP_<CLIENT_NAME>_CONNECTION_STRING`.
+  #     secretRef:
+  #       name: ""                                  # k8s secret name
+  #       connectionStringKey: "connection-string"  # key within the secret
   # toolSyncInterval: "10m" # Global tool sync interval (Go duration)
   # Tool manager configuration
   toolManagerConfig:
@@ -387,17 +407,17 @@
       config:
         service_name: "bifrost"
         collector_url: ""
-        trace_type: "otel"
+        trace_type: "genai_extension"
         protocol: "grpc"
         # Push-based metrics export via OTLP (recommended for multi-node clusters)
         metrics_enabled: false
-        metrics_endpoint: "" # e.g., http://otel-collector:4318/v1/metrics (HTTP) or otel-collector:4317 (gRPC)
-        metrics_push_interval: 15 # Push interval in seconds (1-300)
+        metrics_endpoint: ""      # e.g., http://otel-collector:4318/v1/metrics (HTTP) or otel-collector:4317 (gRPC)
+        metrics_push_interval: 15 # Push interval in seconds (1-300)
         # Custom headers for the collector (supports env.VAR_NAME prefix for env var substitution)
         headers: {}
         # TLS configuration
-        tls_ca_cert: "" # Path to TLS CA certificate file
-        insecure: false # Skip TLS verification (ignored if tls_ca_cert is set)
+        tls_ca_cert: ""  # Path to TLS CA certificate file
+        insecure: false  # Skip TLS verification (ignored if tls_ca_cert is set)
 
     datadog:
       enabled: false
@@ -448,7 +468,7 @@
     #   - id: "vk-1"
     #     name: "Virtual Key 1"
     #     description: "Virtual key description"
-    #     value: "vk-..."
+    #     value: "vk-..." # Optional - auto-generated if omitted
     #     is_active: true
     #     team_id: "team-1" # Mutually exclusive with customer_id
     #     customer_id: "" # Mutually exclusive with team_id
@@ -486,13 +506,15 @@
     #     name: "Route to Azure"
     #     description: "Route GPT requests to Azure"
     #     enabled: true
-    #     cel_expression: "request.model.startsWith('gpt-')"
-    #     provider: "azure"
-    #     model: "" # Empty means use original model
+    #     cel_expression: "model.startsWith('gpt-')"
+    #     targets:
+    #       - provider: "azure"
+    #         model: "" # Empty means use original model
+    #         weight: 1.0
     #     fallbacks: ["openai"]
-    #     scope: "global" # Options: global, team, customer, virtual_key
-    #     scope_id: "" # Required for non-global scopes
-    #     priority: 0 # Lower = evaluated first
+    #     scope: "global"  # Options: global, team, customer, virtual_key
+    #     scope_id: ""     # Required for non-global scopes
+    #     priority: 0      # Lower = evaluated first
   authConfig:
     adminUsername: ""
     adminPassword: ""
@@ -535,8 +557,8 @@
   # mDNS discovery
   mdnsService: ""
 
-  # SAML/SCIM configuration for enterprise SSO
-  saml:
+  # SCIM/SSO configuration for enterprise SSO
+  scim:
     enabled: false
     # Provider: okta, entra
     provider: ""
     # config:
     #   issuerUrl: "https://your-domain.okta.com/oauth2/default"
     #   clientId: ""
     #   clientSecret: ""
+    #   apiToken: ""
     #   audience: ""
     #   userIdField: "sub"
     #   teamIdsField: "groups"
@@ -590,11 +613,30 @@
     disabled: false
     hmacKey: ""
 
+  # Large payload optimization - streams large payloads without full materialization
+  # largePayloadOptimization:
+  #   enabled: false
+  #   requestThresholdBytes: 10485760 # 10MB
+  #   responseThresholdBytes: 10485760 # 10MB
+  #   prefetchSizeBytes: 65536 # 64KB
+  #   maxPayloadBytes: 524288000 # 500MB
+  #   truncatedLogBytes: 1048576 # 1MB
+
+  # WebSocket gateway configuration (Responses API, Realtime API)
+  # websocket:
+  #   maxConnectionsPerUser: 100
+  #   transcriptBufferSize: 100
+  #   pool:
+  #     maxIdlePerKey: 50
+  #     maxTotalConnections: 1000
+  #     idleTimeoutSeconds: 600
+  #
maxConnectionLifetimeSeconds: 7200 + # Storage configuration storage: # Default storage mode: sqlite or postgres # Used as fallback when per-store type is not specified - mode: sqlite # Options: sqlite, postgres + mode: sqlite # Options: sqlite, postgres # Persistent volume for SQLite databases (when using sqlite for any store) persistence: @@ -608,7 +650,7 @@ storage: configStore: enabled: true # Backend type for config store. Empty string uses storage.mode as default - type: "" # Options: sqlite, postgres, or "" (uses storage.mode) + type: "" # Options: sqlite, postgres, or "" (uses storage.mode) # PostgreSQL connection pool tuning (only applies when type is postgres) # maxIdleConns: 5 # maxOpenConns: 50 @@ -617,11 +659,34 @@ storage: logsStore: enabled: true # Backend type for logs store. Empty string uses storage.mode as default - type: "" # Options: sqlite, postgres, or "" (uses storage.mode) + type: "" # Options: sqlite, postgres, or "" (uses storage.mode) # PostgreSQL connection pool tuning (only applies when type is postgres) # maxIdleConns: 5 # maxOpenConns: 50 + # Object storage for offloading large log payloads (optional) + # When enabled, request/response payloads are stored in S3/GCS + # while the DB keeps only lightweight index data for fast analytics. + objectStorage: + enabled: false + # type: s3 # Options: s3, gcs + # bucket: "" # Bucket name + # prefix: bifrost # Key prefix for stored objects + # compress: false # Enable gzip compression for stored objects + + # S3 configuration (when type is s3) + # region: us-east-1 + # endpoint: "" # Custom endpoint for MinIO/R2 + # accessKeyId: "" # Leave empty to use default AWS credential chain + # secretAccessKey: "" # (instance role, env vars, shared credentials, etc.) + # sessionToken: "" # AWS STS session token (optional) + # roleArn: "" # AWS IAM role ARN to assume via STS (works with static creds or instance role) + # forcePathStyle: false # Set true for MinIO + + # GCS configuration (when type is gcs) + # projectId: "" + # credentialsJson: "" # Service account JSON, omit for default credentials + # PostgreSQL configuration (when any store uses postgres) postgresql: # Deploy PostgreSQL as part of this chart @@ -672,7 +737,7 @@ postgresql: vectorStore: # Enable vector store for semantic caching enabled: false - type: none # Options: none, weaviate, redis, qdrant + type: none # Options: none, weaviate, redis, qdrant # Weaviate configuration weaviate: @@ -733,10 +798,10 @@ vectorStore: username: "" password: "" database: 0 - useTls: false # Enable TLS for Redis connection - insecureSkipVerify: false # Skip TLS certificate verification - caCertPem: "" # PEM-encoded CA certificate to trust for Redis TLS - clusterMode: false # Use Redis Cluster mode (required for AWS MemoryDB) + useTls: false # Enable TLS for Redis connection + insecureSkipVerify: false # Skip TLS certificate verification + caCertPem: "" # PEM-encoded CA certificate to trust for Redis TLS + clusterMode: false # Use Redis Cluster mode (required for AWS MemoryDB) # Connection pool tuning (optional) # poolSize: 10 # Maximum number of socket connections # maxActiveConns: 0 # Maximum number of active connections @@ -819,7 +884,7 @@ vectorStore: external: enabled: false apiKey: "" - indexHost: "" # Index host URL from Pinecone console (e.g., your-index.svc.environment.pinecone.io) + indexHost: "" # Index host URL from Pinecone console (e.g., your-index.svc.environment.pinecone.io) # Use existing Kubernetes secret for API key (takes precedence over apiKey field) 
existingSecret: "" apiKeyKey: "api-key" @@ -836,7 +901,6 @@ envFrom: [] # - configMapRef: # name: my-configmap - # Init containers to run before the main application container. # Provide a list of init containers using standard Kubernetes container spec. initContainers: [] diff --git a/helm-charts/index.yaml b/helm-charts/index.yaml index a47aa85c4f..45ce3dc564 100644 --- a/helm-charts/index.yaml +++ b/helm-charts/index.yaml @@ -1,6 +1,69 @@ apiVersion: v1 entries: bifrost: + - apiVersion: v2 + appVersion: 1.4.11 + created: "2026-04-15T18:00:00.000000+00:00" + description: A Helm chart for deploying Bifrost - AI Gateway with unified interface for multiple providers + digest: "" + home: https://www.getmaxim.ai/bifrost + icon: https://www.getmaxim.ai/bifrost/bifrost-logo-only.png + keywords: + - ai + - gateway + - llm + maintainers: + - email: akshay@getmaxim.ai + name: Bifrost Team + name: bifrost + sources: + - https://github.com/maximhq/bifrost + type: application + urls: + - https://maximhq.github.io/bifrost/helm-charts/bifrost-2.1.1.tgz + version: 2.1.1 + - apiVersion: v2 + appVersion: 1.4.11 + created: "2026-04-15T12:00:00.000000+00:00" + description: A Helm chart for deploying Bifrost - AI Gateway with unified interface for multiple providers + digest: "" + home: https://www.getmaxim.ai/bifrost + icon: https://www.getmaxim.ai/bifrost/bifrost-logo-only.png + keywords: + - ai + - gateway + - llm + maintainers: + - email: akshay@getmaxim.ai + name: Bifrost Team + name: bifrost + sources: + - https://github.com/maximhq/bifrost + type: application + urls: + - https://maximhq.github.io/bifrost/helm-charts/bifrost-2.1.0-prerelease2.tgz + version: 2.1.0-prerelease2 + - apiVersion: v2 + appVersion: 1.4.11 + created: "2026-04-13T12:00:00.000000+00:00" + description: A Helm chart for deploying Bifrost - AI Gateway with unified interface for multiple providers + digest: "" + home: https://www.getmaxim.ai/bifrost + icon: https://www.getmaxim.ai/bifrost/bifrost-logo-only.png + keywords: + - ai + - gateway + - llm + maintainers: + - email: akshay@getmaxim.ai + name: Bifrost Team + name: bifrost + sources: + - https://github.com/maximhq/bifrost + type: application + urls: + - https://maximhq.github.io/bifrost/helm-charts/bifrost-2.0.18-rc.1.tgz + version: 2.0.18-rc.1 - apiVersion: v2 appVersion: 1.4.11 created: "2026-04-08T12:00:00.000000+00:00" @@ -607,4 +670,4 @@ entries: urls: - https://maximhq.github.io/bifrost/helm-charts/bifrost-1.3.36.tgz version: 1.3.36 -generated: "2026-04-08T12:00:00.000000+00:00" +generated: "2026-04-15T18:00:00.000000+00:00" diff --git a/plugins/governance/go.mod b/plugins/governance/go.mod index 13949872e0..783ba640cd 100644 --- a/plugins/governance/go.mod +++ b/plugins/governance/go.mod @@ -1,6 +1,6 @@ module github.com/maximhq/bifrost/plugins/governance -go 1.26.2 +go 1.26.1 require gorm.io/gorm v1.31.1 @@ -8,8 +8,8 @@ require ( github.com/bytedance/sonic v1.15.0 github.com/google/cel-go v0.26.1 github.com/google/uuid v1.6.0 - github.com/maximhq/bifrost/core v1.4.19 - github.com/maximhq/bifrost/framework v1.2.38 + github.com/maximhq/bifrost/core v1.4.22 + github.com/maximhq/bifrost/framework v1.2.39 github.com/stretchr/testify v1.11.1 github.com/valyala/fasthttp v1.68.0 ) diff --git a/plugins/governance/go.sum b/plugins/governance/go.sum index e346a2c75f..7739c6f839 100644 --- a/plugins/governance/go.sum +++ b/plugins/governance/go.sum @@ -199,10 +199,10 @@ github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWE github.com/mattn/go-isatty 
v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= github.com/mattn/go-sqlite3 v1.14.32 h1:JD12Ag3oLy1zQA+BNn74xRgaBbdhbNIDYvQUEuuErjs= github.com/mattn/go-sqlite3 v1.14.32/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y= -github.com/maximhq/bifrost/core v1.4.19 h1:RY6n8O1upUo/W05whsLO2OJtJpCpTFzplyq+6pk4NaY= -github.com/maximhq/bifrost/core v1.4.19/go.mod h1:iOh2p1qEKVolMG8l84LWcn1CseUL2Kl5c/iXFMiZI84= -github.com/maximhq/bifrost/framework v1.2.38 h1:uWITpE+PilOwo1CQCXhZ8iQ98hH7VOBroJlTMWGL8As= -github.com/maximhq/bifrost/framework v1.2.38/go.mod h1:S7vc+M9QOAj7RYK6pA0uauWHMxLxGRvlhHlZGArNsPw= +github.com/maximhq/bifrost/core v1.4.22 h1:iK3OhfOFzrivnQgGjFBqAEkXYzn5BYSPkwoXH4o1tWM= +github.com/maximhq/bifrost/core v1.4.22/go.mod h1:IaGSRlCgvy6Nr7Xiyu44sJJo9JbK188EL4QPXIcvURA= +github.com/maximhq/bifrost/framework v1.2.39 h1:BCYfFFHBcx1xlnAy4GN/6+jLOUyysWQvdVjZ62OzAT4= +github.com/maximhq/bifrost/framework v1.2.39/go.mod h1:+HrM35y5Jid35NKwUcG4GAXvOuCnMu/5bltqdIAhy84= github.com/oapi-codegen/runtime v1.1.1 h1:EXLHh0DXIJnWhdRPN2w4MXAzFyE4CskzhNLUmtpMYro= github.com/oapi-codegen/runtime v1.1.1/go.mod h1:SK9X900oXmPWilYR5/WKPzt3Kqxn/uS/+lbpREv+eCg= github.com/oklog/ulid v1.3.1 h1:EGfNDEx6MqHz8B3uNV6QAib1UR2Lm97sHi3ocA6ESJ4= diff --git a/plugins/governance/main.go b/plugins/governance/main.go index 0098de2fe5..89a645b905 100644 --- a/plugins/governance/main.go +++ b/plugins/governance/main.go @@ -161,7 +161,7 @@ func Init( } // Initialize components in dependency order with fixed, optimal settings // Resolver (pure decision engine for hierarchical governance, depends only on store) - resolver := NewBudgetResolver(governanceStore, modelCatalog, logger) + resolver := NewBudgetResolver(governanceStore, modelCatalog, logger, inMemoryStore) // 3. 
Tracker (business logic owner, depends on store and resolver) tracker := NewUsageTracker(ctx, governanceStore, resolver, configStore, logger) @@ -263,7 +263,7 @@ func InitFromStore( isVkMandatory = config.IsVkMandatory requiredHeaders = config.RequiredHeaders } - resolver := NewBudgetResolver(governanceStore, modelCatalog, logger) + resolver := NewBudgetResolver(governanceStore, modelCatalog, logger, inMemoryStore) tracker := NewUsageTracker(ctx, governanceStore, resolver, configStore, logger) engine, err := NewRoutingEngine(governanceStore, logger) if err != nil { @@ -576,8 +576,10 @@ func (p *GovernancePlugin) loadBalanceProvider(ctx *schemas.BifrostContext, req // This handles all cross-provider logic (OpenRouter, Vertex, Groq, Bedrock) // and provider-prefixed allowed_models entries isProviderAllowed := false - if p.modelCatalog != nil { - isProviderAllowed = p.modelCatalog.IsModelAllowedForProvider(schemas.ModelProvider(config.Provider), modelStr, config.AllowedModels) + if p.modelCatalog != nil && p.inMemoryStore != nil { + provider := schemas.ModelProvider(config.Provider) + providerConfig := p.inMemoryStore.GetConfiguredProviders()[provider] + isProviderAllowed = p.modelCatalog.IsModelAllowedForProvider(provider, modelStr, &providerConfig, config.AllowedModels) } else { // Fallback when model catalog is not available: simple string matching if len(config.AllowedModels) == 0 { diff --git a/plugins/governance/model_provider_governance_test.go b/plugins/governance/model_provider_governance_test.go index a17855552a..fb8d188f66 100644 --- a/plugins/governance/model_provider_governance_test.go +++ b/plugins/governance/model_provider_governance_test.go @@ -793,7 +793,7 @@ func TestResolver_EvaluateModelAndProviderRequest_NoConfigs(t *testing.T) { store, err := NewLocalGovernanceStore(context.Background(), logger, nil, &configstore.GovernanceConfig{}, nil) require.NoError(t, err) - resolver := NewBudgetResolver(store, nil, logger) + resolver := NewBudgetResolver(store, nil, logger, nil) ctx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline) result := resolver.EvaluateModelAndProviderRequest(ctx, schemas.OpenAI, "gpt-4") @@ -810,7 +810,7 @@ func TestResolver_EvaluateModelAndProviderRequest_ProviderBudgetExceeded(t *test }, nil) require.NoError(t, err) - resolver := NewBudgetResolver(store, nil, logger) + resolver := NewBudgetResolver(store, nil, logger, nil) ctx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline) result := resolver.EvaluateModelAndProviderRequest(ctx, schemas.OpenAI, "gpt-4") @@ -828,7 +828,7 @@ func TestResolver_EvaluateModelAndProviderRequest_ProviderRateLimitExceeded(t *t }, nil) require.NoError(t, err) - resolver := NewBudgetResolver(store, nil, logger) + resolver := NewBudgetResolver(store, nil, logger, nil) ctx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline) result := resolver.EvaluateModelAndProviderRequest(ctx, schemas.OpenAI, "gpt-4") @@ -846,7 +846,7 @@ func TestResolver_EvaluateModelAndProviderRequest_ModelBudgetExceeded(t *testing }, nil) require.NoError(t, err) - resolver := NewBudgetResolver(store, nil, logger) + resolver := NewBudgetResolver(store, nil, logger, nil) ctx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline) result := resolver.EvaluateModelAndProviderRequest(ctx, schemas.OpenAI, "gpt-4") @@ -864,7 +864,7 @@ func TestResolver_EvaluateModelAndProviderRequest_ModelRateLimitExceeded(t *test }, nil) require.NoError(t, err) - resolver := NewBudgetResolver(store, nil, 
logger) + resolver := NewBudgetResolver(store, nil, logger, nil) ctx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline) result := resolver.EvaluateModelAndProviderRequest(ctx, schemas.OpenAI, "gpt-4") @@ -882,7 +882,7 @@ func TestResolver_EvaluateModelAndProviderRequest_ModelRateLimitExceeded_Request }, nil) require.NoError(t, err) - resolver := NewBudgetResolver(store, nil, logger) + resolver := NewBudgetResolver(store, nil, logger, nil) ctx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline) result := resolver.EvaluateModelAndProviderRequest(ctx, schemas.OpenAI, "gpt-4") @@ -905,7 +905,7 @@ func TestResolver_EvaluateModelAndProviderRequest_ProviderBudgetThenModelBudget( }, nil) require.NoError(t, err) - resolver := NewBudgetResolver(store, nil, logger) + resolver := NewBudgetResolver(store, nil, logger, nil) ctx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline) result := resolver.EvaluateModelAndProviderRequest(ctx, schemas.OpenAI, "gpt-4") @@ -929,7 +929,7 @@ func TestResolver_EvaluateModelAndProviderRequest_ProviderRateLimitThenModelRate }, nil) require.NoError(t, err) - resolver := NewBudgetResolver(store, nil, logger) + resolver := NewBudgetResolver(store, nil, logger, nil) ctx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline) result := resolver.EvaluateModelAndProviderRequest(ctx, schemas.OpenAI, "gpt-4") @@ -953,7 +953,7 @@ func TestResolver_EvaluateModelAndProviderRequest_ProviderRateLimitThenModelRate }, nil) require.NoError(t, err) - resolver := NewBudgetResolver(store, nil, logger) + resolver := NewBudgetResolver(store, nil, logger, nil) ctx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline) result := resolver.EvaluateModelAndProviderRequest(ctx, schemas.OpenAI, "gpt-4") @@ -980,7 +980,7 @@ func TestResolver_EvaluateModelAndProviderRequest_AllChecksPass(t *testing.T) { }, nil) require.NoError(t, err) - resolver := NewBudgetResolver(store, nil, logger) + resolver := NewBudgetResolver(store, nil, logger, nil) ctx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline) result := resolver.EvaluateModelAndProviderRequest(ctx, schemas.OpenAI, "gpt-4") @@ -998,7 +998,7 @@ func TestResolver_EvaluateModelAndProviderRequest_ProviderOnly_NoModel(t *testin }, nil) require.NoError(t, err) - resolver := NewBudgetResolver(store, nil, logger) + resolver := NewBudgetResolver(store, nil, logger, nil) ctx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline) // No model provided @@ -1016,7 +1016,7 @@ func TestResolver_EvaluateModelAndProviderRequest_ModelOnly_NoProvider(t *testin }, nil) require.NoError(t, err) - resolver := NewBudgetResolver(store, nil, logger) + resolver := NewBudgetResolver(store, nil, logger, nil) ctx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline) // No provider provided @@ -1036,7 +1036,7 @@ func TestResolver_EvaluateModelAndProviderRequest_ProviderSpecificBudget_Differe }, nil) require.NoError(t, err) - resolver := NewBudgetResolver(store, nil, logger) + resolver := NewBudgetResolver(store, nil, logger, nil) ctx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline) // Request with Azure (different provider) for same model should pass @@ -1056,7 +1056,7 @@ func TestResolver_EvaluateModelAndProviderRequest_ProviderSpecificRateLimit_Diff }, nil) require.NoError(t, err) - resolver := NewBudgetResolver(store, nil, logger) + resolver := NewBudgetResolver(store, nil, logger, nil) ctx := 
@@ -1056,7 +1056,7 @@ func TestResolver_EvaluateModelAndProviderRequest_ProviderSpecificRateLimit_Diff
 	}, nil)
 	require.NoError(t, err)
 
-	resolver := NewBudgetResolver(store, nil, logger)
+	resolver := NewBudgetResolver(store, nil, logger, nil)
 
 	ctx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline)
 	// Request with Azure (different provider) for same model should pass
@@ -1076,7 +1076,7 @@ func TestResolver_EvaluateModelAndProviderRequest_ProviderSpecificRateLimit_Diff
 	}, nil)
 	require.NoError(t, err)
 
-	resolver := NewBudgetResolver(store, nil, logger)
+	resolver := NewBudgetResolver(store, nil, logger, nil)
 
 	ctx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline)
 	// Request with Azure (different provider) for same model should pass
diff --git a/plugins/governance/resolver.go b/plugins/governance/resolver.go
index 8d3da777af..bccca93a67 100644
--- a/plugins/governance/resolver.go
+++ b/plugins/governance/resolver.go
@@ -62,17 +62,19 @@ type UsageInfo struct {
 
 // BudgetResolver provides decision logic for the new hierarchical governance system
 type BudgetResolver struct {
-	store        GovernanceStore
-	logger       schemas.Logger
-	modelCatalog *modelcatalog.ModelCatalog
+	store                   GovernanceStore
+	logger                  schemas.Logger
+	modelCatalog            *modelcatalog.ModelCatalog
+	governanceInMemoryStore InMemoryStore
 }
 
 // NewBudgetResolver creates a new budget-based governance resolver
-func NewBudgetResolver(store GovernanceStore, modelCatalog *modelcatalog.ModelCatalog, logger schemas.Logger) *BudgetResolver {
+func NewBudgetResolver(store GovernanceStore, modelCatalog *modelcatalog.ModelCatalog, logger schemas.Logger, governanceInMemoryStore InMemoryStore) *BudgetResolver {
 	return &BudgetResolver{
-		store:        store,
-		logger:       logger,
-		modelCatalog: modelCatalog,
+		store:                   store,
+		logger:                  logger,
+		modelCatalog:            modelCatalog,
+		governanceInMemoryStore: governanceInMemoryStore,
 	}
 }
 
@@ -334,8 +336,9 @@ func (r *BudgetResolver) isModelAllowed(vk *configstoreTables.TableVirtualKey, p
 	// Delegate model allowance check to model catalog
 	// This handles all cross-provider logic (OpenRouter, Vertex, Groq, Bedrock)
 	// and provider-prefixed allowed_models entries
-	if r.modelCatalog != nil {
-		return r.modelCatalog.IsModelAllowedForProvider(provider, model, pc.AllowedModels)
+	if r.modelCatalog != nil && r.governanceInMemoryStore != nil {
+		providerConfig := r.governanceInMemoryStore.GetConfiguredProviders()[provider]
+		return r.modelCatalog.IsModelAllowedForProvider(provider, model, &providerConfig, pc.AllowedModels)
 	}
 	// Fallback when model catalog is not available: simple string matching
 	if len(pc.AllowedModels) == 0 {
diff --git a/plugins/governance/resolver_test.go b/plugins/governance/resolver_test.go
index 7e55c57328..5dc1d3e0d7 100644
--- a/plugins/governance/resolver_test.go
+++ b/plugins/governance/resolver_test.go
@@ -23,7 +23,7 @@ func TestBudgetResolver_EvaluateRequest_AllowedRequest(t *testing.T) {
 	}, nil)
 	require.NoError(t, err)
 
-	resolver := NewBudgetResolver(store, nil, logger)
+	resolver := NewBudgetResolver(store, nil, logger, nil)
 
 	ctx := &schemas.BifrostContext{}
 	result := resolver.EvaluateVirtualKeyRequest(ctx, "sk-bf-test", schemas.OpenAI, "gpt-4", schemas.ChatCompletionRequest)
@@ -38,7 +38,7 @@ func TestBudgetResolver_EvaluateRequest_VirtualKeyNotFound(t *testing.T) {
 	store, err := NewLocalGovernanceStore(context.Background(), logger, nil, &configstore.GovernanceConfig{}, nil)
 	require.NoError(t, err)
 
-	resolver := NewBudgetResolver(store, nil, logger)
+	resolver := NewBudgetResolver(store, nil, logger, nil)
 
 	ctx := &schemas.BifrostContext{}
 	result := resolver.EvaluateVirtualKeyRequest(ctx, "sk-bf-nonexistent", schemas.OpenAI, "gpt-4", schemas.ChatCompletionRequest)
@@ -56,7 +56,7 @@ func
TestBudgetResolver_EvaluateRequest_VirtualKeyBlocked(t *testing.T) { }, nil) require.NoError(t, err) - resolver := NewBudgetResolver(store, nil, logger) + resolver := NewBudgetResolver(store, nil, logger, nil) ctx := &schemas.BifrostContext{} result := resolver.EvaluateVirtualKeyRequest(ctx, "sk-bf-test", schemas.OpenAI, "gpt-4", schemas.ChatCompletionRequest) @@ -79,7 +79,7 @@ func TestBudgetResolver_EvaluateRequest_ProviderBlocked(t *testing.T) { }, nil) require.NoError(t, err) - resolver := NewBudgetResolver(store, nil, logger) + resolver := NewBudgetResolver(store, nil, logger, nil) ctx := &schemas.BifrostContext{} // Try to use OpenAI (not allowed) @@ -111,7 +111,7 @@ func TestBudgetResolver_EvaluateRequest_ModelBlocked(t *testing.T) { }, nil) require.NoError(t, err) - resolver := NewBudgetResolver(store, nil, logger) + resolver := NewBudgetResolver(store, nil, logger, nil) ctx := &schemas.BifrostContext{} // Try to use gpt-4o-mini (not in allowed list) @@ -134,7 +134,7 @@ func TestBudgetResolver_EvaluateRequest_RateLimitExceeded_TokenLimit(t *testing. }, nil) require.NoError(t, err) - resolver := NewBudgetResolver(store, nil, logger) + resolver := NewBudgetResolver(store, nil, logger, nil) ctx := &schemas.BifrostContext{} result := resolver.EvaluateVirtualKeyRequest(ctx, "sk-bf-test", schemas.OpenAI, "gpt-4", schemas.ChatCompletionRequest) @@ -157,7 +157,7 @@ func TestBudgetResolver_EvaluateRequest_RateLimitExceeded_RequestLimit(t *testin }, nil) require.NoError(t, err) - resolver := NewBudgetResolver(store, nil, logger) + resolver := NewBudgetResolver(store, nil, logger, nil) ctx := &schemas.BifrostContext{} result := resolver.EvaluateVirtualKeyRequest(ctx, "sk-bf-test", schemas.OpenAI, "gpt-4", schemas.ChatCompletionRequest) @@ -195,7 +195,7 @@ func TestBudgetResolver_EvaluateRequest_RateLimitExpired(t *testing.T) { err = store.ResetExpiredRateLimits(context.Background(), expiredRateLimits) require.NoError(t, err) - resolver := NewBudgetResolver(store, nil, logger) + resolver := NewBudgetResolver(store, nil, logger, nil) ctx := &schemas.BifrostContext{} result := resolver.EvaluateVirtualKeyRequest(ctx, "sk-bf-test", schemas.OpenAI, "gpt-4", schemas.ChatCompletionRequest) @@ -217,7 +217,7 @@ func TestBudgetResolver_EvaluateRequest_BudgetExceeded(t *testing.T) { }, nil) require.NoError(t, err) - resolver := NewBudgetResolver(store, nil, logger) + resolver := NewBudgetResolver(store, nil, logger, nil) ctx := &schemas.BifrostContext{} result := resolver.EvaluateVirtualKeyRequest(ctx, "sk-bf-test", schemas.OpenAI, "gpt-4", schemas.ChatCompletionRequest) @@ -244,7 +244,7 @@ func TestBudgetResolver_EvaluateRequest_BudgetExpired(t *testing.T) { }, nil) require.NoError(t, err) - resolver := NewBudgetResolver(store, nil, logger) + resolver := NewBudgetResolver(store, nil, logger, nil) ctx := &schemas.BifrostContext{} result := resolver.EvaluateVirtualKeyRequest(ctx, "sk-bf-test", schemas.OpenAI, "gpt-4", schemas.ChatCompletionRequest) @@ -278,7 +278,7 @@ func TestBudgetResolver_EvaluateRequest_MultiLevelBudgetHierarchy(t *testing.T) }, nil) require.NoError(t, err) - resolver := NewBudgetResolver(store, nil, logger) + resolver := NewBudgetResolver(store, nil, logger, nil) ctx := &schemas.BifrostContext{} // Test: All under limit should pass @@ -312,7 +312,7 @@ func TestBudgetResolver_EvaluateRequest_ProviderLevelRateLimit(t *testing.T) { }, nil) require.NoError(t, err) - resolver := NewBudgetResolver(store, nil, logger) + resolver := NewBudgetResolver(store, nil, logger, nil) ctx := 
&schemas.BifrostContext{} result := resolver.EvaluateVirtualKeyRequest(ctx, "sk-bf-test", schemas.OpenAI, "gpt-4", schemas.ChatCompletionRequest) @@ -335,7 +335,7 @@ func TestBudgetResolver_CheckRateLimits_BothExceeded(t *testing.T) { }, nil) require.NoError(t, err) - resolver := NewBudgetResolver(store, nil, logger) + resolver := NewBudgetResolver(store, nil, logger, nil) ctx := &schemas.BifrostContext{} result := resolver.EvaluateVirtualKeyRequest(ctx, "sk-bf-test", schemas.OpenAI, "gpt-4", schemas.ChatCompletionRequest) @@ -350,7 +350,7 @@ func TestBudgetResolver_IsProviderAllowed(t *testing.T) { store, err := NewLocalGovernanceStore(context.Background(), logger, nil, &configstore.GovernanceConfig{}, nil) require.NoError(t, err) - resolver := NewBudgetResolver(store, nil, logger) + resolver := NewBudgetResolver(store, nil, logger, nil) tests := []struct { name string @@ -398,7 +398,7 @@ func TestBudgetResolver_IsModelAllowed(t *testing.T) { store, err := NewLocalGovernanceStore(context.Background(), logger, nil, &configstore.GovernanceConfig{}, nil) require.NoError(t, err) - resolver := NewBudgetResolver(store, nil, logger) + resolver := NewBudgetResolver(store, nil, logger, nil) tests := []struct { name string @@ -473,7 +473,7 @@ func TestBudgetResolver_ContextPopulation(t *testing.T) { }, nil) require.NoError(t, err) - resolver := NewBudgetResolver(store, nil, logger) + resolver := NewBudgetResolver(store, nil, logger, nil) ctx := &schemas.BifrostContext{} result := resolver.EvaluateVirtualKeyRequest(ctx, "sk-bf-test", schemas.OpenAI, "gpt-4", schemas.ChatCompletionRequest) diff --git a/plugins/governance/tracker_test.go b/plugins/governance/tracker_test.go index 7102a8e06d..6af947d0fa 100644 --- a/plugins/governance/tracker_test.go +++ b/plugins/governance/tracker_test.go @@ -25,7 +25,7 @@ func TestUsageTracker_UpdateUsage_FailedRequest(t *testing.T) { }, nil) require.NoError(t, err) - resolver := NewBudgetResolver(store, nil, logger) + resolver := NewBudgetResolver(store, nil, logger, nil) tracker := NewUsageTracker(context.Background(), store, resolver, nil, logger) defer tracker.Cleanup() @@ -60,7 +60,7 @@ func TestUsageTracker_UpdateUsage_VirtualKeyNotFound(t *testing.T) { store, err := NewLocalGovernanceStore(context.Background(), logger, nil, &configstore.GovernanceConfig{}, nil) require.NoError(t, err) - resolver := NewBudgetResolver(store, nil, logger) + resolver := NewBudgetResolver(store, nil, logger, nil) tracker := NewUsageTracker(context.Background(), store, resolver, nil, logger) defer tracker.Cleanup() @@ -94,7 +94,7 @@ func TestUsageTracker_UpdateUsage_StreamingOptimization(t *testing.T) { }, nil) require.NoError(t, err) - resolver := NewBudgetResolver(store, nil, logger) + resolver := NewBudgetResolver(store, nil, logger, nil) tracker := NewUsageTracker(context.Background(), store, resolver, nil, logger) defer tracker.Cleanup() @@ -157,7 +157,7 @@ func TestUsageTracker_Cleanup(t *testing.T) { store, err := NewLocalGovernanceStore(context.Background(), logger, nil, &configstore.GovernanceConfig{}, nil) require.NoError(t, err) - resolver := NewBudgetResolver(store, nil, logger) + resolver := NewBudgetResolver(store, nil, logger, nil) tracker := NewUsageTracker(context.Background(), store, resolver, nil, logger) // Should cleanup without error diff --git a/plugins/governance/utils.go b/plugins/governance/utils.go index f33abb3aa1..3ba0849f6d 100644 --- a/plugins/governance/utils.go +++ b/plugins/governance/utils.go @@ -2,6 +2,7 @@ package governance import ( + "slices" 
"strings" bifrost "github.com/maximhq/bifrost/core" @@ -113,9 +114,17 @@ func (p *GovernancePlugin) filterModelsForVirtualKey( isAllowed := false for _, pc := range vk.ProviderConfigs { if pc.Provider == string(provider) { - if p.modelCatalog.IsModelAllowedForProvider(provider, modelName, pc.AllowedModels) { - isAllowed = true - break + if p.modelCatalog != nil && p.inMemoryStore != nil { + providerConfig := p.inMemoryStore.GetConfiguredProviders()[provider] + if p.modelCatalog.IsModelAllowedForProvider(provider, modelName, &providerConfig, pc.AllowedModels) { + isAllowed = true + break + } + } else { + if len(pc.AllowedModels) == 0 || slices.Contains(pc.AllowedModels, modelName) { + isAllowed = true + break + } } } } diff --git a/plugins/governance/version b/plugins/governance/version index 9b2fc0c8ca..212432caa4 100644 --- a/plugins/governance/version +++ b/plugins/governance/version @@ -1 +1 @@ -1.4.38 +1.4.39 diff --git a/plugins/jsonparser/go.mod b/plugins/jsonparser/go.mod index f56f394e76..063b261a51 100644 --- a/plugins/jsonparser/go.mod +++ b/plugins/jsonparser/go.mod @@ -1,8 +1,8 @@ module github.com/maximhq/bifrost/plugins/jsonparser -go 1.26.2 +go 1.26.1 -require github.com/maximhq/bifrost/core v1.4.19 +require github.com/maximhq/bifrost/core v1.4.22 require ( cloud.google.com/go v0.123.0 // indirect diff --git a/plugins/jsonparser/go.sum b/plugins/jsonparser/go.sum index b0d542b919..54b9d919a9 100644 --- a/plugins/jsonparser/go.sum +++ b/plugins/jsonparser/go.sum @@ -109,8 +109,8 @@ github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/ github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= -github.com/maximhq/bifrost/core v1.4.19 h1:RY6n8O1upUo/W05whsLO2OJtJpCpTFzplyq+6pk4NaY= -github.com/maximhq/bifrost/core v1.4.19/go.mod h1:iOh2p1qEKVolMG8l84LWcn1CseUL2Kl5c/iXFMiZI84= +github.com/maximhq/bifrost/core v1.4.22 h1:iK3OhfOFzrivnQgGjFBqAEkXYzn5BYSPkwoXH4o1tWM= +github.com/maximhq/bifrost/core v1.4.22/go.mod h1:IaGSRlCgvy6Nr7Xiyu44sJJo9JbK188EL4QPXIcvURA= github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c h1:+mdjkGKdHQG3305AYmdv1U2eRNDiU2ErMBj1gwrq8eQ= github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c/go.mod h1:7rwL4CYBLnjLxUqIJNnCWiEdr3bn6IUYi15bNlnbCCU= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= diff --git a/plugins/jsonparser/version b/plugins/jsonparser/version index b242277136..9b2fc0c8ca 100644 --- a/plugins/jsonparser/version +++ b/plugins/jsonparser/version @@ -1 +1 @@ -1.4.37 +1.4.38 diff --git a/plugins/litellmcompat/go.mod b/plugins/litellmcompat/go.mod index e2c5d88a52..bd37abdf72 100644 --- a/plugins/litellmcompat/go.mod +++ b/plugins/litellmcompat/go.mod @@ -1,10 +1,10 @@ module github.com/maximhq/bifrost/plugins/litellmcompat -go 1.26.2 +go 1.26.1 require ( - github.com/maximhq/bifrost/core v1.4.19 - github.com/maximhq/bifrost/framework v1.2.38 + github.com/maximhq/bifrost/core v1.4.22 + github.com/maximhq/bifrost/framework v1.2.39 ) require ( diff --git a/plugins/litellmcompat/go.sum b/plugins/litellmcompat/go.sum index 551c849fd0..60fc5426ea 100644 --- a/plugins/litellmcompat/go.sum +++ b/plugins/litellmcompat/go.sum @@ -193,10 +193,10 @@ github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWE github.com/mattn/go-isatty v0.0.20/go.mod 
h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
 github.com/mattn/go-sqlite3 v1.14.32 h1:JD12Ag3oLy1zQA+BNn74xRgaBbdhbNIDYvQUEuuErjs=
 github.com/mattn/go-sqlite3 v1.14.32/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y=
-github.com/maximhq/bifrost/core v1.4.19 h1:RY6n8O1upUo/W05whsLO2OJtJpCpTFzplyq+6pk4NaY=
-github.com/maximhq/bifrost/core v1.4.19/go.mod h1:iOh2p1qEKVolMG8l84LWcn1CseUL2Kl5c/iXFMiZI84=
-github.com/maximhq/bifrost/framework v1.2.38 h1:uWITpE+PilOwo1CQCXhZ8iQ98hH7VOBroJlTMWGL8As=
-github.com/maximhq/bifrost/framework v1.2.38/go.mod h1:S7vc+M9QOAj7RYK6pA0uauWHMxLxGRvlhHlZGArNsPw=
+github.com/maximhq/bifrost/core v1.4.22 h1:iK3OhfOFzrivnQgGjFBqAEkXYzn5BYSPkwoXH4o1tWM=
+github.com/maximhq/bifrost/core v1.4.22/go.mod h1:IaGSRlCgvy6Nr7Xiyu44sJJo9JbK188EL4QPXIcvURA=
+github.com/maximhq/bifrost/framework v1.2.39 h1:BCYfFFHBcx1xlnAy4GN/6+jLOUyysWQvdVjZ62OzAT4=
+github.com/maximhq/bifrost/framework v1.2.39/go.mod h1:+HrM35y5Jid35NKwUcG4GAXvOuCnMu/5bltqdIAhy84=
 github.com/oapi-codegen/runtime v1.1.1 h1:EXLHh0DXIJnWhdRPN2w4MXAzFyE4CskzhNLUmtpMYro=
 github.com/oapi-codegen/runtime v1.1.1/go.mod h1:SK9X900oXmPWilYR5/WKPzt3Kqxn/uS/+lbpREv+eCg=
 github.com/oklog/ulid v1.3.1 h1:EGfNDEx6MqHz8B3uNV6QAib1UR2Lm97sHi3ocA6ESJ4=
diff --git a/plugins/litellmcompat/version b/plugins/litellmcompat/version
index 24ff85581f..1fe6958562 100644
--- a/plugins/litellmcompat/version
+++ b/plugins/litellmcompat/version
@@ -1 +1 @@
-0.0.27
+0.0.28
diff --git a/plugins/logging/go.mod b/plugins/logging/go.mod
index 847efe0d82..1bd0be65d5 100644
--- a/plugins/logging/go.mod
+++ b/plugins/logging/go.mod
@@ -1,11 +1,11 @@
 module github.com/maximhq/bifrost/plugins/logging
 
-go 1.26.2
+go 1.26.1
 
 require (
 	github.com/bytedance/sonic v1.15.0
-	github.com/maximhq/bifrost/core v1.4.19
-	github.com/maximhq/bifrost/framework v1.2.38
+	github.com/maximhq/bifrost/core v1.4.22
+	github.com/maximhq/bifrost/framework v1.2.39
 )
 
 require (
diff --git a/plugins/logging/go.sum b/plugins/logging/go.sum
index 551c849fd0..60fc5426ea 100644
--- a/plugins/logging/go.sum
+++ b/plugins/logging/go.sum
@@ -193,10 +193,10 @@ github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWE
 github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
 github.com/mattn/go-sqlite3 v1.14.32 h1:JD12Ag3oLy1zQA+BNn74xRgaBbdhbNIDYvQUEuuErjs=
 github.com/mattn/go-sqlite3 v1.14.32/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y=
-github.com/maximhq/bifrost/core v1.4.19 h1:RY6n8O1upUo/W05whsLO2OJtJpCpTFzplyq+6pk4NaY=
-github.com/maximhq/bifrost/core v1.4.19/go.mod h1:iOh2p1qEKVolMG8l84LWcn1CseUL2Kl5c/iXFMiZI84=
-github.com/maximhq/bifrost/framework v1.2.38 h1:uWITpE+PilOwo1CQCXhZ8iQ98hH7VOBroJlTMWGL8As=
-github.com/maximhq/bifrost/framework v1.2.38/go.mod h1:S7vc+M9QOAj7RYK6pA0uauWHMxLxGRvlhHlZGArNsPw=
+github.com/maximhq/bifrost/core v1.4.22 h1:iK3OhfOFzrivnQgGjFBqAEkXYzn5BYSPkwoXH4o1tWM=
+github.com/maximhq/bifrost/core v1.4.22/go.mod h1:IaGSRlCgvy6Nr7Xiyu44sJJo9JbK188EL4QPXIcvURA=
+github.com/maximhq/bifrost/framework v1.2.39 h1:BCYfFFHBcx1xlnAy4GN/6+jLOUyysWQvdVjZ62OzAT4=
+github.com/maximhq/bifrost/framework v1.2.39/go.mod h1:+HrM35y5Jid35NKwUcG4GAXvOuCnMu/5bltqdIAhy84=
 github.com/oapi-codegen/runtime v1.1.1 h1:EXLHh0DXIJnWhdRPN2w4MXAzFyE4CskzhNLUmtpMYro=
 github.com/oapi-codegen/runtime v1.1.1/go.mod h1:SK9X900oXmPWilYR5/WKPzt3Kqxn/uS/+lbpREv+eCg=
 github.com/oklog/ulid v1.3.1 h1:EGfNDEx6MqHz8B3uNV6QAib1UR2Lm97sHi3ocA6ESJ4=
diff --git a/plugins/logging/main.go b/plugins/logging/main.go
index 550f3e75fe..a66432b415 100644
--- a/plugins/logging/main.go
+++ b/plugins/logging/main.go
@@ -680,6 +680,21 @@ func (p *LoggerPlugin) PostLLMHook(ctx *schemas.BifrostContext, result *schemas.
 	if bifrost.IsStreamRequestType(requestType) {
 		entry.Stream = true
 	}
+
+	// For streaming errors, finalize and read accumulated chunks so logs retain pre-error stream metadata
+	if bifrost.IsStreamRequestType(requestType) &&
+		requestType != schemas.PassthroughStreamRequest &&
+		requestType != schemas.RealtimeRequest &&
+		tracer != nil &&
+		traceID != "" {
+		if accResult := tracer.ProcessStreamingChunk(traceID, true, result, bifrostErr); accResult != nil {
+			if streamResponse := convertToProcessedStreamResponse(accResult, requestType); streamResponse != nil {
+				p.applyStreamingOutputToEntry(entry, streamResponse)
+			}
+		}
+		tracer.CleanupStreamAccumulator(traceID)
+	}
+
 	// Serialize error details immediately since bifrostErr may be released
 	// back to the pool before the async batch writer processes this entry.
 	// Also set ErrorDetailsParsed for UI callback (JSON serialization uses this field).
@@ -688,14 +703,14 @@ func (p *LoggerPlugin) PostLLMHook(ctx *schemas.BifrostContext, result *schemas.
 	}
 	entry.ErrorDetailsParsed = bifrostErr
 	if p.disableContentLogging == nil || !*p.disableContentLogging {
-		if bifrostErr.ExtraFields.RawRequest != nil {
+		if entry.RawRequest == "" && bifrostErr.ExtraFields.RawRequest != nil {
 			rawReqBytes, err := sonic.Marshal(bifrostErr.ExtraFields.RawRequest)
 			if err == nil {
 				entry.RawRequest = string(rawReqBytes)
 			}
 		}
-		if bifrostErr.ExtraFields.RawResponse != nil {
+		if entry.RawResponse == "" && bifrostErr.ExtraFields.RawResponse != nil {
 			rawRespBytes, err := sonic.Marshal(bifrostErr.ExtraFields.RawResponse)
 			if err == nil {
 				entry.RawResponse = string(rawRespBytes)
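The new block in PostLLMHook drains the stream accumulator on error so the log entry keeps whatever chunks arrived before the failure, then cleans the accumulator up. A minimal sketch of that finalize-then-cleanup pattern, with stand-in types (Chunk, Accumulator are assumptions, not the plugin's real tracer API):

package main

import "fmt"

type Chunk struct{ Text string }

// Accumulator collects chunks per trace until the stream ends or errors.
type Accumulator struct{ chunks map[string][]Chunk }

// Finalize drains what arrived before the failure so the caller can log the
// partial output; deleting the entry mirrors CleanupStreamAccumulator.
func (a *Accumulator) Finalize(traceID string) []Chunk {
	got := a.chunks[traceID]
	delete(a.chunks, traceID)
	return got
}

func main() {
	acc := &Accumulator{chunks: map[string][]Chunk{
		"t1": {{Text: "partial "}, {Text: "answer"}},
	}}
	// On a mid-stream error, the error path still reads the partial output
	// instead of recording an empty response.
	for _, c := range acc.Finalize("t1") {
		fmt.Print(c.Text)
	}
	fmt.Println()
}

The RawRequest/RawResponse guards in the second hunk follow from this: once the streaming path has populated the entry, the error path must not overwrite those fields, so it only fills them when they are still empty.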
diff --git a/plugins/logging/operations.go b/plugins/logging/operations.go
index cafac65fc0..499ce57a02 100644
--- a/plugins/logging/operations.go
+++ b/plugins/logging/operations.go
@@ -542,13 +542,12 @@ func (p *LoggerPlugin) applyStreamingOutputToEntry(entry *logstore.Log, streamRe
 		}
 		latF := float64(streamResponse.Data.Latency)
 		entry.Latency = &latF
-		return
+	} else {
+		entry.Status = "success"
+		latF := float64(streamResponse.Data.Latency)
+		entry.Latency = &latF
 	}
 
-	entry.Status = "success"
-	latF := float64(streamResponse.Data.Latency)
-	entry.Latency = &latF
-
 	// Update model if provided
 	if streamResponse.Data.Model != "" {
 		entry.Model = streamResponse.Data.Model
@@ -1093,19 +1092,19 @@ func buildResponseForRequestType(requestType schemas.RequestType, usage *schemas
 			CachedWriteTokens: usage.PromptTokensDetails.CachedWriteTokens,
 		}
 	}
-	if usage.CompletionTokensDetails != nil {
-		respUsage.OutputTokensDetails = &schemas.ResponsesResponseOutputTokens{
-			TextTokens:               usage.CompletionTokensDetails.TextTokens,
-			AcceptedPredictionTokens: usage.CompletionTokensDetails.AcceptedPredictionTokens,
-			AudioTokens:              usage.CompletionTokensDetails.AudioTokens,
-			ImageTokens:              usage.CompletionTokensDetails.ImageTokens,
-			ReasoningTokens:          usage.CompletionTokensDetails.ReasoningTokens,
-			RejectedPredictionTokens: usage.CompletionTokensDetails.RejectedPredictionTokens,
-			CitationTokens:           usage.CompletionTokensDetails.CitationTokens,
-			NumSearchQueries:         usage.CompletionTokensDetails.NumSearchQueries,
+		if usage.CompletionTokensDetails != nil {
+			respUsage.OutputTokensDetails = &schemas.ResponsesResponseOutputTokens{
+				TextTokens:               usage.CompletionTokensDetails.TextTokens,
+				AcceptedPredictionTokens:
usage.CompletionTokensDetails.AcceptedPredictionTokens, + AudioTokens: usage.CompletionTokensDetails.AudioTokens, + ImageTokens: usage.CompletionTokensDetails.ImageTokens, + ReasoningTokens: usage.CompletionTokensDetails.ReasoningTokens, + RejectedPredictionTokens: usage.CompletionTokensDetails.RejectedPredictionTokens, + CitationTokens: usage.CompletionTokensDetails.CitationTokens, + NumSearchQueries: usage.CompletionTokensDetails.NumSearchQueries, + } } } - } return &schemas.BifrostResponse{ ResponsesResponse: &schemas.BifrostResponsesResponse{ Usage: respUsage, diff --git a/plugins/logging/version b/plugins/logging/version index 9b2fc0c8ca..212432caa4 100644 --- a/plugins/logging/version +++ b/plugins/logging/version @@ -1 +1 @@ -1.4.38 +1.4.39 diff --git a/plugins/maxim/go.mod b/plugins/maxim/go.mod index ee75d3ae18..9906fc8199 100644 --- a/plugins/maxim/go.mod +++ b/plugins/maxim/go.mod @@ -1,10 +1,10 @@ module github.com/maximhq/bifrost/plugins/maxim -go 1.26.2 +go 1.26.1 require ( - github.com/maximhq/bifrost/core v1.4.19 - github.com/maximhq/bifrost/framework v1.2.38 + github.com/maximhq/bifrost/core v1.4.22 + github.com/maximhq/bifrost/framework v1.2.39 github.com/maximhq/maxim-go v0.2.1 ) diff --git a/plugins/maxim/go.sum b/plugins/maxim/go.sum index b85f533eee..bc7a0ebcaa 100644 --- a/plugins/maxim/go.sum +++ b/plugins/maxim/go.sum @@ -193,10 +193,10 @@ github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWE github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= github.com/mattn/go-sqlite3 v1.14.32 h1:JD12Ag3oLy1zQA+BNn74xRgaBbdhbNIDYvQUEuuErjs= github.com/mattn/go-sqlite3 v1.14.32/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y= -github.com/maximhq/bifrost/core v1.4.19 h1:RY6n8O1upUo/W05whsLO2OJtJpCpTFzplyq+6pk4NaY= -github.com/maximhq/bifrost/core v1.4.19/go.mod h1:iOh2p1qEKVolMG8l84LWcn1CseUL2Kl5c/iXFMiZI84= -github.com/maximhq/bifrost/framework v1.2.38 h1:uWITpE+PilOwo1CQCXhZ8iQ98hH7VOBroJlTMWGL8As= -github.com/maximhq/bifrost/framework v1.2.38/go.mod h1:S7vc+M9QOAj7RYK6pA0uauWHMxLxGRvlhHlZGArNsPw= +github.com/maximhq/bifrost/core v1.4.22 h1:iK3OhfOFzrivnQgGjFBqAEkXYzn5BYSPkwoXH4o1tWM= +github.com/maximhq/bifrost/core v1.4.22/go.mod h1:IaGSRlCgvy6Nr7Xiyu44sJJo9JbK188EL4QPXIcvURA= +github.com/maximhq/bifrost/framework v1.2.39 h1:BCYfFFHBcx1xlnAy4GN/6+jLOUyysWQvdVjZ62OzAT4= +github.com/maximhq/bifrost/framework v1.2.39/go.mod h1:+HrM35y5Jid35NKwUcG4GAXvOuCnMu/5bltqdIAhy84= github.com/maximhq/maxim-go v0.2.1 h1:hCp8dQ4HsyyNC+y5HCUuY/HFD0sOnGkjL5MdYCHkgEQ= github.com/maximhq/maxim-go v0.2.1/go.mod h1:nwFznXy0Dn4mxXGU4X+BCnE3VP68L+FPEaW0yUgk96o= github.com/oapi-codegen/runtime v1.1.1 h1:EXLHh0DXIJnWhdRPN2w4MXAzFyE4CskzhNLUmtpMYro= diff --git a/plugins/maxim/version b/plugins/maxim/version index c7966c0d95..9caff18fd7 100644 --- a/plugins/maxim/version +++ b/plugins/maxim/version @@ -1 +1 @@ -1.5.38 +1.5.39 diff --git a/plugins/mocker/go.mod b/plugins/mocker/go.mod index 34ebd36f31..5363ca9fcf 100644 --- a/plugins/mocker/go.mod +++ b/plugins/mocker/go.mod @@ -1,10 +1,10 @@ module github.com/maximhq/bifrost/plugins/mocker -go 1.26.2 +go 1.26.1 require ( github.com/jaswdr/faker/v2 v2.8.0 - github.com/maximhq/bifrost/core v1.4.19 + github.com/maximhq/bifrost/core v1.4.22 ) require ( diff --git a/plugins/mocker/go.sum b/plugins/mocker/go.sum index c2be4fb774..b7f3bcdf92 100644 --- a/plugins/mocker/go.sum +++ b/plugins/mocker/go.sum @@ -111,8 +111,8 @@ github.com/mattn/go-isatty v0.0.16/go.mod 
h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/ github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= -github.com/maximhq/bifrost/core v1.4.19 h1:RY6n8O1upUo/W05whsLO2OJtJpCpTFzplyq+6pk4NaY= -github.com/maximhq/bifrost/core v1.4.19/go.mod h1:iOh2p1qEKVolMG8l84LWcn1CseUL2Kl5c/iXFMiZI84= +github.com/maximhq/bifrost/core v1.4.22 h1:iK3OhfOFzrivnQgGjFBqAEkXYzn5BYSPkwoXH4o1tWM= +github.com/maximhq/bifrost/core v1.4.22/go.mod h1:IaGSRlCgvy6Nr7Xiyu44sJJo9JbK188EL4QPXIcvURA= github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c h1:+mdjkGKdHQG3305AYmdv1U2eRNDiU2ErMBj1gwrq8eQ= github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c/go.mod h1:7rwL4CYBLnjLxUqIJNnCWiEdr3bn6IUYi15bNlnbCCU= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= diff --git a/plugins/mocker/version b/plugins/mocker/version index b242277136..9b2fc0c8ca 100644 --- a/plugins/mocker/version +++ b/plugins/mocker/version @@ -1 +1 @@ -1.4.37 +1.4.38 diff --git a/plugins/otel/go.mod b/plugins/otel/go.mod index 28c10d5ba2..92c347ddde 100644 --- a/plugins/otel/go.mod +++ b/plugins/otel/go.mod @@ -1,10 +1,10 @@ module github.com/maximhq/bifrost/plugins/otel -go 1.26.2 +go 1.26.1 require ( - github.com/maximhq/bifrost/core v1.4.19 - github.com/maximhq/bifrost/framework v1.2.38 + github.com/maximhq/bifrost/core v1.4.22 + github.com/maximhq/bifrost/framework v1.2.39 go.opentelemetry.io/otel v1.43.0 go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.43.0 go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.43.0 diff --git a/plugins/otel/go.sum b/plugins/otel/go.sum index d777f95785..4c76f83ed9 100644 --- a/plugins/otel/go.sum +++ b/plugins/otel/go.sum @@ -197,10 +197,10 @@ github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWE github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= github.com/mattn/go-sqlite3 v1.14.32 h1:JD12Ag3oLy1zQA+BNn74xRgaBbdhbNIDYvQUEuuErjs= github.com/mattn/go-sqlite3 v1.14.32/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y= -github.com/maximhq/bifrost/core v1.4.19 h1:RY6n8O1upUo/W05whsLO2OJtJpCpTFzplyq+6pk4NaY= -github.com/maximhq/bifrost/core v1.4.19/go.mod h1:iOh2p1qEKVolMG8l84LWcn1CseUL2Kl5c/iXFMiZI84= -github.com/maximhq/bifrost/framework v1.2.38 h1:uWITpE+PilOwo1CQCXhZ8iQ98hH7VOBroJlTMWGL8As= -github.com/maximhq/bifrost/framework v1.2.38/go.mod h1:S7vc+M9QOAj7RYK6pA0uauWHMxLxGRvlhHlZGArNsPw= +github.com/maximhq/bifrost/core v1.4.22 h1:iK3OhfOFzrivnQgGjFBqAEkXYzn5BYSPkwoXH4o1tWM= +github.com/maximhq/bifrost/core v1.4.22/go.mod h1:IaGSRlCgvy6Nr7Xiyu44sJJo9JbK188EL4QPXIcvURA= +github.com/maximhq/bifrost/framework v1.2.39 h1:BCYfFFHBcx1xlnAy4GN/6+jLOUyysWQvdVjZ62OzAT4= +github.com/maximhq/bifrost/framework v1.2.39/go.mod h1:+HrM35y5Jid35NKwUcG4GAXvOuCnMu/5bltqdIAhy84= github.com/oapi-codegen/runtime v1.1.1 h1:EXLHh0DXIJnWhdRPN2w4MXAzFyE4CskzhNLUmtpMYro= github.com/oapi-codegen/runtime v1.1.1/go.mod h1:SK9X900oXmPWilYR5/WKPzt3Kqxn/uS/+lbpREv+eCg= github.com/oklog/ulid v1.3.1 h1:EGfNDEx6MqHz8B3uNV6QAib1UR2Lm97sHi3ocA6ESJ4= diff --git a/plugins/otel/main.go b/plugins/otel/main.go index 31c799d533..faa32944e9 100644 --- a/plugins/otel/main.go +++ b/plugins/otel/main.go @@ -10,6 +10,7 @@ import ( "github.com/bytedance/sonic" "github.com/maximhq/bifrost/core/schemas" 
"github.com/maximhq/bifrost/framework/modelcatalog" + "go.opentelemetry.io/otel/attribute" commonpb "go.opentelemetry.io/proto/otlp/common/v1" ) @@ -50,7 +51,7 @@ type Config struct { TraceType TraceType `json:"trace_type"` Protocol Protocol `json:"protocol"` TLSCACert string `json:"tls_ca_cert"` - Insecure bool `json:"insecure"` // Skip TLS when true; ignored if TLSCACert is set + Insecure bool `json:"insecure"` // Skip TLS when true; ignored if TLSCACert is set. Defaults to true when omitted. // Metrics push configuration MetricsEnabled bool `json:"metrics_enabled"` @@ -58,6 +59,28 @@ type Config struct { MetricsPushInterval int `json:"metrics_push_interval"` // in seconds, default 15 } +// UnmarshalJSON applies field defaults that the zero-value wouldn't capture. +// Specifically, Insecure defaults to true when the key is omitted so http:// +// collectors work out-of-the-box without forcing users to set it explicitly. +func (c *Config) UnmarshalJSON(data []byte) error { + type alias Config + aux := struct { + Insecure *bool `json:"insecure"` + *alias + }{ + alias: (*alias)(c), + } + if err := sonic.Unmarshal(data, &aux); err != nil { + return err + } + if aux.Insecure == nil { + c.Insecure = true + } else { + c.Insecure = *aux.Insecure + } + return nil +} + // OtelPlugin is the plugin for OpenTelemetry. // It implements the ObservabilityPlugin interface to receive completed traces // from the tracing middleware and forward them to an OTEL collector. @@ -278,7 +301,6 @@ func (p *OtelPlugin) Inject(ctx context.Context, trace *schemas.Trace) error { } // Helper functions for type-safe attribute extraction from trace spans - func getStringAttr(attrs map[string]any, key string) string { if attrs == nil { return "" @@ -319,76 +341,77 @@ func getFloat64Attr(attrs map[string]any, key string) float64 { return 0 } +// buildSpanAttrs extracts metric dimension attrs from a single attempt span. +func buildSpanAttrs(span *schemas.Span) []attribute.KeyValue { + attrs := span.Attributes + method := getStringAttr(attrs, "request.type") + if method == "" { + method = span.Name + } + return BuildBifrostAttributes( + getStringAttr(attrs, schemas.AttrProviderName), + getStringAttr(attrs, schemas.AttrRequestModel), + method, + getStringAttr(attrs, schemas.AttrVirtualKeyID), + getStringAttr(attrs, schemas.AttrVirtualKeyName), + getStringAttr(attrs, schemas.AttrSelectedKeyID), + getStringAttr(attrs, schemas.AttrSelectedKeyName), + getIntAttr(attrs, schemas.AttrNumberOfRetries), + getIntAttr(attrs, schemas.AttrFallbackIndex), + getStringAttr(attrs, schemas.AttrTeamID), + getStringAttr(attrs, schemas.AttrTeamName), + getStringAttr(attrs, schemas.AttrCustomerID), + getStringAttr(attrs, schemas.AttrCustomerName), + ) +} + // recordMetricsFromTrace extracts metrics data from a completed trace and records them // via the OTEL metrics exporter. This is called from Inject after trace emission. +// +// Per-attempt metrics (upstream_requests, errors, success, latency) are recorded once +// per llm.call/retry span so fallback attempts and failed retries are counted with +// their own provider/model/fallback_index labels. Per-trace metrics (tokens, cost, +// TTFT) are recorded once, keyed off the final (latest) attempt span. func (p *OtelPlugin) recordMetricsFromTrace(ctx context.Context, trace *schemas.Trace) { if trace == nil || p.metricsExporter == nil { return } - // Prefer the last attempt span (LLM call or retry) so metrics reflect the final outcome. 
@@ -278,7 +301,6 @@ func (p *OtelPlugin) Inject(ctx context.Context, trace *schemas.Trace) error {
 }
 
 // Helper functions for type-safe attribute extraction from trace spans
-
 func getStringAttr(attrs map[string]any, key string) string {
 	if attrs == nil {
 		return ""
@@ -319,76 +341,77 @@ func getFloat64Attr(attrs map[string]any, key string) float64 {
 	return 0
 }
 
+// buildSpanAttrs extracts metric dimension attrs from a single attempt span.
+func buildSpanAttrs(span *schemas.Span) []attribute.KeyValue {
+	attrs := span.Attributes
+	method := getStringAttr(attrs, "request.type")
+	if method == "" {
+		method = span.Name
+	}
+	return BuildBifrostAttributes(
+		getStringAttr(attrs, schemas.AttrProviderName),
+		getStringAttr(attrs, schemas.AttrRequestModel),
+		method,
+		getStringAttr(attrs, schemas.AttrVirtualKeyID),
+		getStringAttr(attrs, schemas.AttrVirtualKeyName),
+		getStringAttr(attrs, schemas.AttrSelectedKeyID),
+		getStringAttr(attrs, schemas.AttrSelectedKeyName),
+		getIntAttr(attrs, schemas.AttrNumberOfRetries),
+		getIntAttr(attrs, schemas.AttrFallbackIndex),
+		getStringAttr(attrs, schemas.AttrTeamID),
+		getStringAttr(attrs, schemas.AttrTeamName),
+		getStringAttr(attrs, schemas.AttrCustomerID),
+		getStringAttr(attrs, schemas.AttrCustomerName),
+	)
+}
+
 // recordMetricsFromTrace extracts metrics data from a completed trace and records them
 // via the OTEL metrics exporter. This is called from Inject after trace emission.
+//
+// Per-attempt metrics (upstream_requests, errors, success, latency) are recorded once
+// per llm.call/retry span so fallback attempts and failed retries are counted with
+// their own provider/model/fallback_index labels. Per-trace metrics (tokens, cost,
+// TTFT) are recorded once, keyed off the final (latest) attempt span.
 func (p *OtelPlugin) recordMetricsFromTrace(ctx context.Context, trace *schemas.Trace) {
 	if trace == nil || p.metricsExporter == nil {
 		return
 	}
 
-	// Prefer the last attempt span (LLM call or retry) so metrics reflect the final outcome.
-	var llmSpan *schemas.Span
+	var finalSpan *schemas.Span
 	for _, span := range trace.Spans {
 		if span.Kind != schemas.SpanKindLLMCall && span.Kind != schemas.SpanKindRetry {
 			continue
 		}
-		if llmSpan == nil || span.EndTime.After(llmSpan.EndTime) {
-			llmSpan = span
-		}
-	}
-	if llmSpan == nil {
-		llmSpan = trace.RootSpan
-	}
-	if llmSpan == nil {
-		return
-	}
+		spanAttrs := buildSpanAttrs(span)
 
-	attrs := llmSpan.Attributes
+		p.metricsExporter.RecordUpstreamRequest(ctx, spanAttrs...)
 
-	// Extract all metric dimensions from span attributes
-	provider := getStringAttr(attrs, schemas.AttrProviderName)
-	model := getStringAttr(attrs, schemas.AttrRequestModel)
-	// Prefer request.type attribute to keep the method stable across retries
-	method := getStringAttr(attrs, "request.type")
-	if method == "" {
-		method = llmSpan.Name
-	}
-	virtualKeyID := getStringAttr(attrs, schemas.AttrVirtualKeyID)
-	virtualKeyName := getStringAttr(attrs, schemas.AttrVirtualKeyName)
-	selectedKeyID := getStringAttr(attrs, schemas.AttrSelectedKeyID)
-	selectedKeyName := getStringAttr(attrs, schemas.AttrSelectedKeyName)
-	numberOfRetries := getIntAttr(attrs, schemas.AttrNumberOfRetries)
-	fallbackIndex := getIntAttr(attrs, schemas.AttrFallbackIndex)
-	teamID := getStringAttr(attrs, schemas.AttrTeamID)
-	teamName := getStringAttr(attrs, schemas.AttrTeamName)
-	customerID := getStringAttr(attrs, schemas.AttrCustomerID)
-	customerName := getStringAttr(attrs, schemas.AttrCustomerName)
-
-	// Build common attributes for all metrics
-	otelAttrs := BuildBifrostAttributes(
-		provider, model, method,
-		virtualKeyID, virtualKeyName,
-		selectedKeyID, selectedKeyName,
-		numberOfRetries, fallbackIndex,
-		teamID, teamName, customerID, customerName,
-	)
+		if !span.StartTime.IsZero() && !span.EndTime.IsZero() {
+			latencySeconds := span.EndTime.Sub(span.StartTime).Seconds()
+			p.metricsExporter.RecordUpstreamLatency(ctx, latencySeconds, spanAttrs...)
+		}
 
-	// Record upstream request count
-	p.metricsExporter.RecordUpstreamRequest(ctx, otelAttrs...)
+		if span.Status == schemas.SpanStatusError {
+			p.metricsExporter.RecordErrorRequest(ctx, spanAttrs...)
+		} else {
+			p.metricsExporter.RecordSuccessRequest(ctx, spanAttrs...)
+		}
 
-	// Record latency (from span duration)
-	if !llmSpan.StartTime.IsZero() && !llmSpan.EndTime.IsZero() {
-		latencySeconds := llmSpan.EndTime.Sub(llmSpan.StartTime).Seconds()
-		p.metricsExporter.RecordUpstreamLatency(ctx, latencySeconds, otelAttrs...)
+		if finalSpan == nil || span.EndTime.After(finalSpan.EndTime) {
+			finalSpan = span
+		}
 	}
 
-	// Record success or error based on span status
-	if llmSpan.Status == schemas.SpanStatusError {
-		p.metricsExporter.RecordErrorRequest(ctx, otelAttrs...)
-	} else {
-		p.metricsExporter.RecordSuccessRequest(ctx, otelAttrs...)
+	if finalSpan == nil {
+		finalSpan = trace.RootSpan
 	}
+	if finalSpan == nil {
+		return
+	}
+
+	attrs := finalSpan.Attributes
+	otelAttrs := buildSpanAttrs(finalSpan)
 
 	// Record token usage - try both naming conventions
 	inputTokens := getIntAttr(attrs, schemas.AttrPromptTokens)
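The rewrite above splits metrics into two scopes: counters and latency are emitted inside the loop for every attempt span, while the latest span is carried out of the loop for the once-per-trace metrics. A compact model of that control flow, with simplified stand-in types (Span, the printed lines stand in for the exporter calls):

package main

import (
	"fmt"
	"time"
)

type Span struct {
	Kind      string
	Status    string
	StartTime time.Time
	EndTime   time.Time
}

// record walks every attempt span so each retry and fallback gets its own
// request, latency, and outcome sample; it returns the latest span, which the
// caller then uses exactly once for per-trace metrics (tokens, cost, TTFT).
func record(spans []*Span) *Span {
	var final *Span
	for _, s := range spans {
		if s.Kind != "llm.call" && s.Kind != "retry" {
			continue
		}
		fmt.Println("attempt:", s.Kind, s.EndTime.Sub(s.StartTime), s.Status)
		if final == nil || s.EndTime.After(final.EndTime) {
			final = s
		}
	}
	return final
}

func main() {
	t0 := time.Now()
	spans := []*Span{
		{Kind: "llm.call", Status: "error", StartTime: t0, EndTime: t0.Add(time.Second)},
		{Kind: "retry", Status: "ok", StartTime: t0.Add(time.Second), EndTime: t0.Add(2 * time.Second)},
	}
	fmt.Println("final status:", record(spans).Status) // ok
}

Under the old single-span logic the failed first attempt here would never have been counted as an error; with the per-attempt loop both the error and the successful retry show up, each under its own labels.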
diff --git a/plugins/otel/metrics.go b/plugins/otel/metrics.go
index 18725d8951..e1b5d85089 100644
--- a/plugins/otel/metrics.go
+++ b/plugins/otel/metrics.go
@@ -17,7 +17,7 @@ import (
 	"go.opentelemetry.io/otel/metric"
 	sdkmetric "go.opentelemetry.io/otel/sdk/metric"
 	"go.opentelemetry.io/otel/sdk/resource"
-	semconv "go.opentelemetry.io/otel/semconv/v1.39.0"
+	semconv "go.opentelemetry.io/otel/semconv/v1.40.0"
 	"google.golang.org/grpc/credentials"
 	"google.golang.org/grpc/credentials/insecure"
 )
diff --git a/plugins/otel/version b/plugins/otel/version
index 36638c8584..54eae6b4d8 100644
--- a/plugins/otel/version
+++ b/plugins/otel/version
@@ -1 +1 @@
-1.1.37
+1.1.38
diff --git a/plugins/semanticcache/go.mod b/plugins/semanticcache/go.mod
index 8848124240..04075eca52 100644
--- a/plugins/semanticcache/go.mod
+++ b/plugins/semanticcache/go.mod
@@ -1,12 +1,12 @@
 module github.com/maximhq/bifrost/plugins/semanticcache
 
-go 1.26.2
+go 1.26.1
 
 require (
 	github.com/cespare/xxhash/v2 v2.3.0
 	github.com/google/uuid v1.6.0
-	github.com/maximhq/bifrost/core v1.4.19
-	github.com/maximhq/bifrost/framework v1.2.38
+	github.com/maximhq/bifrost/core v1.4.22
+	github.com/maximhq/bifrost/framework v1.2.39
 	github.com/maximhq/bifrost/plugins/mocker v1.4.17
 )
diff --git a/plugins/semanticcache/go.sum b/plugins/semanticcache/go.sum
index 404b8b82c6..11a296a9da 100644
--- a/plugins/semanticcache/go.sum
+++ b/plugins/semanticcache/go.sum
@@ -195,10 +195,10 @@ github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWE
 github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
 github.com/mattn/go-sqlite3 v1.14.32 h1:JD12Ag3oLy1zQA+BNn74xRgaBbdhbNIDYvQUEuuErjs=
 github.com/mattn/go-sqlite3 v1.14.32/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y=
-github.com/maximhq/bifrost/core v1.4.19 h1:RY6n8O1upUo/W05whsLO2OJtJpCpTFzplyq+6pk4NaY=
-github.com/maximhq/bifrost/core v1.4.19/go.mod h1:iOh2p1qEKVolMG8l84LWcn1CseUL2Kl5c/iXFMiZI84=
-github.com/maximhq/bifrost/framework v1.2.38 h1:uWITpE+PilOwo1CQCXhZ8iQ98hH7VOBroJlTMWGL8As=
-github.com/maximhq/bifrost/framework v1.2.38/go.mod h1:S7vc+M9QOAj7RYK6pA0uauWHMxLxGRvlhHlZGArNsPw=
+github.com/maximhq/bifrost/core v1.4.22 h1:iK3OhfOFzrivnQgGjFBqAEkXYzn5BYSPkwoXH4o1tWM=
+github.com/maximhq/bifrost/core v1.4.22/go.mod h1:IaGSRlCgvy6Nr7Xiyu44sJJo9JbK188EL4QPXIcvURA=
+github.com/maximhq/bifrost/framework v1.2.39 h1:BCYfFFHBcx1xlnAy4GN/6+jLOUyysWQvdVjZ62OzAT4=
+github.com/maximhq/bifrost/framework v1.2.39/go.mod h1:+HrM35y5Jid35NKwUcG4GAXvOuCnMu/5bltqdIAhy84=
 github.com/maximhq/bifrost/plugins/mocker v1.4.17 h1:CEItx77k22fS/N5K8/dCQpse88yfbgzVebQWJXOH4NY=
 github.com/maximhq/bifrost/plugins/mocker v1.4.17/go.mod h1:RrA/XyRkggxYiK10k6D6r9VjfmRyiGBIW92ZvhWAtUw=
 github.com/oapi-codegen/runtime v1.1.1 h1:EXLHh0DXIJnWhdRPN2w4MXAzFyE4CskzhNLUmtpMYro=
diff --git a/plugins/semanticcache/version b/plugins/semanticcache/version
index f2d2f271e8..b242277136 100644
--- a/plugins/semanticcache/version
+++ b/plugins/semanticcache/version
@@ -1 +1 @@
-1.4.36
+1.4.37
diff --git a/plugins/telemetry/go.mod b/plugins/telemetry/go.mod
index 0a3fc77f42..a2de5edc50 100644
--- a/plugins/telemetry/go.mod
+++ b/plugins/telemetry/go.mod
@@ -1,10 +1,10 @@
 module
github.com/maximhq/bifrost/plugins/telemetry -go 1.26.2 +go 1.26.1 require ( - github.com/maximhq/bifrost/core v1.4.19 - github.com/maximhq/bifrost/framework v1.2.38 + github.com/maximhq/bifrost/core v1.4.22 + github.com/maximhq/bifrost/framework v1.2.39 github.com/prometheus/client_golang v1.23.2 github.com/valyala/fasthttp v1.68.0 ) diff --git a/plugins/telemetry/go.sum b/plugins/telemetry/go.sum index a182904490..cc2f5d9d8b 100644 --- a/plugins/telemetry/go.sum +++ b/plugins/telemetry/go.sum @@ -195,10 +195,10 @@ github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWE github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= github.com/mattn/go-sqlite3 v1.14.32 h1:JD12Ag3oLy1zQA+BNn74xRgaBbdhbNIDYvQUEuuErjs= github.com/mattn/go-sqlite3 v1.14.32/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y= -github.com/maximhq/bifrost/core v1.4.19 h1:RY6n8O1upUo/W05whsLO2OJtJpCpTFzplyq+6pk4NaY= -github.com/maximhq/bifrost/core v1.4.19/go.mod h1:iOh2p1qEKVolMG8l84LWcn1CseUL2Kl5c/iXFMiZI84= -github.com/maximhq/bifrost/framework v1.2.38 h1:uWITpE+PilOwo1CQCXhZ8iQ98hH7VOBroJlTMWGL8As= -github.com/maximhq/bifrost/framework v1.2.38/go.mod h1:S7vc+M9QOAj7RYK6pA0uauWHMxLxGRvlhHlZGArNsPw= +github.com/maximhq/bifrost/core v1.4.22 h1:iK3OhfOFzrivnQgGjFBqAEkXYzn5BYSPkwoXH4o1tWM= +github.com/maximhq/bifrost/core v1.4.22/go.mod h1:IaGSRlCgvy6Nr7Xiyu44sJJo9JbK188EL4QPXIcvURA= +github.com/maximhq/bifrost/framework v1.2.39 h1:BCYfFFHBcx1xlnAy4GN/6+jLOUyysWQvdVjZ62OzAT4= +github.com/maximhq/bifrost/framework v1.2.39/go.mod h1:+HrM35y5Jid35NKwUcG4GAXvOuCnMu/5bltqdIAhy84= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= github.com/oapi-codegen/runtime v1.1.1 h1:EXLHh0DXIJnWhdRPN2w4MXAzFyE4CskzhNLUmtpMYro= diff --git a/plugins/telemetry/version b/plugins/telemetry/version index 9b2fc0c8ca..212432caa4 100644 --- a/plugins/telemetry/version +++ b/plugins/telemetry/version @@ -1 +1 @@ -1.4.38 +1.4.39 diff --git a/tests/integrations/python/uv.lock b/tests/integrations/python/uv.lock index 68c1924264..dbb9a8254d 100644 --- a/tests/integrations/python/uv.lock +++ b/tests/integrations/python/uv.lock @@ -217,14 +217,14 @@ wheels = [ [[package]] name = "authlib" -version = "1.6.6" +version = "1.6.11" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "cryptography" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/bb/9b/b1661026ff24bc641b76b78c5222d614776b0c085bcfdac9bd15a1cb4b35/authlib-1.6.6.tar.gz", hash = "sha256:45770e8e056d0f283451d9996fbb59b70d45722b45d854d58f32878d0a40c38e", size = 164894 } +sdist = { url = "https://files.pythonhosted.org/packages/28/10/b325d58ffe86815b399334a101e63bc6fa4e1953921cb23703b48a0a0220/authlib-1.6.11.tar.gz", hash = "sha256:64db35b9b01aeccb4715a6c9a6613a06f2bd7be2ab9d2eb89edd1dfc7580a38f", size = 165359 } wheels = [ - { url = "https://files.pythonhosted.org/packages/54/51/321e821856452f7386c4e9df866f196720b1ad0c5ea1623ea7399969ae3b/authlib-1.6.6-py2.py3-none-any.whl", hash = "sha256:7d9e9bc535c13974313a87f53e8430eb6ea3d1cf6ae4f6efcd793f2e949143fd", size = 244005 }, + { url = "https://files.pythonhosted.org/packages/57/2f/55fca558f925a51db046e5b929deb317ddb05afed74b22d89f4eca578980/authlib-1.6.11-py2.py3-none-any.whl", hash = "sha256:c8687a9a26451c51a34a06fa17bb97cb15bba46a6a626755e2d7f50da8bff3e3", size = 244469 }, ] 
[[package]] @@ -2132,7 +2132,7 @@ wheels = [ [[package]] name = "langchain-core" -version = "1.2.28" +version = "1.2.31" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "jsonpatch" }, @@ -2144,9 +2144,9 @@ dependencies = [ { name = "typing-extensions" }, { name = "uuid-utils" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/f8/a4/317a1a3ac1df33a64adb3670bf88bbe3b3d5baa274db6863a979db472897/langchain_core-1.2.28.tar.gz", hash = "sha256:271a3d8bd618f795fdeba112b0753980457fc90537c46a0c11998516a74dc2cb", size = 846119 } +sdist = { url = "https://files.pythonhosted.org/packages/a1/5a/7523ff55668a233beef7e909e8e2074a1cc3b620e0bbf0a4ec5f38549b3b/langchain_core-1.2.31.tar.gz", hash = "sha256:aad3ecc9e4dce2dd2bb79526c81b92e5322fd81db7834a031cb80359f2e3ebaa", size = 850756 } wheels = [ - { url = "https://files.pythonhosted.org/packages/a8/92/32f785f077c7e898da97064f113c73fbd9ad55d1e2169cf3a391b183dedb/langchain_core-1.2.28-py3-none-any.whl", hash = "sha256:80764232581eaf8057bcefa71dbf8adc1f6a28d257ebd8b95ba9b8b452e8c6ac", size = 508727 }, + { url = "https://files.pythonhosted.org/packages/52/02/668ddf4f1cf963ad691bdbea672a85244e6271eb0a4acfaf662bbd94a3b1/langchain_core-1.2.31-py3-none-any.whl", hash = "sha256:c407193edb99311cc36ec3e4d3667a065bbc4d7d72fbb6e368538b9b134d4033", size = 513264 }, ] [[package]] @@ -2203,16 +2203,16 @@ wheels = [ [[package]] name = "langchain-openai" -version = "1.1.4" +version = "1.1.14" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "langchain-core" }, { name = "openai" }, { name = "tiktoken" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/cf/f8/223a340be988bc6f87b57837939589930675041a19382462f48827a67575/langchain_openai-1.1.4.tar.gz", hash = "sha256:c3b6d5b58fdeefbeaa90fad9169cf79dddd5db78317ef2f57aa3da9815dc18b6", size = 1038144 } +sdist = { url = "https://files.pythonhosted.org/packages/8e/f5/b1a56f703fb90952b07ff9fb5507123a39df1267d62a7f2bb821c5dbb628/langchain_openai-1.1.14.tar.gz", hash = "sha256:71b4262932fabe506ce79c175dbc956cc48f24d81e20b27662df493147750643", size = 1115195 } wheels = [ - { url = "https://files.pythonhosted.org/packages/fa/77/1436f498a71e9de771976267694c6f1a14cd32de4989b839a29a3950f793/langchain_openai-1.1.4-py3-none-any.whl", hash = "sha256:34ea8d33283f4ce56d4cf4abf0e3d51fef349f6ace2407645f2ff720644f2262", size = 84582 }, + { url = "https://files.pythonhosted.org/packages/0b/fa/8c33befbc0cf81b21371cc1dab4e7bf94a80b8116194f263a5021ec02529/langchain_openai-1.1.14-py3-none-any.whl", hash = "sha256:cb525d2011f9813fc15a7dcfd4bca5b87badcbcb2c113a7fbe45d1b8a1bbb69c", size = 88705 }, ] [[package]] @@ -2239,14 +2239,14 @@ wheels = [ [[package]] name = "langchain-text-splitters" -version = "1.1.0" +version = "1.1.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "langchain-core" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/41/42/c178dcdc157b473330eb7cc30883ea69b8ec60078c7b85e2d521054c4831/langchain_text_splitters-1.1.0.tar.gz", hash = "sha256:75e58acb7585dc9508f3cd9d9809cb14751283226c2d6e21fb3a9ae57582ca22", size = 272230 } +sdist = { url = "https://files.pythonhosted.org/packages/26/9f/6c545900fefb7b00ddfa3f16b80d61338a0ec68c31c5451eeeab99082760/langchain_text_splitters-1.1.2.tar.gz", hash = "sha256:782a723db0a4746ac91e251c7c1d57fd23636e4f38ed733074e28d7a86f41627", size = 293580 } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/d8/1a/a84ed1c046deecf271356b0179c1b9fba95bfdaa6f934e1849dee26fad7b/langchain_text_splitters-1.1.0-py3-none-any.whl", hash = "sha256:f00341fe883358786104a5f881375ac830a4dd40253ecd42b4c10536c6e4693f", size = 34182 }, + { url = "https://files.pythonhosted.org/packages/d3/26/1ef06f56198d631296d646a6223de35bcc6cf9795ceb2442816bc963b84c/langchain_text_splitters-1.1.2-py3-none-any.whl", hash = "sha256:a2de0d799ff31886429fd6e2e0032df275b60ec817c19059a7b46181cc1c2f10", size = 35903 }, ] [[package]] @@ -2307,7 +2307,7 @@ wheels = [ [[package]] name = "langsmith" -version = "0.5.0" +version = "0.7.32" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "httpx" }, @@ -2317,11 +2317,12 @@ dependencies = [ { name = "requests" }, { name = "requests-toolbelt" }, { name = "uuid-utils" }, + { name = "xxhash" }, { name = "zstandard" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/d8/4b/d448307e8557e36b20008d0d1cd0a58233c38d90bf978e1d093be0ca4cb2/langsmith-0.5.0.tar.gz", hash = "sha256:5cadf1ddd30e838cf61679f4a776aaef638d4b02ffbceba9f73283caebd39e1b", size = 869272 } +sdist = { url = "https://files.pythonhosted.org/packages/2f/b4/a0b4a501bee6b8a741ce29f8c48155b132118483cddc6f9247735ddb38fa/langsmith-0.7.32.tar.gz", hash = "sha256:b59b8e106d0e4c4842e158229296086e2aa7c561e3f602acda73d3ad0062e915", size = 1184518 } wheels = [ - { url = "https://files.pythonhosted.org/packages/ee/8a/d9bc95607846bc82fbe0b98d2592ffb5e036c97a362735ae926e3d519df7/langsmith-0.5.0-py3-none-any.whl", hash = "sha256:a83750cb3dccb33148d4ffe005e3e03080fad13e01671efbb74c9a68813bfef8", size = 273711 }, + { url = "https://files.pythonhosted.org/packages/62/bc/148f98ac7dad73ac5e1b1c985290079cfeeb9ba13d760a24f25002beb2c9/langsmith-0.7.32-py3-none-any.whl", hash = "sha256:e1fde928990c4c52f47dc5132708cec674355d9101723d564183e965f383bf5f", size = 378272 }, ] [[package]] @@ -2995,7 +2996,7 @@ wheels = [ [[package]] name = "openai" -version = "2.13.0" +version = "2.32.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, @@ -3007,9 +3008,9 @@ dependencies = [ { name = "tqdm" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/0f/39/8e347e9fda125324d253084bb1b82407e5e3c7777a03dc398f79b2d95626/openai-2.13.0.tar.gz", hash = "sha256:9ff633b07a19469ec476b1e2b5b26c5ef700886524a7a72f65e6f0b5203142d5", size = 626583 } +sdist = { url = "https://files.pythonhosted.org/packages/ed/59/bdcc6b759b8c42dd73afaf5bf8f902c04b37987a5514dbc1c64dba390fef/openai-2.32.0.tar.gz", hash = "sha256:c54b27a9e4cb8d51f0dd94972ffd1a04437efeb259a9e60d8922b8bd26fe55e0", size = 693286 } wheels = [ - { url = "https://files.pythonhosted.org/packages/bb/d5/eb52edff49d3d5ea116e225538c118699ddeb7c29fa17ec28af14bc10033/openai-2.13.0-py3-none-any.whl", hash = "sha256:746521065fed68df2f9c2d85613bb50844343ea81f60009b60e6a600c9352c79", size = 1066837 }, + { url = "https://files.pythonhosted.org/packages/1e/c1/d6e64ccd0536bf616556f0cad2b6d94a8125f508d25cfd814b1d2db4e2f1/openai-2.32.0-py3-none-any.whl", hash = "sha256:4dcc9badeb4bf54ad0d187453742f290226d30150890b7890711bda4f32f192f", size = 1162570 }, ] [[package]] @@ -4253,11 +4254,11 @@ wheels = [ [[package]] name = "python-multipart" -version = "0.0.20" +version = "0.0.26" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/f3/87/f44d7c9f274c7ee665a29b885ec97089ec5dc034c7f3fafa03da9e39a09e/python_multipart-0.0.20.tar.gz", hash = 
"sha256:8dd0cab45b8e23064ae09147625994d090fa46f5b0d1e13af944c331a7fa9d13", size = 37158 } +sdist = { url = "https://files.pythonhosted.org/packages/88/71/b145a380824a960ebd60e1014256dbb7d2253f2316ff2d73dfd8928ec2c3/python_multipart-0.0.26.tar.gz", hash = "sha256:08fadc45918cd615e26846437f50c5d6d23304da32c341f289a617127b081f17", size = 43501 } wheels = [ - { url = "https://files.pythonhosted.org/packages/45/58/38b5afbc1a800eeea951b9285d3912613f2603bdf897a4ab0f4bd7f405fc/python_multipart-0.0.20-py3-none-any.whl", hash = "sha256:8a62d3a8335e06589fe01f2a3e178cdcc632f3fbe0d492ad9ee0ec35aab1f104", size = 24546 }, + { url = "https://files.pythonhosted.org/packages/9a/22/f1925cdda983ab66fc8ec6ec8014b959262747e58bdca26a4e3d1da29d56/python_multipart-0.0.26-py3-none-any.whl", hash = "sha256:c0b169f8c4484c13b0dcf2ef0ec3a4adb255c4b7d18d8e420477d2b1dd03f185", size = 28847 }, ] [[package]] diff --git a/tests/integrations/typescript/package-lock.json b/tests/integrations/typescript/package-lock.json index 00576160d3..daa9896cbd 100644 --- a/tests/integrations/typescript/package-lock.json +++ b/tests/integrations/typescript/package-lock.json @@ -16,6 +16,7 @@ "@langchain/core": "^1.1.39", "@langchain/google-genai": "^2.1.26", "@langchain/openai": "^1.4.4", + "langsmith": "^0.5.19", "openai": "^6.15.0", "yaml": "^2.6.0", "zod": "^3.24.0" @@ -4224,9 +4225,9 @@ } }, "node_modules/langsmith": { - "version": "0.5.18", - "resolved": "https://registry.npmjs.org/langsmith/-/langsmith-0.5.18.tgz", - "integrity": "sha512-3zuZUWffTHQ+73EAwnodADtf534VNEZUpXr9jC12qyG8/IQuJET7PRsCpTb9wX2lmBspakwLUpqpj3tNm/0bVA==", + "version": "0.5.19", + "resolved": "https://registry.npmjs.org/langsmith/-/langsmith-0.5.19.tgz", + "integrity": "sha512-5tFoETuFMvGkbPGsINNlIE4Ab86CsPhdPOQZCGwNt/NX0h5NDKQLKOWS/G2XcRUBOQl4mCNbrayUvUTWaIRsCg==", "license": "MIT", "dependencies": { "p-queue": "6.6.2", diff --git a/tests/integrations/typescript/package.json b/tests/integrations/typescript/package.json index e5272b502f..4e44c48e61 100644 --- a/tests/integrations/typescript/package.json +++ b/tests/integrations/typescript/package.json @@ -31,6 +31,7 @@ "@langchain/core": "^1.1.39", "@langchain/google-genai": "^2.1.26", "@langchain/openai": "^1.4.4", + "langsmith": "^0.5.19", "openai": "^6.15.0", "yaml": "^2.6.0", "zod": "^3.24.0" diff --git a/tests/scripts/1millogs/go.mod b/tests/scripts/1millogs/go.mod index e51491f776..c4dd43e882 100644 --- a/tests/scripts/1millogs/go.mod +++ b/tests/scripts/1millogs/go.mod @@ -1,6 +1,6 @@ module github.com/maximhq/bifrost/tests/scripts/1millogs -go 1.26.2 +go 1.26.1 require ( github.com/maximhq/bifrost/core v1.4.18 diff --git a/transports/Dockerfile b/transports/Dockerfile index c8e9335064..5a05df2795 100644 --- a/transports/Dockerfile +++ b/transports/Dockerfile @@ -15,7 +15,7 @@ # Skip the copy-build step since we'll copy the files in the Go build stage # --- Go Build Stage: Compile the Go binary --- - FROM golang:1.26.2-alpine3.23@sha256:c2a1f7b2095d046ae14b286b18413a05bb82c9bca9b25fe7ff5efef0f0826166 AS builder + FROM golang:1.26.1-alpine3.23@sha256:2389ebfa5b7f43eeafbd6be0c3700cc46690ef842ad962f6c5bd6be49ed82039 AS builder WORKDIR /app # Install dependencies including gcc for CGO and sqlite diff --git a/transports/Dockerfile.local b/transports/Dockerfile.local index 1a7d9840b4..8761833577 100644 --- a/transports/Dockerfile.local +++ b/transports/Dockerfile.local @@ -18,7 +18,7 @@ # Skip the copy-build step since we'll copy the files in the Go build stage # --- Go Build Stage: Compile the Go binary using 
local modules ---
- FROM golang:1.26.2-alpine3.23@sha256:c2a1f7b2095d046ae14b286b18413a05bb82c9bca9b25fe7ff5efef0f0826166 AS builder
+ FROM golang:1.26.1-alpine3.23@sha256:2389ebfa5b7f43eeafbd6be0c3700cc46690ef842ad962f6c5bd6be49ed82039 AS builder
 WORKDIR /build
 
 # Install dependencies including gcc for CGO and sqlite
diff --git a/transports/bifrost-http/handlers/asyncinference.go b/transports/bifrost-http/handlers/asyncinference.go
index d85504cfcf..b50a28172a 100644
--- a/transports/bifrost-http/handlers/asyncinference.go
+++ b/transports/bifrost-http/handlers/asyncinference.go
@@ -118,11 +118,10 @@ func (h *AsyncHandler) asyncTextCompletion(ctx *fasthttp.RequestCtx) {
 	}
 	defer cancel()
 
-	virtualKeyValue := getVirtualKeyFromContext(bifrostCtx)
 	resultTTL := getResultTTLFromHeaderWithDefault(ctx, h.config.ClientConfig.AsyncJobResultTTL)
 
 	job, err := h.executor.SubmitJob(
-		virtualKeyValue,
+		bifrostCtx,
 		resultTTL,
 		func(bgCtx *schemas.BifrostContext) (interface{}, *schemas.BifrostError) {
 			return h.client.TextCompletionRequest(bgCtx, bifrostTextReq)
@@ -156,11 +155,10 @@
 	}
 	defer cancel()
 
-	virtualKeyValue := getVirtualKeyFromContext(bifrostCtx)
 	resultTTL := getResultTTLFromHeaderWithDefault(ctx, h.config.ClientConfig.AsyncJobResultTTL)
 
 	job, err := h.executor.SubmitJob(
-		virtualKeyValue,
+		bifrostCtx,
 		resultTTL,
 		func(bgCtx *schemas.BifrostContext) (interface{}, *schemas.BifrostError) {
 			return h.client.ChatCompletionRequest(bgCtx, bifrostChatReq)
@@ -194,11 +192,10 @@
 	}
 	defer cancel()
 
-	virtualKeyValue := getVirtualKeyFromContext(bifrostCtx)
 	resultTTL := getResultTTLFromHeaderWithDefault(ctx, h.config.ClientConfig.AsyncJobResultTTL)
 
 	job, err := h.executor.SubmitJob(
-		virtualKeyValue,
+		bifrostCtx,
 		resultTTL,
 		func(bgCtx *schemas.BifrostContext) (interface{}, *schemas.BifrostError) {
 			return h.client.ResponsesRequest(bgCtx, bifrostResponsesReq)
@@ -228,11 +225,10 @@
 	}
 	defer cancel()
 
-	virtualKeyValue := getVirtualKeyFromContext(bifrostCtx)
 	resultTTL := getResultTTLFromHeaderWithDefault(ctx, h.config.ClientConfig.AsyncJobResultTTL)
 
 	job, err := h.executor.SubmitJob(
-		virtualKeyValue,
+		bifrostCtx,
 		resultTTL,
 		func(bgCtx *schemas.BifrostContext) (interface{}, *schemas.BifrostError) {
 			return h.client.EmbeddingRequest(bgCtx, bifrostEmbeddingReq)
@@ -266,11 +262,10 @@
 	}
 	defer cancel()
 
-	virtualKeyValue := getVirtualKeyFromContext(bifrostCtx)
 	resultTTL := getResultTTLFromHeaderWithDefault(ctx, h.config.ClientConfig.AsyncJobResultTTL)
 
 	job, err := h.executor.SubmitJob(
-		virtualKeyValue,
+		bifrostCtx,
 		resultTTL,
 		func(bgCtx *schemas.BifrostContext) (interface{}, *schemas.BifrostError) {
 			return h.client.SpeechRequest(bgCtx, bifrostSpeechReq)
@@ -304,11 +299,10 @@
 	}
 	defer cancel()
 
-	virtualKeyValue := getVirtualKeyFromContext(bifrostCtx)
 	resultTTL := getResultTTLFromHeaderWithDefault(ctx, h.config.ClientConfig.AsyncJobResultTTL)
 
 	job, err := h.executor.SubmitJob(
-		virtualKeyValue,
+		bifrostCtx,
 		resultTTL,
 		func(bgCtx *schemas.BifrostContext) (interface{}, *schemas.BifrostError) {
 			return h.client.TranscriptionRequest(bgCtx, bifrostTranscriptionReq)
@@ -342,11 +336,10 @@
 	}
 	defer cancel()
 
-	virtualKeyValue := getVirtualKeyFromContext(bifrostCtx)
 	resultTTL := getResultTTLFromHeaderWithDefault(ctx, h.config.ClientConfig.AsyncJobResultTTL)
 
 	job, err := h.executor.SubmitJob(
-		virtualKeyValue,
+		bifrostCtx,
 		resultTTL,
 		func(bgCtx *schemas.BifrostContext) (interface{}, *schemas.BifrostError) {
 			return h.client.ImageGenerationRequest(bgCtx, bifrostReq)
@@ -380,11 +373,10 @@
 	}
 	defer cancel()
 
-	virtualKeyValue := getVirtualKeyFromContext(bifrostCtx)
 	resultTTL := getResultTTLFromHeaderWithDefault(ctx, h.config.ClientConfig.AsyncJobResultTTL)
 
 	job, err := h.executor.SubmitJob(
-		virtualKeyValue,
+		bifrostCtx,
 		resultTTL,
 		func(bgCtx *schemas.BifrostContext) (interface{}, *schemas.BifrostError) {
 			return h.client.ImageEditRequest(bgCtx, bifrostReq)
@@ -413,11 +405,10 @@
 	}
 	defer cancel()
 
-	virtualKeyValue := getVirtualKeyFromContext(bifrostCtx)
 	resultTTL := getResultTTLFromHeaderWithDefault(ctx, h.config.ClientConfig.AsyncJobResultTTL)
 
 	job, err := h.executor.SubmitJob(
-		virtualKeyValue,
+		bifrostCtx,
 		resultTTL,
 		func(bgCtx *schemas.BifrostContext) (interface{}, *schemas.BifrostError) {
 			return h.client.ImageVariationRequest(bgCtx, bifrostReq)
@@ -446,11 +437,10 @@
 	}
 	defer cancel()
 
-	virtualKeyValue := getVirtualKeyFromContext(bifrostCtx)
 	resultTTL := getResultTTLFromHeaderWithDefault(ctx, h.config.ClientConfig.AsyncJobResultTTL)
 
 	job, err := h.executor.SubmitJob(
-		virtualKeyValue,
+		bifrostCtx,
 		resultTTL,
 		func(bgCtx *schemas.BifrostContext) (interface{}, *schemas.BifrostError) {
 			return h.client.RerankRequest(bgCtx, bifrostReq)
@@ -479,11 +469,10 @@
 	}
 	defer cancel()
 
-	virtualKeyValue := getVirtualKeyFromContext(bifrostCtx)
 	resultTTL := getResultTTLFromHeaderWithDefault(ctx, h.config.ClientConfig.AsyncJobResultTTL)
 
 	job, err := h.executor.SubmitJob(
-		virtualKeyValue,
+		bifrostCtx,
 		resultTTL,
 		func(bgCtx *schemas.BifrostContext) (interface{}, *schemas.BifrostError) {
 			return h.client.OCRRequest(bgCtx, bifrostReq)
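Every handler makes the same substitution: instead of extracting the virtual key up front and handing only that string to the executor, it passes the whole request context and lets the executor pull out what it needs. A sketch of the idea with simplified stand-in types (Context, Executor, Job are assumptions, not the bifrost schemas):

package main

import "fmt"

type Context map[string]string

type Job struct{ ID string }

type Executor struct{}

// SubmitJob derives per-job metadata (virtual key, governance info, headers)
// from the context itself, so new fields propagate to background jobs without
// another signature change at every call site.
func (e *Executor) SubmitJob(ctx Context, ttlSeconds int, run func(Context) (any, error)) (*Job, error) {
	vk := ctx["virtual-key"] // extraction moves inside the executor
	fmt.Println("submitting job for vk:", vk, "ttl:", ttlSeconds)
	// A real executor would run this asynchronously and store the result for ttlSeconds.
	run(ctx)
	return &Job{ID: "job-1"}, nil
}

func main() {
	ex := &Executor{}
	ctx := Context{"virtual-key": "sk-bf-test"}
	ex.SubmitJob(ctx, 3600, func(c Context) (any, error) { return "ok", nil })
}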
diff --git a/transports/bifrost-http/handlers/devpprof.go b/transports/bifrost-http/handlers/devpprof.go
index fca8712d9b..6a2390b1eb 100644
--- a/transports/bifrost-http/handlers/devpprof.go
+++ b/transports/bifrost-http/handlers/devpprof.go
@@ -26,8 +26,8 @@ const (
 	metricsCollectionInterval = 10 * time.Second
 	// Number of data points to keep (5 minutes / 10 seconds = 30 points)
 	historySize = 30
-	// Top allocations to return
-	topAllocationsCount = 5
+	// Top allocations to return per table (cumulative and in-use)
+	topAllocationsCount = 50
 )
 
 // MemoryStats represents memory statistics at a point in time
@@ -57,11 +57,12 @@ type RuntimeStats struct {
 
 // AllocationInfo represents a single allocation site
 type AllocationInfo struct {
-	Function string `json:"function"`
-	File     string `json:"file"`
-	Line     int    `json:"line"`
-	Bytes    int64  `json:"bytes"`
-	Count    int64  `json:"count"`
+	Function string   `json:"function"`
+	File     string   `json:"file"`
+	Line     int      `json:"line"`
+	Bytes    int64    `json:"bytes"`
+	Count    int64    `json:"count"`
+	Stack    []string `json:"stack"`
 }
 
 // GoroutineGroup represents a group of goroutines with the same stack trace
@@ -104,12 +105,13 @@ type HistoryPoint struct {
 
 // PprofData represents the complete pprof response
 type PprofData struct {
-	Timestamp      string           `json:"timestamp"`
-	Memory         MemoryStats      `json:"memory"`
-	CPU            CPUStats         `json:"cpu"`
-	Runtime        RuntimeStats     `json:"runtime"`
-	TopAllocations []AllocationInfo `json:"top_allocations"`
-	History        []HistoryPoint   `json:"history"`
+	Timestamp        string           `json:"timestamp"`
+	Memory           MemoryStats      `json:"memory"`
+	CPU              CPUStats         `json:"cpu"`
+	Runtime          RuntimeStats     `json:"runtime"`
+	TopAllocations   []AllocationInfo `json:"top_allocations"`
+	InuseAllocations []AllocationInfo `json:"inuse_allocations"`
+	History          []HistoryPoint   `json:"history"`
 }
 
 // cpuSample holds a CPU time sample for calculating usage
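The new Stack field reuses the encoding already used by GoroutineGroup.Stack: alternating function-name and tab-indented file:line entries, top frame first. A small illustration of that format and why it doubles as an aggregation key (the frame names here are made up):

package main

import (
	"fmt"
	"strings"
)

func main() {
	stack := []string{
		"main.allocateBuffer",
		"\t/app/buffer.go:42",
		"main.handleRequest",
		"\t/app/server.go:88",
	}
	// Joining with "\n" yields both a render-ready goroutine-dump block and,
	// as in the handler below, a map key that merges identical call stacks.
	key := strings.Join(stack, "\n")
	fmt.Println(key)
}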
diff --git a/transports/bifrost-http/handlers/devpprof.go b/transports/bifrost-http/handlers/devpprof.go
index fca8712d9b..6a2390b1eb 100644
--- a/transports/bifrost-http/handlers/devpprof.go
+++ b/transports/bifrost-http/handlers/devpprof.go
@@ -26,8 +26,8 @@ const (
 	metricsCollectionInterval = 10 * time.Second
 	// Number of data points to keep (5 minutes / 10 seconds = 30 points)
 	historySize = 30
-	// Top allocations to return
-	topAllocationsCount = 5
+	// Top allocations to return per table (cumulative and in-use)
+	topAllocationsCount = 50
 )
 
 // MemoryStats represents memory statistics at a point in time
@@ -57,11 +57,12 @@ type RuntimeStats struct {
 
 // AllocationInfo represents a single allocation site
 type AllocationInfo struct {
-	Function string `json:"function"`
-	File     string `json:"file"`
-	Line     int    `json:"line"`
-	Bytes    int64  `json:"bytes"`
-	Count    int64  `json:"count"`
+	Function string   `json:"function"`
+	File     string   `json:"file"`
+	Line     int      `json:"line"`
+	Bytes    int64    `json:"bytes"`
+	Count    int64    `json:"count"`
+	Stack    []string `json:"stack"`
 }
 
 // GoroutineGroup represents a group of goroutines with the same stack trace
@@ -104,12 +105,13 @@ type HistoryPoint struct {
 
 // PprofData represents the complete pprof response
 type PprofData struct {
-	Timestamp      string           `json:"timestamp"`
-	Memory         MemoryStats      `json:"memory"`
-	CPU            CPUStats         `json:"cpu"`
-	Runtime        RuntimeStats     `json:"runtime"`
-	TopAllocations []AllocationInfo `json:"top_allocations"`
-	History        []HistoryPoint   `json:"history"`
+	Timestamp        string           `json:"timestamp"`
+	Memory           MemoryStats      `json:"memory"`
+	CPU              CPUStats         `json:"cpu"`
+	Runtime          RuntimeStats     `json:"runtime"`
+	TopAllocations   []AllocationInfo `json:"top_allocations"`
+	InuseAllocations []AllocationInfo `json:"inuse_allocations"`
+	History          []HistoryPoint   `json:"history"`
 }
 
 // cpuSample holds a CPU time sample for calculating usage
@@ -288,85 +290,138 @@ func (c *MetricsCollector) getCPUStats() CPUStats {
 	return c.currentCPU
 }
 
-// getTopAllocations analyzes heap profile to find top allocation sites
-func getTopAllocations() []AllocationInfo {
-	// Write heap profile to buffer
+// getAllocations analyzes the heap profile and returns two allocation lists
+// aggregated by full call stack:
+//   - cumulative: alloc_space / alloc_objects (total since process start)
+//   - inuse: inuse_space / inuse_objects (currently live on the heap)
+//
+// Both are produced from a single pprof.WriteHeapProfile call.
+func getAllocations() (cumulative, inuse []AllocationInfo) {
 	var buf bytes.Buffer
 	if err := pprof.WriteHeapProfile(&buf); err != nil {
-		return []AllocationInfo{}
+		return nil, nil
 	}
 
-	// Parse the protobuf profile
 	p, err := profile.Parse(&buf)
 	if err != nil {
-		return []AllocationInfo{}
+		return nil, nil
 	}
 
-	// Find the indices for alloc_objects and alloc_space sample types
-	var allocObjectsIdx, allocSpaceIdx int
+	allocObjectsIdx, allocSpaceIdx := -1, -1
+	inuseObjectsIdx, inuseSpaceIdx := -1, -1
 	for i, st := range p.SampleType {
 		switch st.Type {
 		case "alloc_objects":
 			allocObjectsIdx = i
 		case "alloc_space":
 			allocSpaceIdx = i
+		case "inuse_objects":
+			inuseObjectsIdx = i
+		case "inuse_space":
+			inuseSpaceIdx = i
 		}
 	}
 
-	// Aggregate allocations by function (top of stack = allocation site)
 	allocMap := make(map[string]*AllocationInfo)
+	inuseMap := make(map[string]*AllocationInfo)
 	for _, sample := range p.Sample {
 		if len(sample.Location) == 0 {
 			continue
 		}
-		loc := sample.Location[0] // Top of stack = allocation site
-		if len(loc.Line) == 0 {
+
+		topLoc := sample.Location[0]
+		if len(topLoc.Line) == 0 {
 			continue
 		}
-		line := loc.Line[0]
-		fn := line.Function
-		if fn == nil {
+		topLine := topLoc.Line[0]
+		topFn := topLine.Function
+		if topFn == nil {
 			continue
 		}
 
-		// Skip allocations from the profiler itself
-		if isProfilerFunction(fn.Name, fn.Filename) {
+		// Filter only the top frame — filtering inner frames would drop real
+		// user allocations that merely pass through runtime/profiler code.
+		if isProfilerFunction(topFn.Name, topFn.Filename) {
 			continue
 		}
 
-		key := fn.Name
-		if existing, ok := allocMap[key]; ok {
-			existing.Bytes += sample.Value[allocSpaceIdx]
-			existing.Count += sample.Value[allocObjectsIdx]
-		} else {
-			allocMap[key] = &AllocationInfo{
-				Function: fn.Name,
-				File:     fn.Filename,
-				Line:     int(line.Line),
-				Bytes:    sample.Value[allocSpaceIdx],
-				Count:    sample.Value[allocObjectsIdx],
+		// Build full stack in goroutine-dump format: alternating "funcName" and
+		// "\tfile:line" entries, top-down. Matches GoroutineGroup.Stack so the
+		// UI can render both with the same code path.
+		stack := make([]string, 0, len(sample.Location)*2)
+		for _, loc := range sample.Location {
+			if len(loc.Line) == 0 {
+				continue
+			}
+			frame := loc.Line[0]
+			if frame.Function == nil {
+				continue
+			}
+			stack = append(stack, frame.Function.Name)
+			stack = append(stack, "\t"+frame.Function.Filename+":"+strconv.FormatInt(frame.Line, 10))
+		}
+		if len(stack) == 0 {
+			continue
+		}
+		key := strings.Join(stack, "\n")
+
+		if allocSpaceIdx >= 0 && allocObjectsIdx >= 0 {
+			b := sample.Value[allocSpaceIdx]
+			c := sample.Value[allocObjectsIdx]
+			if existing, ok := allocMap[key]; ok {
+				existing.Bytes += b
+				existing.Count += c
+			} else {
+				allocMap[key] = &AllocationInfo{
+					Function: topFn.Name,
+					File:     topFn.Filename,
+					Line:     int(topLine.Line),
+					Bytes:    b,
+					Count:    c,
+					Stack:    stack,
+				}
 			}
 		}
-	}
 
-	// Convert map to slice
-	allocations := make([]AllocationInfo, 0, len(allocMap))
-	for _, alloc := range allocMap {
-		allocations = append(allocations, *alloc)
+		if inuseSpaceIdx >= 0 && inuseObjectsIdx >= 0 {
+			b := sample.Value[inuseSpaceIdx]
+			c := sample.Value[inuseObjectsIdx]
+			// Most samples have inuse=0 (already freed) — skip them so the live
+			// table isn't padded with noise.
+			if b == 0 && c == 0 {
+				continue
+			}
+			if existing, ok := inuseMap[key]; ok {
+				existing.Bytes += b
+				existing.Count += c
+			} else {
+				inuseMap[key] = &AllocationInfo{
+					Function: topFn.Name,
+					File:     topFn.Filename,
+					Line:     int(topLine.Line),
+					Bytes:    b,
+					Count:    c,
+					Stack:    stack,
+				}
+			}
+		}
 	}
 
-	// Sort by bytes descending
-	sort.Slice(allocations, func(i, j int) bool {
-		return allocations[i].Bytes > allocations[j].Bytes
-	})
+	return flattenAndTopN(allocMap), flattenAndTopN(inuseMap)
+}
 
-	// Return top N allocations
-	if len(allocations) > topAllocationsCount {
-		allocations = allocations[:topAllocationsCount]
+// flattenAndTopN sorts an allocation map by bytes desc and caps it.
+func flattenAndTopN(m map[string]*AllocationInfo) []AllocationInfo {
+	out := make([]AllocationInfo, 0, len(m))
+	for _, a := range m {
+		out = append(out, *a)
 	}
-
-	return allocations
+	sort.Slice(out, func(i, j int) bool { return out[i].Bytes > out[j].Bytes })
+	if len(out) > topAllocationsCount {
+		out = out[:topAllocationsCount]
+	}
+	return out
 }
 
 // RegisterRoutes registers the dev pprof routes
@@ -400,9 +455,9 @@ func (h *DevPprofHandler) getPprof(ctx *fasthttp.RequestCtx) {
 			NumCPU:     runtime.NumCPU(),
 			GOMAXPROCS: runtime.GOMAXPROCS(0),
 		},
-		TopAllocations: getTopAllocations(),
-		History:        h.collector.getHistory(),
+		History: h.collector.getHistory(),
 	}
+	data.TopAllocations, data.InuseAllocations = getAllocations()
 
 	SendJSON(ctx, data)
 }
@@ -688,7 +743,8 @@ var profilerPatterns = []string{
 	"profile.Parse",
 	"MetricsCollector",
 	"collectLoop",
-	"getTopAllocations",
+	"getAllocations",
+	"flattenAndTopN",
 	"parseGoroutineProfile",
 	"getGoroutines",
 	"getCPUSample",
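With the devpprof changes above, the payload exposes two tables: top_allocations (cumulative alloc_space since process start) and inuse_allocations (bytes still live on the heap), both aggregated by full call stack and capped at 50 rows. A consumer-side sketch of reading them follows; the endpoint URL is an assumption, so adjust host, port, and path to wherever the dev pprof route is mounted in your deployment:

```go
package main

import (
	"encoding/json"
	"fmt"
	"net/http"
)

// allocationInfo mirrors only the fields this sketch needs; the json tags
// match the AllocationInfo struct in the diff.
type allocationInfo struct {
	Function string   `json:"function"`
	Bytes    int64    `json:"bytes"`
	Count    int64    `json:"count"`
	Stack    []string `json:"stack"`
}

type pprofData struct {
	TopAllocations   []allocationInfo `json:"top_allocations"`   // cumulative (alloc_space)
	InuseAllocations []allocationInfo `json:"inuse_allocations"` // currently live (inuse_space)
}

func main() {
	// Hypothetical endpoint URL — match it to your actual deployment.
	resp, err := http.Get("http://localhost:8080/dev/pprof")
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	var data pprofData
	if err := json.NewDecoder(resp.Body).Decode(&data); err != nil {
		panic(err)
	}
	// A stack that is high in TopAllocations but absent from InuseAllocations
	// churns memory (allocates and frees); high in both suggests a leak.
	for _, a := range data.InuseAllocations {
		fmt.Printf("%s: %d bytes live across %d objects\n", a.Function, a.Bytes, a.Count)
	}
}
```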
diff --git a/transports/bifrost-http/handlers/mcpserver.go b/transports/bifrost-http/handlers/mcpserver.go
index 31e9c448d8..46fd0df3b1 100644
--- a/transports/bifrost-http/handlers/mcpserver.go
+++ b/transports/bifrost-http/handlers/mcpserver.go
@@ -307,11 +307,24 @@ func (h *MCPServerHandler) syncServer(server *server.MCPServer, availableTools [
 			inputSchema.Properties = make(map[string]any)
 		}
 
+		// Map Bifrost annotations back to MCP tool annotations
+		var toolAnnotation mcp.ToolAnnotation
+		if tool.Annotations != nil {
+			toolAnnotation = mcp.ToolAnnotation{
+				Title:           tool.Annotations.Title,
+				ReadOnlyHint:    tool.Annotations.ReadOnlyHint,
+				DestructiveHint: tool.Annotations.DestructiveHint,
+				IdempotentHint:  tool.Annotations.IdempotentHint,
+				OpenWorldHint:   tool.Annotations.OpenWorldHint,
+			}
+		}
+
 		// Register tool with the server
 		server.AddTool(mcp.Tool{
 			Name:        toolName,
 			Description: description,
 			InputSchema: inputSchema,
+			Annotations: toolAnnotation,
 		}, handler)
 	}
 }
diff --git a/transports/bifrost-http/handlers/providers.go b/transports/bifrost-http/handlers/providers.go
index 1696c9530a..353ed6005b 100644
--- a/transports/bifrost-http/handlers/providers.go
+++ b/transports/bifrost-http/handlers/providers.go
@@ -357,16 +357,16 @@ func (h *ProviderHandler) updateProvider(ctx *fasthttp.RequestCtx) {
 	}
 
 	var payload = struct {
-		Keys                     []schemas.Key                    `json:"keys"`                        // API keys for the provider
-		NetworkConfig            schemas.NetworkConfig            `json:"network_config"`              // Network-related settings
-		ConcurrencyAndBufferSize schemas.ConcurrencyAndBufferSize `json:"concurrency_and_buffer_size"` // Concurrency settings
-		ProxyConfig              *schemas.ProxyConfig             `json:"proxy_config,omitempty"`      // Proxy configuration
-		SendBackRawRequest       *bool                            `json:"send_back_raw_request,omitempty"`  // Include raw request in BifrostResponse
-		SendBackRawResponse      *bool                            `json:"send_back_raw_response,omitempty"` // Include raw response in BifrostResponse
+		Keys                     []schemas.Key                     `json:"keys"`                        // API keys for the provider
+		NetworkConfig            schemas.NetworkConfig             `json:"network_config"`              // Network-related settings
+		ConcurrencyAndBufferSize schemas.ConcurrencyAndBufferSize  `json:"concurrency_and_buffer_size"` // Concurrency settings
+		ProxyConfig              *schemas.ProxyConfig              `json:"proxy_config,omitempty"`      // Proxy configuration
+		SendBackRawRequest       *bool                             `json:"send_back_raw_request,omitempty"`  // Include raw request in BifrostResponse
+		SendBackRawResponse      *bool                             `json:"send_back_raw_response,omitempty"` // Include raw response in BifrostResponse
 		StoreRawRequestResponse  *bool                             `json:"store_raw_request_response,omitempty"` // Capture raw request/response for internal logging only
-		CustomProviderConfig *schemas.CustomProviderConfig `json:"custom_provider_config,omitempty"` // Custom provider configuration
-		OpenAIConfig *schemas.OpenAIConfig `json:"openai_config,omitempty"` // OpenAI-specific configuration
-		PricingOverrides []schemas.ProviderPricingOverride `json:"pricing_overrides,omitempty"` // Provider-level pricing overrides
+		CustomProviderConfig     *schemas.CustomProviderConfig     `json:"custom_provider_config,omitempty"` // Custom provider configuration
+		OpenAIConfig             *schemas.OpenAIConfig             `json:"openai_config,omitempty"`          // OpenAI-specific configuration
+		PricingOverrides         []schemas.ProviderPricingOverride `json:"pricing_overrides,omitempty"`      // Provider-level pricing overrides
 	}{}
 
 	if err := sonic.Unmarshal(ctx.PostBody(), &payload); err != nil {
@@ -546,7 +546,6 @@ func (h *ProviderHandler) updateProvider(ctx *fasthttp.RequestCtx) {
 
 	// Attempt model discovery
 	err = h.attemptModelDiscovery(ctx, provider, payload.CustomProviderConfig)
-
 	if err != nil {
 		logger.Warn("Model discovery failed for provider %s: %v", provider, err)
 	}
@@ -891,19 +890,19 @@ func (h *ProviderHandler) getModelParameters(ctx *fasthttp.RequestCtx) {
 }
 
 // keyAllowsModelForList reports whether a provider key permits model for catalog listing.
-func keyAllowsModelForList(provider schemas.ModelProvider, model string, key schemas.Key, modelCatalog *modelcatalog.ModelCatalog) bool {
-	if len(key.BlacklistedModels) > 0 && keyModelListAllowsModel(provider, model, key.BlacklistedModels, modelCatalog) {
+func keyAllowsModelForList(provider schemas.ModelProvider, model string, providerConfig *configstore.ProviderConfig, key schemas.Key, modelCatalog *modelcatalog.ModelCatalog) bool {
+	if len(key.BlacklistedModels) > 0 && keyModelListAllowsModel(provider, model, providerConfig, key.BlacklistedModels, modelCatalog) {
 		return false
 	}
 	if len(key.Models) > 0 {
-		return keyModelListAllowsModel(provider, model, key.Models, modelCatalog)
+		return keyModelListAllowsModel(provider, model, providerConfig, key.Models, modelCatalog)
 	}
 	return true
 }
 
 // keyModelListAllowsModel reports whether model matches a key allow/deny list entry,
 // using catalog-aware alias matching when model metadata is available.
-func keyModelListAllowsModel(provider schemas.ModelProvider, model string, allowedModels []string, modelCatalog *modelcatalog.ModelCatalog) bool {
+func keyModelListAllowsModel(provider schemas.ModelProvider, model string, providerConfig *configstore.ProviderConfig, allowedModels []string, modelCatalog *modelcatalog.ModelCatalog) bool {
 	if len(allowedModels) == 0 {
 		return false
 	}
@@ -912,7 +911,7 @@ func keyModelListAllowsModel(provider schemas.ModelProvider, model string, allow
 		return slices.Contains(allowedModels, model)
 	}
 
-	if modelCatalog.IsModelAllowedForProvider(provider, model, allowedModels) {
+	if modelCatalog.IsModelAllowedForProvider(provider, model, providerConfig, allowedModels) {
 		return true
 	}
 
@@ -1010,7 +1009,7 @@ func filterModelsByKeysWithAccessMap(config *configstore.ProviderConfig, provide
 	for _, model := range models {
 		grantedBy := make([]string, 0, len(matchedKeys))
 		for _, matched := range matchedKeys {
-			if keyAllowsModelForList(provider, model, matched.key, modelCatalog) {
+			if keyAllowsModelForList(provider, model, config, matched.key, modelCatalog) {
 				grantedBy = append(grantedBy, matched.id)
 			}
 		}
@@ -1282,7 +1281,6 @@ func (h *ProviderHandler) attemptModelDiscovery(ctx *fasthttp.RequestCtx, provid
 	defer cancel()
 
 	_, err := h.modelsManager.ReloadProvider(ctxWithTimeout, provider)
-
 	if err != nil {
 		return err
 	}
@@ -1391,8 +1389,8 @@ func validatePricingOverrideNonNegativeFields(index int, override schemas.Provid
 		"input_cost_per_token_above_200k_tokens":             override.InputCostPerTokenAbove200kTokens,
 		"output_cost_per_token_above_200k_tokens":            override.OutputCostPerTokenAbove200kTokens,
 		"cache_creation_input_token_cost_above_200k_tokens":  override.CacheCreationInputTokenCostAbove200kTokens,
-		"cache_read_input_token_cost_above_200k_tokens": override.CacheReadInputTokenCostAbove200kTokens,
-		"cache_read_input_token_cost": override.CacheReadInputTokenCost,
+		"cache_read_input_token_cost_above_200k_tokens":      override.CacheReadInputTokenCostAbove200kTokens,
+		"cache_read_input_token_cost":                        override.CacheReadInputTokenCost,
 		"cache_creation_input_token_cost": override.CacheCreationInputTokenCost,
 		"input_cost_per_token_batches":    override.InputCostPerTokenBatches,
 		"output_cost_per_token_batches":   override.OutputCostPerTokenBatches,
diff --git a/transports/bifrost-http/integrations/anthropic.go b/transports/bifrost-http/integrations/anthropic.go
index 25033b4928..7dc7ee76c1 100644
--- a/transports/bifrost-http/integrations/anthropic.go
+++ b/transports/bifrost-http/integrations/anthropic.go
@@ -14,6 +14,7 @@ import (
 
 	"github.com/maximhq/bifrost/core/schemas"
 	"github.com/maximhq/bifrost/transports/bifrost-http/lib"
+	"github.com/tidwall/gjson"
 	"github.com/valyala/fasthttp"
 )
 
@@ -119,9 +120,8 @@ func createAnthropicMessagesRouteConfig(pathPrefix string, logger schemas.Logger
 			if !ok {
 				return "", nil, fmt.Errorf("expected RawResponse string, got %T", resp.ExtraFields.RawResponse)
 			}
-			var rawResponseJSON anthropic.AnthropicStreamEvent
-			if err := sonic.Unmarshal([]byte(raw), &rawResponseJSON); err == nil {
-				return string(rawResponseJSON.Type), raw, nil
+			if t := gjson.Get(raw, "type"); t.Exists() {
+				return t.String(), raw, nil
 			}
 		}
 		// Fallback: if RawResponse is not available, use bifrost-to-anthropic conversion
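The anthropic.go hunk above replaces a full sonic.Unmarshal of every raw stream event with a single-field gjson lookup: the route config only needs the event's type string to label the SSE frame, so decoding the whole chunk was wasted work. A self-contained illustration of the same gjson call (the sample payload here is made up):

```go
package main

import (
	"fmt"

	"github.com/tidwall/gjson"
)

func main() {
	// Made-up Anthropic-style stream chunk; only "type" is needed.
	raw := `{"type":"content_block_delta","delta":{"type":"text_delta","text":"hi"}}`

	// gjson scans for the one path instead of allocating the full structure.
	if t := gjson.Get(raw, "type"); t.Exists() {
		fmt.Println(t.String()) // content_block_delta
	}
}
```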
diff --git a/transports/bifrost-http/integrations/bedrock.go b/transports/bifrost-http/integrations/bedrock.go
index 38ad98fbc6..2de43be580 100644
--- a/transports/bifrost-http/integrations/bedrock.go
+++ b/transports/bifrost-http/integrations/bedrock.go
@@ -156,10 +156,10 @@ func createBedrockInvokeWithResponseStreamRouteConfig(pathPrefix string, handler
 		// Check if we have raw response (which holds the chunk payload)
 		if rawResp, ok := resp.ExtraFields.RawResponse.(string); ok {
-			// Create BedrockStreamEvent with InvokeModelRawChunk
+			// Create BedrockStreamEvent with InvokeModelRawChunks
 			// The payload bytes are the raw JSON string
 			bedrockEvent := &bedrock.BedrockStreamEvent{
-				InvokeModelRawChunk: []byte(rawResp),
+				InvokeModelRawChunks: [][]byte{[]byte(rawResp)},
 			}
 			return "", bedrockEvent, nil
 		}
diff --git a/transports/bifrost-http/integrations/router.go b/transports/bifrost-http/integrations/router.go
index cee9721eb9..d1aa276a8d 100644
--- a/transports/bifrost-http/integrations/router.go
+++ b/transports/bifrost-http/integrations/router.go
@@ -1474,7 +1474,6 @@ func (g *GenericRouter) handleAsyncCreate(
 	}
 
 	operationType := config.GetHTTPRequestType(ctx)
-	vkValue := getVirtualKeyFromBifrostContext(bifrostCtx)
 	resultTTL := getResultTTLFromHeaderWithDefault(ctx, g.handlerStore.GetAsyncJobResultTTL())
 
 	// The operation closure runs the Bifrost client call in the background.
@@ -1491,7 +1490,7 @@
 		}
 	}
 
-	job, err := executor.SubmitJob(vkValue, resultTTL, operation, operationType)
+	job, err := executor.SubmitJob(bifrostCtx, resultTTL, operation, operationType)
 	if err != nil {
 		g.sendError(ctx, bifrostCtx, config.ErrorConverter, newBifrostError(err, "failed to create async job"))
diff --git a/transports/bifrost-http/lib/validator_test.go b/transports/bifrost-http/lib/validator_test.go
index afac1d4975..e0cbcd8e4b 100644
--- a/transports/bifrost-http/lib/validator_test.go
+++ b/transports/bifrost-http/lib/validator_test.go
@@ -680,17 +680,15 @@ func TestValidateConfigSchema_MCPClientConfig_Valid_Stdio(t *testing.T) {
 	}
 }
 
-func TestValidateConfigSchema_MCPClientConfig_Valid_Websocket(t *testing.T) {
-	// Valid MCP client config with websocket connection type
+func TestValidateConfigSchema_MCPClientConfig_Valid_Sse(t *testing.T) {
+	// Valid MCP client config with sse connection type
 	validConfig := `{
 		"mcp": {
 			"client_configs": [
 				{
 					"name": "my-mcp-client",
-					"connection_type": "websocket",
-					"websocket_config": {
-						"url": "ws://localhost:8080"
-					}
+					"connection_type": "sse",
+					"connection_string": "http://localhost:8080"
 				}
 			]
 		}
@@ -698,7 +696,7 @@
 
 	err := ValidateConfigSchema([]byte(validConfig), loadLocalSchema(t))
 	if err != nil {
-		t.Errorf("expected valid MCP client config (websocket) to pass validation, got error: %v", err)
+		t.Errorf("expected valid MCP client config (sse) to pass validation, got error: %v", err)
 	}
 }
 
@@ -710,9 +708,7 @@
 				{
 					"name": "my-mcp-client",
 					"connection_type": "http",
-					"http_config": {
-						"url": "http://localhost:8080"
-					}
+					"connection_string": "http://localhost:8080"
 				}
 			]
 		}
@@ -1202,7 +1198,7 @@ func TestValidateConfigSchema_OtelPlugin_Valid(t *testing.T) {
 				"name": "otel",
 				"config": {
 					"collector_url": "http://localhost:4318",
-					"trace_type": "otel",
+					"trace_type": "genai_extension",
 					"protocol": "http"
 				}
 			}
@@ -1223,7 +1219,7 @@ func TestValidateConfigSchema_OtelPlugin_MissingCollectorUrl(t *testing.T) {
 				"enabled": true,
 				"name": "otel",
 				"config": {
-					"trace_type": "otel",
+					"trace_type": "genai_extension",
 					"protocol": "http"
 				}
 			}
@@ -1266,7 +1262,7 @@ func TestValidateConfigSchema_OtelPlugin_MissingProtocol(t *testing.T) {
 				"name": "otel",
 				"config": {
"collector_url": "http://localhost:4318", - "trace_type": "otel" + "trace_type": "genai_extension" } } ] diff --git a/transports/config.schema.json b/transports/config.schema.json index 7e64fb2440..9459d18552 100644 --- a/transports/config.schema.json +++ b/transports/config.schema.json @@ -10,6 +10,12 @@ "description": "The schema version. This should be set to \"https://www.getbifrost.ai/schema\"", "const": "https://www.getbifrost.ai/schema" }, + "version": { + "type": "integer", + "description": "Controls how empty arrays in allow-list fields (models, allowed_models, key_ids, tools_to_execute) are interpreted. Omit or set to 2 for v1.5.0+ semantics: empty = deny all, [\"*\"] = allow all. Set to 1 to restore v1.4.x semantics: empty = allow all.", + "enum": [1, 2], + "default": 2 + }, "encryption_key": { "type": "string", "description": "You can set the value as env. to use an environment variable. We also read encryption key from BIFROST_ENCRYPTION_KEY environment variable. Note: once set, the encryption key cannot be changed unless you clean up the database. Accepts any string; a secure 32-byte AES-256 key will be derived using Argon2id KDF. If not provided, data will be saved in plain text. Recommended: use a passphrase of at least 16 bytes for better security" @@ -94,9 +100,29 @@ "minimum": 1, "description": "Maximum request body size in MB" }, - "enable_litellm_fallbacks": { - "type": "boolean", - "description": "Enable litellm-specific fallbacks for text completion for Groq" + "compat": { + "type": "object", + "description": "Compat plugin configuration for request type conversion, parameter dropping, and parameter value conversion", + "properties": { + "convert_text_to_chat": { + "type": "boolean", + "description": "Convert text completion requests to chat for models that only support chat" + }, + "convert_chat_to_responses": { + "type": "boolean", + "description": "Convert chat completion requests to responses for models that only support responses" + }, + "should_drop_params": { + "type": "boolean", + "description": "Drop unsupported parameters based on model catalog allowlist" + }, + "should_convert_params": { + "type": "boolean", + "description": "Converts model parameter values that are not supported by the model.", + "default": false + } + }, + "additionalProperties": false }, "header_filter_config": { "type": "object", @@ -180,6 +206,17 @@ "minimum": 0, "description": "Global tool sync interval in minutes (0 = disabled)", "default": 10 + }, + "mcp_disable_auto_tool_inject": { + "type": "boolean", + "description": "When true, MCP tools are not automatically injected into requests. 
Tools are only included when explicitly specified via request context filters or headers, such as x-bf-mcp-include-tools or x-bf-mcp-include-clients.", + "default": false + }, + "routing_chain_max_depth": { + "type": "integer", + "minimum": 1, + "description": "Maximum depth for routing rule chain evaluation", + "default": 10 } }, "additionalProperties": false @@ -219,7 +256,7 @@ "$ref": "#/$defs/provider" }, "ollama": { - "$ref": "#/$defs/provider" + "$ref": "#/$defs/provider_with_ollama_config" }, "groq": { "$ref": "#/$defs/provider" @@ -231,7 +268,7 @@ "$ref": "#/$defs/provider" }, "sgl": { - "$ref": "#/$defs/provider" + "$ref": "#/$defs/provider_with_sgl_config" }, "parasail": { "$ref": "#/$defs/provider" @@ -240,7 +277,7 @@ "$ref": "#/$defs/provider" }, "replicate": { - "$ref": "#/$defs/provider" + "$ref": "#/$defs/provider_with_replicate_config" }, "elevenlabs": { "$ref": "#/$defs/provider" @@ -256,6 +293,15 @@ }, "fireworks": { "$ref": "#/$defs/provider" + }, + "nebius": { + "$ref": "#/$defs/provider" + }, + "xai": { + "$ref": "#/$defs/provider" + }, + "runway": { + "$ref": "#/$defs/provider" } }, "additionalProperties": true @@ -292,17 +338,16 @@ "format": "date-time", "description": "Last time budget was reset" }, - "calendar_aligned": { - "type": "boolean", - "description": "Snap resets to calendar boundaries (day/week/month/year start)", - "default": false + "virtual_key_id": { + "type": "string", + "description": "ID of the virtual key this budget belongs to (mutually exclusive with provider_config_id)" + }, + "provider_config_id": { + "type": "integer", + "description": "ID of the provider config this budget belongs to (mutually exclusive with virtual_key_id)" } }, - "required": [ - "id", - "max_limit", - "reset_duration" - ], + "required": ["id", "max_limit", "reset_duration"], "additionalProperties": false } }, @@ -353,9 +398,7 @@ "description": "Last time request counter was reset" } }, - "required": [ - "id" - ], + "required": ["id"], "additionalProperties": false } }, @@ -382,10 +425,7 @@ "description": "Associated rate limit ID" } }, - "required": [ - "id", - "name" - ], + "required": ["id", "name"], "additionalProperties": false } }, @@ -428,10 +468,7 @@ "description": "Team claims data" } }, - "required": [ - "id", - "name" - ], + "required": ["id", "name"], "additionalProperties": false } }, @@ -462,6 +499,11 @@ "description": "Whether the virtual key is active", "default": true }, + "calendar_aligned": { + "type": "boolean", + "description": "Snap all budget resets to calendar boundaries (day, week, month, year)", + "default": false + }, "team_id": { "type": "string", "description": "Associated team ID (mutually exclusive with customer_id)" @@ -470,33 +512,26 @@ "type": "string", "description": "Associated customer ID (mutually exclusive with team_id)" }, - "budget_id": { - "type": "string", - "description": "Associated budget ID" - }, "rate_limit_id": { "type": "string", "description": "Associated rate limit ID" }, "provider_configs": { "type": "array", - "description": "Provider configurations for this virtual key (empty means all providers allowed)", + "description": "Provider configurations for this virtual key (empty means no providers allowed, deny-by-default)", "items": { "$ref": "#/$defs/virtual_key_provider_config" } }, "mcp_configs": { "type": "array", - "description": "MCP configurations for this virtual key", + "description": "MCP configurations for this virtual key (empty array means no MCP tools allowed, deny-by-default)", "items": { "$ref": 
"#/$defs/virtual_key_mcp_config" } } }, - "required": [ - "id", - "name" - ], + "required": ["id", "name"], "additionalProperties": false } }, @@ -507,6 +542,13 @@ "$ref": "#/$defs/routing_rule" } }, + "pricing_overrides": { + "type": "array", + "description": "Scoped pricing overrides applied at runtime by the model catalog", + "items": { + "$ref": "#/$defs/provider_pricing_override" + } + }, "auth_config": { "$ref": "#/$defs/auth_config" }, @@ -537,10 +579,7 @@ "description": "Rate limit ID to associate with this model" } }, - "required": [ - "id", - "model_name" - ], + "required": ["id", "model_name"], "additionalProperties": false } }, @@ -550,10 +589,18 @@ "items": { "type": "object", "properties": { + "id": { + "type": "string", + "description": "Provider row ID" + }, "name": { "type": "string", "description": "Provider name" }, + "description": { + "type": "string", + "description": "Operator-facing provider description" + }, "budget_id": { "type": "string", "description": "Associated budget ID" @@ -573,6 +620,21 @@ "store_raw_request_response": { "type": "boolean", "description": "Capture raw request/response for internal logging only; strip from API responses returned to clients (default: false)" + }, + "network_config": { + "$ref": "#/$defs/network_config" + }, + "proxy_config": { + "$ref": "#/$defs/proxy_config" + }, + "custom_provider_config": { + "$ref": "#/$defs/custom_provider_config" + }, + "concurrency_and_buffer_size": { + "$ref": "#/$defs/concurrency_and_buffer_size" + }, + "openai_config": { + "$ref": "#/$defs/openai_config" } }, "required": ["name"] @@ -612,12 +674,7 @@ }, "type": { "type": "string", - "enum": [ - "weaviate", - "redis", - "qdrant", - "pinecone" - ], + "enum": ["weaviate", "redis", "qdrant", "pinecone"], "description": "Vector store type (use \"redis\" for Redis or Valkey-compatible endpoints)" }, "config": { @@ -685,10 +742,7 @@ }, "type": { "type": "string", - "enum": [ - "sqlite", - "postgres" - ], + "enum": ["sqlite", "postgres"], "description": "Configuration store type" }, "config": { @@ -709,9 +763,7 @@ "description": "Database file path" } }, - "required": [ - "path" - ], + "required": ["path"], "additionalProperties": false } }, @@ -763,14 +815,7 @@ "default": 50 } }, - "required": [ - "host", - "port", - "user", - "password", - "db_name", - "ssl_mode" - ], + "required": ["host", "port", "user", "password", "db_name", "ssl_mode"], "additionalProperties": false } } @@ -789,10 +834,7 @@ }, "type": { "type": "string", - "enum": [ - "sqlite", - "postgres" - ], + "enum": ["sqlite", "postgres"], "description": "Logs store type" }, "config": { @@ -813,9 +855,7 @@ "description": "Database file path" } }, - "required": [ - "path" - ], + "required": ["path"], "additionalProperties": false } }, @@ -866,43 +906,124 @@ "default": 50 } }, - "required": [ - "host", - "port", - "user", - "password", - "db_name", - "ssl_mode" - ], + "required": ["host", "port", "user", "password", "db_name", "ssl_mode"], "additionalProperties": false } } ] + }, + "object_storage": { + "type": "object", + "description": "Optional object storage for offloading log payloads. When configured, large request/response payloads are stored in S3/GCS while the DB keeps only lightweight index data.", + "properties": { + "type": { + "type": "string", + "enum": ["s3", "gcs"], + "description": "Object storage backend type" + }, + "bucket": { + "type": "string", + "minLength": 1, + "description": "Bucket name. Supports env var reference (e.g. 
env.S3_BUCKET)" + }, + "prefix": { + "type": "string", + "description": "Key prefix for stored objects (default: bifrost)", + "default": "bifrost" + }, + "compress": { + "type": "boolean", + "description": "Enable gzip compression for stored objects. Default: false", + "default": false + } + }, + "required": ["type", "bucket"], + "if": { + "properties": { + "type": { + "const": "s3" + } + } + }, + "then": { + "properties": { + "type": true, + "bucket": true, + "prefix": true, + "region": { + "type": "string", + "description": "AWS region. Supports env var reference" + }, + "endpoint": { + "type": "string", + "description": "Custom S3-compatible endpoint for MinIO/R2. Supports env var reference" + }, + "access_key_id": { + "type": "string", + "description": "AWS access key ID. Omit to use default credential chain (instance role, env vars, etc.). Supports env var reference" + }, + "secret_access_key": { + "type": "string", + "description": "AWS secret access key. Supports env var reference" + }, + "session_token": { + "type": "string", + "description": "AWS session token for STS temporary credentials. Supports env var reference" + }, + "role_arn": { + "type": "string", + "description": "AWS IAM role ARN for STS AssumeRole. Works with static creds or instance role. Supports env var reference" + }, + "force_path_style": { + "type": "boolean", + "description": "Use path-style URLs for S3 (required for MinIO). Default: false", + "default": false + }, + "compress": true + }, + "dependentRequired": { + "access_key_id": ["secret_access_key"], + "secret_access_key": ["access_key_id"], + "session_token": ["access_key_id", "secret_access_key"] + }, + "additionalProperties": false + }, + "else": { + "properties": { + "type": true, + "bucket": true, + "prefix": true, + "credentials_json": { + "type": "string", + "description": "GCP service account credentials JSON or file path. Omit to use Application Default Credentials. Supports env var reference" + }, + "credentials": { + "type": "string", + "description": "Deprecated: use credentials_json. Kept for backwards compatibility." + }, + "project_id": { + "type": "string", + "description": "GCP project ID override. Supports env var reference" + }, + "compress": true + }, + "additionalProperties": false + } + }, + "retention_days": { + "type": "integer", + "minimum": 0, + "description": "Days to retain log entries. 0 disables retention-based cleanup." } }, "additionalProperties": false }, - "cluster_config": { - "$ref": "#/$defs/cluster_config" - }, - "saml_config": { - "$ref": "#/$defs/saml_config" - }, - "load_balancer_config": { - "$ref": "#/$defs/load_balancer_config" - }, - "guardrails_config": { - "$ref": "#/$defs/guardrails_config" - }, "plugins": { "type": "array", "description": "Plugins configuration", "items": { "type": "object", - "required": [ - "enabled", - "name" - ], + "required": ["enabled", "name"], "properties": { "enabled": { "type": "boolean", @@ -910,7 +1031,7 @@ }, "name": { "type": "string", - "description": "Name of the plugin (built-in: telemetry, logging, governance, maxim, semantic_cache, otel, or custom plugin name)" + "description": "Name of the plugin (built-in: telemetry, prompts, logging, governance, maxim, semantic_cache, otel, or custom plugin name)" }, "config": { "type": "object", @@ -930,8 +1051,8 @@ }, "placement": { "type": "string", - "enum": ["pre_builtin", "post_builtin"], - "description": "Whether this plugin runs before or after built-in plugins. 
Default: post_builtin", + "enum": ["pre_builtin", "post_builtin", "builtin"], + "description": "Whether this plugin runs before, after, or as a built-in. Default: post_builtin", "optional": true, "default": "post_builtin" }, @@ -952,9 +1073,7 @@ } }, "then": { - "required": [ - "config" - ], + "required": ["config"], "properties": { "config": { "type": "object", @@ -1030,9 +1149,7 @@ } }, "then": { - "required": [ - "config" - ], + "required": ["config"], "properties": { "config": { "type": "object", @@ -1064,9 +1181,7 @@ } }, "then": { - "required": [ - "config" - ], + "required": ["config"], "properties": { "config": { "type": "object", @@ -1102,9 +1217,7 @@ } }, "then": { - "required": [ - "config" - ], + "required": ["config"], "properties": { "config": { "type": "object", @@ -1119,9 +1232,7 @@ "description": "Optional default ID for the Maxim logger instance" } }, - "required": [ - "api_key" - ], + "required": ["api_key"], "additionalProperties": false } } @@ -1136,9 +1247,7 @@ } }, "then": { - "required": [ - "config" - ], + "required": ["config"], "properties": { "config": { "type": "object", @@ -1189,7 +1298,7 @@ "oneOf": [ { "type": "string", - "pattern": "^[0-9]+(ns|us|µs|ms|s|m|h)$" + "pattern": "^[0-9]+(ns|us|\u00b5s|ms|s|m|h)$" }, { "type": "integer", @@ -1234,9 +1343,7 @@ "description": "Exclude system prompt in cache key (default: false)" } }, - "required": [ - "dimension" - ], + "required": ["dimension"], "allOf": [ { "if": { @@ -1246,15 +1353,10 @@ "minLength": 1 } }, - "required": [ - "provider" - ] + "required": ["provider"] }, "then": { - "required": [ - "provider", - "embedding_model" - ], + "required": ["provider", "embedding_model"], "properties": { "dimension": { "type": "integer", @@ -1264,9 +1366,7 @@ }, "else": { "not": { - "required": [ - "embedding_model" - ] + "required": ["embedding_model"] }, "properties": { "dimension": { @@ -1290,9 +1390,7 @@ } }, "then": { - "required": [ - "config" - ], + "required": ["config"], "properties": { "config": { "type": "object", @@ -1318,17 +1416,12 @@ "trace_type": { "type": "string", "description": "Type of trace to use for the OTEL collector", - "enum": [ - "otel" - ] + "enum": ["genai_extension", "vercel", "open_inference"] }, "protocol": { "type": "string", "description": "Protocol to use for the OTEL collector", - "enum": [ - "http", - "grpc" - ] + "enum": ["http", "grpc"] }, "metrics_enabled": { "type": "boolean", @@ -1370,11 +1463,7 @@ "description": "Skip TLS verification (ignored if tls_ca_cert is set)" } }, - "required": [ - "collector_url", - "trace_type", - "protocol" - ], + "required": ["collector_url", "trace_type", "protocol"], "additionalProperties": false } } @@ -1389,9 +1478,7 @@ } }, "then": { - "required": [ - "config" - ], + "required": ["config"], "properties": { "config": { "type": "object", @@ -1437,14 +1524,26 @@ "additionalProperties": false } }, + "websocket": { + "$ref": "#/$defs/websocket_config" + }, + "guardrails_config": { + "$ref": "#/$defs/guardrails_config" + }, "audit_logs": { "$ref": "#/$defs/audit_logs_config" }, + "cluster_config": { + "$ref": "#/$defs/cluster_config" + }, + "load_balancer_config": { + "$ref": "#/$defs/load_balancer_config" + }, "large_payload_optimization": { "$ref": "#/$defs/large_payload_optimization" }, - "websocket": { - "$ref": "#/$defs/websocket_config" + "scim_config": { + "$ref": "#/$defs/scim_config" } }, "additionalProperties": false, @@ -1503,6 +1602,11 @@ "type": "string", "description": "CEL (Common Expression Language) expression for rule evaluation" }, + 
"chain_rule": { + "type": "boolean", + "default": false, + "description": "If true, re-evaluates routing chain after this rule matches" + }, "targets": { "type": "array", "minItems": 1, @@ -1527,7 +1631,7 @@ "default": "global" }, "scope_id": { - "type": ["string", "null"], + "type": "string", "description": "Entity ID for non-global scopes (required for non-global scope)" }, "priority": { @@ -1542,7 +1646,24 @@ } }, "required": ["id", "name", "targets"], - "additionalProperties": false + "additionalProperties": false, + "if": { + "properties": { + "scope": { + "enum": ["team", "customer", "virtual_key"] + } + }, + "required": ["scope"] + }, + "then": { + "required": ["scope_id"], + "properties": { + "scope_id": { + "type": "string", + "minLength": 1 + } + } + } }, "virtual_key_provider_config": { "type": "object", @@ -1561,276 +1682,30 @@ "description": "Provider name" }, "weight": { - "type": "number", - "description": "Weight for load balancing", - "default": 1.0 + "type": ["number", "null"], + "description": "Weight for load balancing (null opts out of weighted routing)", + "default": null }, "allowed_models": { "type": "array", - "description": "Allowed models for this provider config (empty means all models allowed)", + "description": "Allowed models for this provider config. Use [\"*\"] to allow all models; empty array denies all (deny-by-default).", "items": { "type": "string" } }, - "budget_id": { - "type": "string", - "description": "Associated budget ID" - }, "rate_limit_id": { "type": "string", "description": "Associated rate limit ID" }, - "keys": { + "key_ids": { "type": "array", - "description": "Provider keys for this config (empty means all keys allowed for this provider)", + "description": "Key identifiers allowed for this provider config. Use [\"*\"] to allow all keys; empty array denies all (deny-by-default). In config.json, values are key names. Via the API, values are key UUIDs.", "items": { - "type": "object", - "properties": { - "id": { - "type": "integer", - "description": "Key database ID (auto-generated)" - }, - "key_id": { - "type": "string", - "description": "Key UUID identifier" - }, - "name": { - "type": "string", - "description": "Key name (must be unique)" - }, - "value": { - "type": "string", - "description": "API key value (can use env. prefix)" - }, - "models": { - "type": "array", - "items": { - "type": "string" - }, - "description": "Supported models for this key" - }, - "weight": { - "type": "number", - "minimum": 0, - "default": 1.0, - "description": "Weight for load balancing" - }, - "azure_key_config": { - "type": "object", - "properties": { - "endpoint": { - "type": "string", - "description": "Azure endpoint (can use env. prefix)" - }, - "deployments": { - "type": "object", - "additionalProperties": { - "type": "string" - }, - "description": "Model to deployment mappings" - }, - "api_version": { - "type": "string", - "description": "Azure API version" - } - }, - "required": [ - "endpoint" - ], - "additionalProperties": false - }, - "vertex_key_config": { - "type": "object", - "properties": { - "project_id": { - "type": "string", - "description": "Google Cloud project ID (can use env. prefix)" - }, - "project_number": { - "type": "string", - "description": "Google Cloud project number" - }, - "region": { - "type": "string", - "description": "Google Cloud region" - }, - "auth_credentials": { - "type": "string", - "description": "Authentication credentials (can use env. 
prefix)" - }, - "deployments": { - "type": "object", - "additionalProperties": { - "type": "string" - }, - "description": "Model to deployment mappings" - } - }, - "required": [ - "project_id", - "region" - ], - "additionalProperties": false - }, - "bedrock_key_config": { - "type": "object", - "properties": { - "access_key": { - "type": "string", - "description": "AWS access key (can use env. prefix)" - }, - "secret_key": { - "type": "string", - "description": "AWS secret key (can use env. prefix)" - }, - "session_token": { - "type": "string", - "description": "AWS session token (can use env. prefix)" - }, - "region": { - "type": "string", - "description": "AWS region" - }, - "arn": { - "type": "string", - "description": "AWS ARN" - }, - "role_arn": { - "type": "string", - "description": "AWS IAM role ARN for AssumeRole (can use env. prefix)" - }, - "external_id": { - "type": "string", - "description": "External ID for AssumeRole (can use env. prefix)" - }, - "session_name": { - "type": "string", - "description": "Role session name for AssumeRole (can use env. prefix)" - }, - "deployments": { - "type": "object", - "additionalProperties": { - "type": "string" - }, - "description": "Model to deployment mappings" - }, - "batch_s3_config": { - "type": "object", - "description": "S3 bucket configuration for Bedrock batch operations", - "properties": { - "buckets": { - "type": "array", - "description": "List of S3 bucket configurations", - "items": { - "type": "object", - "properties": { - "bucket_name": { - "type": "string", - "description": "S3 bucket name" - }, - "prefix": { - "type": "string", - "description": "S3 key prefix for batch files" - }, - "is_default": { - "type": "boolean", - "description": "Whether this is the default bucket for batch operations" - } - }, - "required": ["bucket_name"], - "additionalProperties": false - } - } - }, - "additionalProperties": false - } - }, - "additionalProperties": false - }, - "vllm_key_config": { - "type": "object", - "properties": { - "url": { - "type": "string", - "minLength": 1, - "description": "VLLM server base URL (can use env. 
prefix)" - }, - "model_name": { - "type": "string", - "minLength": 1, - "description": "Exact model name served on this VLLM instance" - } - }, - "required": [ - "url", - "model_name" - ], - "additionalProperties": false - } - }, - "oneOf": [ - { - "not": { - "anyOf": [ - { "required": ["azure_key_config"] }, - { "required": ["vertex_key_config"] }, - { "required": ["bedrock_key_config"] }, - { "required": ["vllm_key_config"] } - ] - } - }, - { - "required": ["azure_key_config"], - "not": { - "anyOf": [ - { "required": ["vertex_key_config"] }, - { "required": ["bedrock_key_config"] }, - { "required": ["vllm_key_config"] } - ] - } - }, - { - "required": ["vertex_key_config"], - "not": { - "anyOf": [ - { "required": ["azure_key_config"] }, - { "required": ["bedrock_key_config"] }, - { "required": ["vllm_key_config"] } - ] - } - }, - { - "required": ["bedrock_key_config"], - "not": { - "anyOf": [ - { "required": ["azure_key_config"] }, - { "required": ["vertex_key_config"] }, - { "required": ["vllm_key_config"] } - ] - } - }, - { - "required": ["vllm_key_config"], - "not": { - "anyOf": [ - { "required": ["azure_key_config"] }, - { "required": ["vertex_key_config"] }, - { "required": ["bedrock_key_config"] } - ] - } - } - ], - "required": [ - "key_id", - "name", - "value" - ] + "type": "string" } } }, - "required": [ - "provider" - ], + "required": ["provider"], "additionalProperties": false }, "virtual_key_mcp_config": { @@ -1847,19 +1722,20 @@ }, "mcp_client_id": { "type": "integer", - "description": "Associated MCP client ID" + "description": "Associated MCP client ID (database format)" + }, + "mcp_client_name": { + "type": "string", + "description": "MCP client name (config file format \u2014 resolved to mcp_client_id at startup)" }, "tools_to_execute": { "type": "array", - "description": "Tools to execute for this MCP config", + "description": "Include-only list of tools this Virtual Key is permitted to execute from this MCP client. 
['*'] means all tools allowed, [] means no tools allowed (deny-by-default).", "items": { "type": "string" } } }, - "required": [ - "mcp_client_id" - ], "additionalProperties": false }, "auth_config": { @@ -1904,169 +1780,78 @@ }, "additionalProperties": false }, - "pricing_override_match_type": { - "type": "string", - "enum": [ - "exact", - "wildcard", - "regex" - ] - }, - "pricing_override_request_type": { - "type": "string", - "enum": [ - "text_completion", - "text_completion_stream", - "chat_completion", - "chat_completion_stream", - "responses", - "responses_stream", - "embedding", - "rerank", - "ocr", - "speech", - "speech_stream", - "transcription", - "transcription_stream", - "image_generation", - "image_generation_stream" - ] - }, - "provider_pricing_override": { + "network_config": { "type": "object", "properties": { - "model_pattern": { + "base_url": { "type": "string", - "minLength": 1 + "format": "uri", + "description": "Base URL for the provider (optional, required for Ollama)" }, - "match_type": { - "$ref": "#/$defs/pricing_override_match_type" + "extra_headers": { + "type": "object", + "additionalProperties": { + "type": "string" + }, + "description": "Additional headers to send with requests" }, - "request_types": { - "type": "array", - "items": { - "$ref": "#/$defs/pricing_override_request_type" - } + "default_request_timeout_in_seconds": { + "type": "integer", + "minimum": 1, + "description": "Default request timeout in seconds" }, - "input_cost_per_token": { "type": "number", "minimum": 0 }, - "output_cost_per_token": { "type": "number", "minimum": 0 }, - "input_cost_per_video_per_second": { "type": "number", "minimum": 0 }, - "input_cost_per_audio_per_second": { "type": "number", "minimum": 0 }, - "input_cost_per_character": { "type": "number", "minimum": 0 }, - "output_cost_per_character": { "type": "number", "minimum": 0 }, - "input_cost_per_token_above_128k_tokens": { "type": "number", "minimum": 0 }, - "input_cost_per_character_above_128k_tokens": { "type": "number", "minimum": 0 }, - "input_cost_per_image_above_128k_tokens": { "type": "number", "minimum": 0 }, - "input_cost_per_video_per_second_above_128k_tokens": { "type": "number", "minimum": 0 }, - "input_cost_per_audio_per_second_above_128k_tokens": { "type": "number", "minimum": 0 }, - "output_cost_per_token_above_128k_tokens": { "type": "number", "minimum": 0 }, - "output_cost_per_character_above_128k_tokens": { "type": "number", "minimum": 0 }, - "input_cost_per_token_above_200k_tokens": { "type": "number", "minimum": 0 }, - "output_cost_per_token_above_200k_tokens": { "type": "number", "minimum": 0 }, - "cache_creation_input_token_cost_above_200k_tokens": { "type": "number", "minimum": 0 }, - "cache_read_input_token_cost_above_200k_tokens": { "type": "number", "minimum": 0 }, - "cache_read_input_token_cost": { "type": "number", "minimum": 0 }, - "cache_creation_input_token_cost": { "type": "number", "minimum": 0 }, - "input_cost_per_token_batches": { "type": "number", "minimum": 0 }, - "output_cost_per_token_batches": { "type": "number", "minimum": 0 }, - "input_cost_per_image_token": { "type": "number", "minimum": 0 }, - "output_cost_per_image_token": { "type": "number", "minimum": 0 }, - "input_cost_per_image": { "type": "number", "minimum": 0 }, - "output_cost_per_image": { "type": "number", "minimum": 0 }, - "cache_read_input_image_token_cost": { "type": "number", "minimum": 0 } - }, - "required": [ - "model_pattern", - "match_type" - ], - "additionalProperties": false - }, - "custom_provider_config": { 
- "type": "object", - "description": "Custom provider configuration for extending or customizing provider behavior", - "properties": { - "is_key_less": { + "max_retries": { + "type": "integer", + "minimum": 0, + "description": "Maximum number of retries" + }, + "retry_backoff_initial": { + "type": "integer", + "minimum": 0, + "description": "Initial retry backoff in milliseconds" + }, + "retry_backoff_max": { + "type": "integer", + "minimum": 0, + "description": "Maximum retry backoff in milliseconds" + }, + "enforce_http2": { "type": "boolean", - "description": "Whether the custom provider requires a key" + "description": "Force HTTP/2 on provider connections (relevant for Bedrock and other net/http-based providers)" }, - "base_provider_type": { + "insecure_skip_verify": { + "type": "boolean", + "description": "Disable TLS certificate verification for provider connections. This bypasses server certificate validation and should be used only as a last resort when a trusted CA chain cannot be configured. Prefer ca_cert_pem for self-signed or private CA deployments." + }, + "ca_cert_pem": { "type": "string", - "description": "Base provider type to extend" + "description": "PEM-encoded CA certificate to trust for provider endpoint connections (e.g. self-signed or internal CA)" }, - "allowed_requests": { - "type": "object", - "description": "Allowed request types for the custom provider", - "properties": { - "list_models": { "type": "boolean" }, - "text_completion": { "type": "boolean" }, - "text_completion_stream": { "type": "boolean" }, - "chat_completion": { "type": "boolean" }, - "chat_completion_stream": { "type": "boolean" }, - "responses": { "type": "boolean" }, - "responses_stream": { "type": "boolean" }, - "count_tokens": { "type": "boolean" }, - "embedding": { "type": "boolean" }, - "rerank": { "type": "boolean" }, - "ocr": { "type": "boolean" }, - "speech": { "type": "boolean" }, - "speech_stream": { "type": "boolean" }, - "transcription": { "type": "boolean" }, - "transcription_stream": { "type": "boolean" }, - "image_generation": { "type": "boolean" }, - "image_generation_stream": { "type": "boolean" }, - "image_edit": { "type": "boolean" }, - "image_edit_stream": { "type": "boolean" }, - "image_variation": { "type": "boolean" }, - "video_generation": { "type": "boolean" }, - "video_retrieve": { "type": "boolean" }, - "video_download": { "type": "boolean" }, - "video_delete": { "type": "boolean" }, - "video_list": { "type": "boolean" }, - "video_remix": { "type": "boolean" }, - "batch_create": { "type": "boolean" }, - "batch_list": { "type": "boolean" }, - "batch_retrieve": { "type": "boolean" }, - "batch_cancel": { "type": "boolean" }, - "batch_delete": { "type": "boolean" }, - "batch_results": { "type": "boolean" }, - "file_upload": { "type": "boolean" }, - "file_list": { "type": "boolean" }, - "file_retrieve": { "type": "boolean" }, - "file_delete": { "type": "boolean" }, - "file_content": { "type": "boolean" }, - "container_create": { "type": "boolean" }, - "container_list": { "type": "boolean" }, - "container_retrieve": { "type": "boolean" }, - "container_delete": { "type": "boolean" }, - "container_file_create": { "type": "boolean" }, - "container_file_list": { "type": "boolean" }, - "container_file_retrieve": { "type": "boolean" }, - "container_file_content": { "type": "boolean" }, - "container_file_delete": { "type": "boolean" }, - "passthrough": { "type": "boolean" }, - "passthrough_stream": { "type": "boolean" } - }, - "additionalProperties": false + 
"stream_idle_timeout_in_seconds": { + "type": "integer", + "minimum": 5, + "maximum": 3600, + "description": "Idle timeout per stream chunk in seconds. If no data is received for this many seconds, the stream is closed. Default: 60." }, - "request_path_overrides": { + "max_conns_per_host": { + "type": "integer", + "minimum": 1, + "maximum": 10000, + "description": "Maximum number of TCP connections per provider host. For HTTP/2 (e.g. Bedrock), each connection supports ~100 concurrent streams. Default: 5000." + }, + "beta_header_overrides": { "type": "object", - "description": "Mapping of request type to custom path overriding the default provider path", "additionalProperties": { - "type": "string" - } + "type": "boolean" + }, + "description": "Override default Anthropic beta header support per provider. Keys are header prefixes (e.g. 'redact-thinking-'), values are true (supported) or false (unsupported). Headers not listed use the built-in defaults." } }, - "required": ["base_provider_type"], "additionalProperties": false }, - "network_config": { + "network_config_without_base_url": { "type": "object", "properties": { - "base_url": { - "type": "string", - "format": "uri", - "description": "Base URL for the provider (optional, required for Ollama)" - }, "extra_headers": { "type": "object", "additionalProperties": { @@ -2084,16 +1869,20 @@ "minimum": 0, "description": "Maximum number of retries" }, - "retry_backoff_initial_ms": { + "retry_backoff_initial": { "type": "integer", "minimum": 0, "description": "Initial retry backoff in milliseconds" }, - "retry_backoff_max_ms": { + "retry_backoff_max": { "type": "integer", "minimum": 0, "description": "Maximum retry backoff in milliseconds" }, + "enforce_http2": { + "type": "boolean", + "description": "Force HTTP/2 on provider connections (relevant for Bedrock and other net/http-based providers)" + }, "insecure_skip_verify": { "type": "boolean", "description": "Disable TLS certificate verification for provider connections. This bypasses server certificate validation and should be used only as a last resort when a trusted CA chain cannot be configured. Prefer ca_cert_pem for self-signed or private CA deployments." @@ -2124,7 +1913,18 @@ }, "additionalProperties": false }, - "concurrency_config": { + "openai_config": { + "type": "object", + "description": "OpenAI-specific provider settings", + "properties": { + "disable_store": { + "type": "boolean", + "description": "Disable OpenAI Responses API conversation storage." + } + }, + "additionalProperties": false + }, + "concurrency_and_buffer_size": { "type": "object", "properties": { "concurrency": { @@ -2138,10 +1938,7 @@ "description": "Buffer size for requests" } }, - "required": [ - "concurrency", - "buffer_size" - ], + "required": ["concurrency", "buffer_size"], "additionalProperties": false }, "base_key": { @@ -2160,8 +1957,7 @@ "items": { "type": "string" }, - "default": [], - "description": "Supported models for this key" + "description": "Models this key can access. Use [\"*\"] to allow all models; empty array denies all (deny-by-default)." 
}, "weight": { "type": "number", @@ -2172,12 +1968,20 @@ "type": "boolean", "description": "Whether this key can be used for batch API operations (default: false)", "default": false + }, + "aliases": { + "type": "object", + "additionalProperties": { + "type": "string", + "minLength": 1 + }, + "propertyNames": { + "minLength": 1 + }, + "description": "Model alias mappings: maps a model name to a provider-specific identifier (deployment name, inference profile ID, fine-tuned model ID, etc.)" } }, - "required": [ - "name", - "weight" - ] + "required": ["name", "weight"] }, "bedrock_key": { "allOf": [ @@ -2260,15 +2064,10 @@ "additionalProperties": false } }, - "required": [ - "region" - ], + "required": ["region"], "additionalProperties": false } - }, - "required": [ - "bedrock_key_config" - ] + } } ] }, @@ -2294,16 +2093,83 @@ "description": "Exact model name served on this VLLM instance" } }, - "required": [ - "url", - "model_name" - ], + "required": ["url", "model_name"], "additionalProperties": false } }, - "required": [ - "vllm_key_config" - ] + "required": ["vllm_key_config"] + } + ] + }, + "replicate_key": { + "allOf": [ + { + "$ref": "#/$defs/base_key" + }, + { + "type": "object", + "properties": { + "replicate_key_config": { + "type": "object", + "properties": { + "use_deployments_endpoint": { + "type": "boolean", + "description": "Whether to use the deployments endpoint instead of the models endpoint (default: false)" + } + }, + "additionalProperties": false + } + } + } + ] + }, + "ollama_key": { + "allOf": [ + { + "$ref": "#/$defs/base_key" + }, + { + "type": "object", + "properties": { + "ollama_key_config": { + "type": "object", + "properties": { + "url": { + "type": "string", + "minLength": 1, + "description": "Ollama server base URL (can use env. prefix)" + } + }, + "required": ["url"], + "additionalProperties": false + } + }, + "required": ["ollama_key_config"] + } + ] + }, + "sgl_key": { + "allOf": [ + { + "$ref": "#/$defs/base_key" + }, + { + "type": "object", + "properties": { + "sgl_key_config": { + "type": "object", + "properties": { + "url": { + "type": "string", + "minLength": 1, + "description": "SGLang server base URL (can use env. prefix)" + } + }, + "required": ["url"], + "additionalProperties": false + } + }, + "required": ["sgl_key_config"] } ] }, @@ -2322,28 +2188,16 @@ "type": "string", "description": "Azure endpoint (can use env. prefix)" }, - "deployments": { - "type": "object", - "additionalProperties": { - "type": "string" - }, - "description": "Model to deployment mappings" - }, "api_version": { "type": "string", "description": "Azure API version" } }, - "required": [ - "endpoint", - "api_version" - ], + "required": ["endpoint", "api_version"], "additionalProperties": false } }, - "required": [ - "azure_key_config" - ] + "required": ["azure_key_config"] } ] }, @@ -2373,35 +2227,140 @@ "auth_credentials": { "type": "string", "description": "Authentication credentials (can use env. 
prefix)" - }, - "deployments": { - "type": "object", - "additionalProperties": { - "type": "string" - }, - "description": "Model to deployment mappings" } }, - "required": [ - "project_id", - "region" - ], + "required": ["project_id", "region"], "additionalProperties": false } }, - "required": [ - "vertex_key_config" - ] + "required": ["vertex_key_config"] + } + ] + }, + "provider": { + "type": "object", + "properties": { + "keys": { + "type": "array", + "items": { + "$ref": "#/$defs/base_key" + }, + "minItems": 1, + "description": "API keys for this provider" + }, + "network_config": { + "$ref": "#/$defs/network_config" + }, + "concurrency_and_buffer_size": { + "$ref": "#/$defs/concurrency_and_buffer_size" + }, + "proxy_config": { + "$ref": "#/$defs/proxy_config" + }, + "send_back_raw_request": { + "type": "boolean", + "description": "Include raw request in BifrostResponse (default: false)" + }, + "send_back_raw_response": { + "type": "boolean", + "description": "Include raw response in BifrostResponse (default: false)" + }, + "store_raw_request_response": { + "type": "boolean", + "description": "Capture raw request/response for internal logging only; strip from API responses returned to clients (default: false)" + }, + "custom_provider_config": { + "$ref": "#/$defs/custom_provider_config" + } + }, + "required": ["keys"], + "additionalProperties": false + }, + "provider_with_bedrock_config": { + "type": "object", + "properties": { + "keys": { + "type": "array", + "items": { + "$ref": "#/$defs/bedrock_key" + }, + "minItems": 1, + "description": "API keys for this provider" + }, + "network_config": { + "$ref": "#/$defs/network_config" + }, + "concurrency_and_buffer_size": { + "$ref": "#/$defs/concurrency_and_buffer_size" + }, + "proxy_config": { + "$ref": "#/$defs/proxy_config" + }, + "send_back_raw_request": { + "type": "boolean", + "description": "Include raw request in BifrostResponse (default: false)" + }, + "send_back_raw_response": { + "type": "boolean", + "description": "Include raw response in BifrostResponse (default: false)" + }, + "store_raw_request_response": { + "type": "boolean", + "description": "Capture raw request/response for internal logging only; strip from API responses returned to clients (default: false)" + }, + "custom_provider_config": { + "$ref": "#/$defs/custom_provider_config" + } + }, + "required": ["keys"], + "additionalProperties": false + }, + "provider_with_vllm_config": { + "type": "object", + "properties": { + "keys": { + "type": "array", + "items": { + "$ref": "#/$defs/vllm_key" + }, + "minItems": 1, + "description": "API keys for this provider" + }, + "network_config": { + "$ref": "#/$defs/network_config_without_base_url" + }, + "concurrency_and_buffer_size": { + "$ref": "#/$defs/concurrency_and_buffer_size" + }, + "proxy_config": { + "$ref": "#/$defs/proxy_config" + }, + "send_back_raw_request": { + "type": "boolean", + "description": "Include raw request in BifrostResponse (default: false)" + }, + "send_back_raw_response": { + "type": "boolean", + "description": "Include raw response in BifrostResponse (default: false)" + }, + "store_raw_request_response": { + "type": "boolean", + "description": "Capture raw request/response for internal logging only; strip from API responses returned to clients (default: false)" + }, + "custom_provider_config": { + "$ref": "#/$defs/custom_provider_config" } - ] + }, + "required": ["keys"], + "additionalProperties": false }, - "provider": { + "provider_with_replicate_config": { "type": "object", "properties": { "keys": 
{ "type": "array", "items": { - "$ref": "#/$defs/base_key" + "$ref": "#/$defs/replicate_key" }, "minItems": 1, "description": "API keys for this provider" @@ -2410,7 +2369,7 @@ "$ref": "#/$defs/network_config" }, "concurrency_and_buffer_size": { - "$ref": "#/$defs/concurrency_config" + "$ref": "#/$defs/concurrency_and_buffer_size" }, "proxy_config": { "$ref": "#/$defs/proxy_config" @@ -2429,27 +2388,18 @@ }, "custom_provider_config": { "$ref": "#/$defs/custom_provider_config" - }, - "pricing_overrides": { - "type": "array", - "items": { - "$ref": "#/$defs/provider_pricing_override" - }, - "description": "Provider-level pricing overrides matched by model pattern" } }, - "required": [ - "keys" - ], + "required": ["keys"], "additionalProperties": false }, - "provider_with_bedrock_config": { + "provider_with_azure_config": { "type": "object", "properties": { "keys": { "type": "array", "items": { - "$ref": "#/$defs/bedrock_key" + "$ref": "#/$defs/azure_key" }, "minItems": 1, "description": "API keys for this provider" @@ -2458,7 +2408,7 @@ "$ref": "#/$defs/network_config" }, "concurrency_and_buffer_size": { - "$ref": "#/$defs/concurrency_config" + "$ref": "#/$defs/concurrency_and_buffer_size" }, "proxy_config": { "$ref": "#/$defs/proxy_config" @@ -2477,27 +2427,18 @@ }, "custom_provider_config": { "$ref": "#/$defs/custom_provider_config" - }, - "pricing_overrides": { - "type": "array", - "items": { - "$ref": "#/$defs/provider_pricing_override" - }, - "description": "Provider-level pricing overrides matched by model pattern" } }, - "required": [ - "keys" - ], + "required": ["keys"], "additionalProperties": false }, - "provider_with_vllm_config": { + "provider_with_vertex_config": { "type": "object", "properties": { "keys": { "type": "array", "items": { - "$ref": "#/$defs/vllm_key" + "$ref": "#/$defs/vertex_key" }, "minItems": 1, "description": "API keys for this provider" @@ -2506,7 +2447,7 @@ "$ref": "#/$defs/network_config" }, "concurrency_and_buffer_size": { - "$ref": "#/$defs/concurrency_config" + "$ref": "#/$defs/concurrency_and_buffer_size" }, "proxy_config": { "$ref": "#/$defs/proxy_config" @@ -2525,36 +2466,27 @@ }, "custom_provider_config": { "$ref": "#/$defs/custom_provider_config" - }, - "pricing_overrides": { - "type": "array", - "items": { - "$ref": "#/$defs/provider_pricing_override" - }, - "description": "Provider-level pricing overrides matched by model pattern" } }, - "required": [ - "keys" - ], + "required": ["keys"], "additionalProperties": false }, - "provider_with_azure_config": { + "provider_with_ollama_config": { "type": "object", "properties": { "keys": { "type": "array", "items": { - "$ref": "#/$defs/azure_key" + "$ref": "#/$defs/ollama_key" }, "minItems": 1, "description": "API keys for this provider" }, "network_config": { - "$ref": "#/$defs/network_config" + "$ref": "#/$defs/network_config_without_base_url" }, "concurrency_and_buffer_size": { - "$ref": "#/$defs/concurrency_config" + "$ref": "#/$defs/concurrency_and_buffer_size" }, "proxy_config": { "$ref": "#/$defs/proxy_config" @@ -2573,36 +2505,27 @@ }, "custom_provider_config": { "$ref": "#/$defs/custom_provider_config" - }, - "pricing_overrides": { - "type": "array", - "items": { - "$ref": "#/$defs/provider_pricing_override" - }, - "description": "Provider-level pricing overrides matched by model pattern" } }, - "required": [ - "keys" - ], + "required": ["keys"], "additionalProperties": false }, - "provider_with_vertex_config": { + "provider_with_sgl_config": { "type": "object", "properties": { "keys": { "type": 
"array", "items": { - "$ref": "#/$defs/vertex_key" + "$ref": "#/$defs/sgl_key" }, "minItems": 1, "description": "API keys for this provider" }, "network_config": { - "$ref": "#/$defs/network_config" + "$ref": "#/$defs/network_config_without_base_url" }, "concurrency_and_buffer_size": { - "$ref": "#/$defs/concurrency_config" + "$ref": "#/$defs/concurrency_and_buffer_size" }, "proxy_config": { "$ref": "#/$defs/proxy_config" @@ -2621,18 +2544,9 @@ }, "custom_provider_config": { "$ref": "#/$defs/custom_provider_config" - }, - "pricing_overrides": { - "type": "array", - "items": { - "$ref": "#/$defs/provider_pricing_override" - }, - "description": "Provider-level pricing overrides matched by model pattern" } }, - "required": [ - "keys" - ], + "required": ["keys"], "additionalProperties": false }, "mcp_client_config": { @@ -2652,12 +2566,7 @@ }, "connection_type": { "type": "string", - "enum": [ - "stdio", - "websocket", - "http", - "sse" - ], + "enum": ["stdio", "http", "sse", "inprocess"], "description": "Connection type for MCP client" }, "connection_string": { @@ -2666,12 +2575,12 @@ }, "auth_type": { "type": "string", - "enum": ["none", "headers", "oauth"], + "enum": ["none", "headers", "oauth", "per_user_oauth"], "description": "Authentication type for MCP connection" }, "oauth_config_id": { "type": "string", - "description": "OAuth config ID reference (for oauth auth type)" + "description": "OAuth config ID reference (required when auth_type is 'oauth' or 'per_user_oauth')" }, "headers": { "type": "object", @@ -2702,37 +2611,7 @@ "description": "Environment variables" } }, - "required": [ - "command" - ], - "additionalProperties": false - }, - "websocket_config": { - "type": "object", - "properties": { - "url": { - "type": "string", - "format": "uri", - "description": "WebSocket URL" - } - }, - "required": [ - "url" - ], - "additionalProperties": false - }, - "http_config": { - "type": "object", - "properties": { - "url": { - "type": "string", - "format": "uri", - "description": "HTTP URL" - } - }, - "required": [ - "url" - ], + "required": ["command"], "additionalProperties": false }, "tools_to_execute": { @@ -2753,6 +2632,13 @@ "type": "string", "description": "Per-client override for tool sync interval (Go duration, e.g. '10m', '1h', 0 = use global, negative = disabled)" }, + "allowed_extra_headers": { + "type": "array", + "items": { + "type": "string" + }, + "description": "Allowlist of request-level headers that callers may forward to this MCP server at execution time. Use ['*'] to allow all headers." + }, "is_ping_available": { "type": "boolean", "description": "Whether the MCP server supports ping for health checks (default: true)", @@ -2765,13 +2651,26 @@ "type": "number", "minimum": 0 } + }, + "allow_on_all_virtual_keys": { + "type": "boolean", + "description": "When true, this MCP server is accessible to all virtual keys without requiring explicit per-key assignment. All tools are allowed by default. 
If a virtual key has an explicit MCP config for this server, that config takes precedence and overrides this behaviour.", + "default": false } }, - "required": [ - "name", - "connection_type" - ], + "required": ["name", "connection_type"], "additionalProperties": false, + "if": { + "properties": { + "auth_type": { + "enum": ["oauth", "per_user_oauth"] + } + }, + "required": ["auth_type"] + }, + "then": { + "required": ["oauth_config_id"] + }, "oneOf": [ { "properties": { @@ -2779,9 +2678,7 @@ "const": "stdio" } }, - "required": [ - "stdio_config" - ] + "required": ["stdio_config"] }, { "properties": { @@ -2789,9 +2686,7 @@ "const": "websocket" } }, - "required": [ - "websocket_config" - ] + "required": ["websocket_config"] }, { "properties": { @@ -2800,8 +2695,12 @@ } }, "anyOf": [ - { "required": ["http_config"] }, - { "required": ["connection_string"] } + { + "required": ["http_config"] + }, + { + "required": ["connection_string"] + } ] }, { @@ -2810,9 +2709,7 @@ "const": "sse" } }, - "required": [ - "connection_string" - ] + "required": ["connection_string"] } ] }, @@ -2835,6 +2732,11 @@ "type": "string", "enum": ["server", "tool"], "description": "How tools are exposed in VFS for code execution" + }, + "disable_auto_tool_inject": { + "type": "boolean", + "description": "When true, MCP tools are not automatically injected into requests. Tools are only included when explicitly specified via request context filters or headers, such as x-bf-mcp-include-tools or x-bf-mcp-include-clients.", + "default": false } } }, @@ -2873,7 +2775,7 @@ }, "timeout": { "type": "string", - "pattern": "^[0-9]+(ns|us|µs|ms|s|m|h)$", + "pattern": "^[0-9]+(ns|us|\u00b5s|ms|s|m|h)$", "description": "Timeout for Weaviate operations (e.g., '5s')" }, "class_name": { @@ -2888,10 +2790,7 @@ "description": "Properties for Weaviate vector store" } }, - "required": [ - "scheme", - "host" - ], + "required": ["scheme", "host"], "additionalProperties": false }, "redis_config": { @@ -2952,38 +2851,36 @@ }, "conn_max_lifetime": { "type": "string", - "pattern": "^[0-9]+(ns|us|µs|ms|s|m|h)$", + "pattern": "^[0-9]+(ns|us|\u00b5s|ms|s|m|h)$", "description": "Connection maximum lifetime (e.g., '30m')" }, "conn_max_idle_time": { "type": "string", - "pattern": "^[0-9]+(ns|us|µs|ms|s|m|h)$", + "pattern": "^[0-9]+(ns|us|\u00b5s|ms|s|m|h)$", "description": "Connection maximum idle time (e.g., '5m')" }, "dial_timeout": { "type": "string", - "pattern": "^[0-9]+(ns|us|µs|ms|s|m|h)$", + "pattern": "^[0-9]+(ns|us|\u00b5s|ms|s|m|h)$", "description": "Timeout for socket connection (e.g., '5s')" }, "read_timeout": { "type": "string", - "pattern": "^[0-9]+(ns|us|µs|ms|s|m|h)$", + "pattern": "^[0-9]+(ns|us|\u00b5s|ms|s|m|h)$", "description": "Timeout for socket reads (e.g., '3s')" }, "write_timeout": { "type": "string", - "pattern": "^[0-9]+(ns|us|µs|ms|s|m|h)$", + "pattern": "^[0-9]+(ns|us|\u00b5s|ms|s|m|h)$", "description": "Timeout for socket writes (e.g., '3s')" }, "context_timeout": { "type": "string", - "pattern": "^[0-9]+(ns|us|µs|ms|s|m|h)$", + "pattern": "^[0-9]+(ns|us|\u00b5s|ms|s|m|h)$", "description": "Timeout for Redis operations (e.g., '10s')" } }, - "required": [ - "addr" - ], + "required": ["addr"], "additionalProperties": false }, "qdrant_config": { @@ -3009,9 +2906,7 @@ "default": false } }, - "required": [ - "host" - ], + "required": ["host"], "additionalProperties": false }, "pinecone_config": { @@ -3027,10 +2922,7 @@ "description": "Index host URL from Pinecone console - REQUIRED (e.g., 
your-index.svc.environment.pinecone.io)" } }, - "required": [ - "api_key", - "index_host" - ], + "required": ["api_key", "index_host"], "additionalProperties": false }, "proxy_config": { @@ -3039,12 +2931,7 @@ "properties": { "type": { "type": "string", - "enum": [ - "none", - "http", - "socks5", - "environment" - ], + "enum": ["none", "http", "socks5", "environment"], "description": "Type of proxy to use" }, "url": { @@ -3065,9 +2952,7 @@ "description": "PEM-encoded CA certificate to trust for TLS connections through the proxy (for SSL-intercepting proxies)" } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, "cluster_config": { @@ -3120,18 +3005,11 @@ "description": "Number of failed probes before marking as failed" } }, - "required": [ - "timeout_seconds", - "success_threshold", - "failure_threshold" - ], + "required": ["timeout_seconds", "success_threshold", "failure_threshold"], "additionalProperties": false } }, - "required": [ - "port", - "config" - ], + "required": ["port", "config"], "additionalProperties": false }, "discovery": { @@ -3144,14 +3022,7 @@ }, "type": { "type": "string", - "enum": [ - "kubernetes", - "dns", - "udp", - "consul", - "etcd", - "mdns" - ], + "enum": ["kubernetes", "dns", "udp", "consul", "etcd", "mdns"], "description": "Discovery mechanism type" }, "service_name": { @@ -3165,9 +3036,8 @@ "description": "Port to bind for cluster communication" }, "dial_timeout": { - "type": "integer", - "minimum": 0, - "description": "Timeout for discovery dial operations in nanoseconds" + "type": "string", + "description": "Timeout for discovery dial operations as a Go duration string (e.g. '5s', '1m')" }, "allowed_address_space": { "type": "array", @@ -3213,18 +3083,14 @@ "description": "mDNS service name for local network discovery" } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false } }, - "required": [ - "enabled" - ], + "required": ["enabled"], "additionalProperties": false }, - "saml_config": { + "scim_config": { "type": "object", "description": "SAML/SCIM (System for Cross-domain Identity Management) configuration", "properties": { @@ -3234,10 +3100,7 @@ }, "provider": { "type": "string", - "enum": [ - "okta", - "entra" - ], + "enum": ["okta", "entra"], "description": "SCIM provider type" }, "config": { @@ -3245,9 +3108,7 @@ "description": "Provider-specific configuration" } }, - "required": [ - "enabled" - ], + "required": ["enabled"], "additionalProperties": false, "allOf": [ { @@ -3299,7 +3160,11 @@ }, "clientSecret": { "type": "string", - "description": "Okta client secret (optional, required for token revocation)" + "description": "Okta client secret" + }, + "apiToken": { + "type": "string", + "description": "Okta API token for Admin API access" }, "audience": { "type": "string", @@ -3321,10 +3186,7 @@ "default": "roles" } }, - "required": [ - "issuerUrl", - "clientId" - ], + "required": ["issuerUrl", "clientId", "clientSecret", "apiToken"], "additionalProperties": false }, "entra_config": { @@ -3374,10 +3236,7 @@ "default": "roles" } }, - "required": [ - "tenantId", - "clientId" - ], + "required": ["tenantId", "clientId"], "additionalProperties": false }, "load_balancer_config": { @@ -3411,9 +3270,7 @@ } } }, - "required": [ - "enabled" - ], + "required": ["enabled"], "additionalProperties": false }, "guardrails_config": { @@ -3448,11 +3305,7 @@ }, "apply_to": { "type": "string", - "enum": [ - "input", - "output", - "both" - ], + "enum": ["input", "output", "both"], "description": 
"When to apply the guardrail (input, output, or both)" }, "sampling_rate": { @@ -3474,13 +3327,7 @@ "description": "IDs of provider configurations to use with this rule" } }, - "required": [ - "id", - "name", - "enabled", - "cel_expression", - "apply_to" - ], + "required": ["id", "name", "enabled", "cel_expression", "apply_to"], "additionalProperties": false } }, @@ -3516,12 +3363,7 @@ "description": "Provider-specific configuration" } }, - "required": [ - "id", - "provider_name", - "policy_name", - "enabled" - ], + "required": ["id", "provider_name", "policy_name", "enabled"], "additionalProperties": false } } @@ -3640,6 +3482,297 @@ } }, "additionalProperties": false + }, + "provider_pricing_override": { + "type": "object", + "description": "Scoped pricing override applied at runtime by the model catalog", + "properties": { + "id": { + "type": "string", + "description": "Unique pricing override ID" + }, + "name": { + "type": "string", + "description": "Human-readable name for this override" + }, + "scope_kind": { + "type": "string", + "description": "Scope level for this override", + "enum": [ + "global", + "provider", + "provider_key", + "virtual_key", + "virtual_key_provider", + "virtual_key_provider_key" + ] + }, + "virtual_key_id": { + "type": "string", + "description": "Virtual key ID (required for virtual_key* scopes)" + }, + "provider_id": { + "type": "string", + "description": "Provider ID (required for provider* scopes)" + }, + "provider_key_id": { + "type": "string", + "description": "Provider key ID (required for provider_key and virtual_key_provider_key scopes)" + }, + "match_type": { + "type": "string", + "description": "How the pattern is matched against model names", + "enum": ["exact", "wildcard"] + }, + "pattern": { + "type": "string", + "description": "Model name pattern to match (exact name or wildcard prefix ending with *)" + }, + "request_types": { + "type": "array", + "description": "Request types this override applies to. At least one value is required.", + "minItems": 1, + "items": { + "type": "string" + } + }, + "pricing_patch": { + "type": "string", + "description": "JSON-encoded pricing fields to override (e.g. 
'{\"input_cost_per_token\":0.000001}')" + }, + "config_hash": { + "type": "string", + "description": "Internal hash for change detection (auto-managed)" + } + }, + "required": ["id", "name", "scope_kind", "match_type", "pattern", "request_types"], + "additionalProperties": false + }, + "pricing_override_match_type": { + "type": "string", + "enum": ["exact", "wildcard"] + }, + "pricing_override_request_type": { + "type": "string", + "enum": [ + "chat_completion", + "text_completion", + "responses", + "embedding", + "rerank", + "speech", + "transcription", + "image_generation", + "image_variation", + "image_edit", + "video_generation", + "video_remix" + ] + }, + "custom_provider_config": { + "type": "object", + "description": "Custom provider configuration for extending or customizing provider behavior", + "properties": { + "is_key_less": { + "type": "boolean", + "description": "Whether the custom provider requires a key" + }, + "base_provider_type": { + "type": "string", + "enum": [ + "openai", + "azure", + "anthropic", + "bedrock", + "cohere", + "vertex", + "mistral", + "ollama", + "groq", + "sgl", + "parasail", + "perplexity", + "cerebras", + "gemini", + "openrouter", + "elevenlabs", + "huggingface", + "nebius", + "xai", + "replicate", + "vllm", + "runway", + "fireworks" + ], + "description": "Base provider type to extend" + }, + "request_path_overrides": { + "type": "object", + "description": "Mapping of request type to custom path overriding the default provider path", + "additionalProperties": { + "type": "string" + } + }, + "allowed_requests": { + "type": "object", + "description": "Allowed request types for the custom provider", + "properties": { + "list_models": { + "type": "boolean" + }, + "text_completion": { + "type": "boolean" + }, + "text_completion_stream": { + "type": "boolean" + }, + "chat_completion": { + "type": "boolean" + }, + "chat_completion_stream": { + "type": "boolean" + }, + "responses": { + "type": "boolean" + }, + "responses_stream": { + "type": "boolean" + }, + "count_tokens": { + "type": "boolean" + }, + "embedding": { + "type": "boolean" + }, + "rerank": { + "type": "boolean" + }, + "ocr": { + "type": "boolean" + }, + "speech": { + "type": "boolean" + }, + "speech_stream": { + "type": "boolean" + }, + "transcription": { + "type": "boolean" + }, + "transcription_stream": { + "type": "boolean" + }, + "image_generation": { + "type": "boolean" + }, + "image_generation_stream": { + "type": "boolean" + }, + "image_edit": { + "type": "boolean" + }, + "image_edit_stream": { + "type": "boolean" + }, + "image_variation": { + "type": "boolean" + }, + "video_generation": { + "type": "boolean" + }, + "video_retrieve": { + "type": "boolean" + }, + "video_download": { + "type": "boolean" + }, + "video_delete": { + "type": "boolean" + }, + "video_list": { + "type": "boolean" + }, + "video_remix": { + "type": "boolean" + }, + "batch_create": { + "type": "boolean" + }, + "batch_list": { + "type": "boolean" + }, + "batch_retrieve": { + "type": "boolean" + }, + "batch_cancel": { + "type": "boolean" + }, + "batch_delete": { + "type": "boolean" + }, + "batch_results": { + "type": "boolean" + }, + "file_upload": { + "type": "boolean" + }, + "file_list": { + "type": "boolean" + }, + "file_retrieve": { + "type": "boolean" + }, + "file_delete": { + "type": "boolean" + }, + "file_content": { + "type": "boolean" + }, + "container_create": { + "type": "boolean" + }, + "container_list": { + "type": "boolean" + }, + "container_retrieve": { + "type": "boolean" + }, + "container_delete": { + 
"type": "boolean" + }, + "container_file_create": { + "type": "boolean" + }, + "container_file_list": { + "type": "boolean" + }, + "container_file_retrieve": { + "type": "boolean" + }, + "container_file_content": { + "type": "boolean" + }, + "container_file_delete": { + "type": "boolean" + }, + "passthrough": { + "type": "boolean" + }, + "passthrough_stream": { + "type": "boolean" + }, + "websocket_responses": { + "type": "boolean" + }, + "realtime": { + "type": "boolean" + } + }, + "additionalProperties": false + } + }, + "required": ["base_provider_type"], + "additionalProperties": false } } } diff --git a/transports/go.mod b/transports/go.mod index d9b31e67f5..f82590c865 100644 --- a/transports/go.mod +++ b/transports/go.mod @@ -1,6 +1,6 @@ module github.com/maximhq/bifrost/transports -go 1.26.2 +go 1.26.1 require ( github.com/andybalholm/brotli v1.2.0 @@ -12,18 +12,19 @@ require ( github.com/google/uuid v1.6.0 github.com/klauspost/compress v1.18.2 github.com/mark3labs/mcp-go v0.43.2 - github.com/maximhq/bifrost/core v1.4.19 - github.com/maximhq/bifrost/framework v1.2.38 - github.com/maximhq/bifrost/plugins/governance v1.4.38 - github.com/maximhq/bifrost/plugins/litellmcompat v0.0.27 - github.com/maximhq/bifrost/plugins/logging v1.4.38 - github.com/maximhq/bifrost/plugins/maxim v1.5.38 - github.com/maximhq/bifrost/plugins/otel v1.1.37 - github.com/maximhq/bifrost/plugins/semanticcache v1.4.36 - github.com/maximhq/bifrost/plugins/telemetry v1.4.38 + github.com/maximhq/bifrost/core v1.4.22 + github.com/maximhq/bifrost/framework v1.2.39 + github.com/maximhq/bifrost/plugins/governance v1.4.39 + github.com/maximhq/bifrost/plugins/litellmcompat v0.0.28 + github.com/maximhq/bifrost/plugins/logging v1.4.39 + github.com/maximhq/bifrost/plugins/maxim v1.5.39 + github.com/maximhq/bifrost/plugins/otel v1.1.38 + github.com/maximhq/bifrost/plugins/semanticcache v1.4.37 + github.com/maximhq/bifrost/plugins/telemetry v1.4.39 github.com/prometheus/client_golang v1.23.2 github.com/santhosh-tekuri/jsonschema/v6 v6.0.2 github.com/stretchr/testify v1.11.1 + github.com/tidwall/gjson v1.18.0 github.com/valyala/fasthttp v1.68.0 go.uber.org/automaxprocs v1.6.0 golang.org/x/sync v0.20.0 @@ -111,7 +112,7 @@ require ( github.com/mattn/go-colorable v0.1.14 // indirect github.com/mattn/go-isatty v0.0.20 // indirect github.com/mattn/go-sqlite3 v1.14.32 // indirect - github.com/maximhq/bifrost/plugins/mocker v1.4.37 // indirect + github.com/maximhq/bifrost/plugins/mocker v1.4.38 // indirect github.com/maximhq/maxim-go v0.2.1 // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/oapi-codegen/runtime v1.1.1 // indirect @@ -129,7 +130,6 @@ require ( github.com/savsgio/gotils v0.0.0-20250408102913-196191ec6287 // indirect github.com/spf13/cast v1.10.0 // indirect github.com/stoewer/go-strcase v1.3.0 // indirect - github.com/tidwall/gjson v1.18.0 // indirect github.com/tidwall/match v1.1.1 // indirect github.com/tidwall/pretty v1.2.0 // indirect github.com/tidwall/sjson v1.2.5 // indirect diff --git a/transports/go.sum b/transports/go.sum index d9770c4540..ec1957eb11 100644 --- a/transports/go.sum +++ b/transports/go.sum @@ -213,26 +213,26 @@ github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWE github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= github.com/mattn/go-sqlite3 v1.14.32 h1:JD12Ag3oLy1zQA+BNn74xRgaBbdhbNIDYvQUEuuErjs= github.com/mattn/go-sqlite3 v1.14.32/go.mod 
h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y= -github.com/maximhq/bifrost/core v1.4.19 h1:RY6n8O1upUo/W05whsLO2OJtJpCpTFzplyq+6pk4NaY= -github.com/maximhq/bifrost/core v1.4.19/go.mod h1:iOh2p1qEKVolMG8l84LWcn1CseUL2Kl5c/iXFMiZI84= -github.com/maximhq/bifrost/framework v1.2.38 h1:uWITpE+PilOwo1CQCXhZ8iQ98hH7VOBroJlTMWGL8As= -github.com/maximhq/bifrost/framework v1.2.38/go.mod h1:S7vc+M9QOAj7RYK6pA0uauWHMxLxGRvlhHlZGArNsPw= -github.com/maximhq/bifrost/plugins/governance v1.4.38 h1:B9rYAYWo5hO57XGRmb5nNMyj8FAdN8rcRtBTl4SLDXQ= -github.com/maximhq/bifrost/plugins/governance v1.4.38/go.mod h1:mflU6sE+2gBNDd1jMTo/V523i0d48wWQXHdatJao9FA= -github.com/maximhq/bifrost/plugins/litellmcompat v0.0.27 h1:4ivjrvHULZq0WzslgULxl8XKbT2roZm7G1oa+AzaUU4= -github.com/maximhq/bifrost/plugins/litellmcompat v0.0.27/go.mod h1:OLnwZ8zuok9prNacK0fSA2Pfv7eH18a+Hc/pCNXmT54= -github.com/maximhq/bifrost/plugins/logging v1.4.38 h1:WMa86+jcGGjEYxoneH0G1k1WJCOiGan3cWene0CDRak= -github.com/maximhq/bifrost/plugins/logging v1.4.38/go.mod h1:zXeRVS4eCNhqxGeQbd/PZfwMYG8uGS1BBolq8ihc8QE= -github.com/maximhq/bifrost/plugins/maxim v1.5.38 h1:UbpwQeXDOCQiBmbFxCBa5h4RQSNOqZS72+sHSEXswVw= -github.com/maximhq/bifrost/plugins/maxim v1.5.38/go.mod h1:FRVtSlOlI3IYeXwYrMTbYcrGFENCauz6lZSWSSQ0+TE= -github.com/maximhq/bifrost/plugins/mocker v1.4.37 h1:IO9sQW3TZhHc3O1lwMYqCYEN75Dl8cOrDK916sVl4UU= -github.com/maximhq/bifrost/plugins/mocker v1.4.37/go.mod h1:CjFR49kf9teuZFXnQdGxNIMdy9X1GFy5PzHJ4yjsPqM= -github.com/maximhq/bifrost/plugins/otel v1.1.37 h1:gtotNOmmo3anYW/KnsXxz10FYVpmPVBFpiBrd/emdv4= -github.com/maximhq/bifrost/plugins/otel v1.1.37/go.mod h1:qJGOjd+Z5hRELXIllhRlEHzw3kCfhXDYKv/VSxyNJsY= -github.com/maximhq/bifrost/plugins/semanticcache v1.4.36 h1:QWVf6LHzur44iu4zsK/S03co3BZNigrjFZhTTu1y6KI= -github.com/maximhq/bifrost/plugins/semanticcache v1.4.36/go.mod h1:V6Y+xF/fhYGqdz96ICo4e29d37bqlizKWvza/ufHdf4= -github.com/maximhq/bifrost/plugins/telemetry v1.4.38 h1:VZdzQRnvRpQGDg6V13d6b21UkIt9Zh0cjdJ/UNj50x0= -github.com/maximhq/bifrost/plugins/telemetry v1.4.38/go.mod h1:50AuBQBx1n9o5CVohs/xO18TSXoMM0JgtKn/mC86uvE= +github.com/maximhq/bifrost/core v1.4.22 h1:iK3OhfOFzrivnQgGjFBqAEkXYzn5BYSPkwoXH4o1tWM= +github.com/maximhq/bifrost/core v1.4.22/go.mod h1:IaGSRlCgvy6Nr7Xiyu44sJJo9JbK188EL4QPXIcvURA= +github.com/maximhq/bifrost/framework v1.2.39 h1:BCYfFFHBcx1xlnAy4GN/6+jLOUyysWQvdVjZ62OzAT4= +github.com/maximhq/bifrost/framework v1.2.39/go.mod h1:+HrM35y5Jid35NKwUcG4GAXvOuCnMu/5bltqdIAhy84= +github.com/maximhq/bifrost/plugins/governance v1.4.39 h1:i6xDCzbsQ018rTfXvjGBl860KAAEWQu/ANC/lm4Ojiw= +github.com/maximhq/bifrost/plugins/governance v1.4.39/go.mod h1:KkTG4Bs6+7kAh4r/pzUP0sal8Ln8cVUQgaUqsAH0dFo= +github.com/maximhq/bifrost/plugins/litellmcompat v0.0.28 h1:hoJ/zDCfDbKrllTCBeYVGrCKRsaJ5VNK9NQ0ddl23LY= +github.com/maximhq/bifrost/plugins/litellmcompat v0.0.28/go.mod h1:jPg2Scl6aCU5VOSefl619FAeNqI3xAKYtVPyyvtKf6E= +github.com/maximhq/bifrost/plugins/logging v1.4.39 h1:9Xt/LksiX1JJYTIO2EuhhZSvpzqKYWnBRwe0UZ2qbpw= +github.com/maximhq/bifrost/plugins/logging v1.4.39/go.mod h1:qym/dspqhLAF+NzkF6EVUyFPlYMlCPP2sVbv5opo7Kg= +github.com/maximhq/bifrost/plugins/maxim v1.5.39 h1:OXIlGVh4eQ6OeaEzFFxiNdsqtGHAGIJUGVqx50DeqhQ= +github.com/maximhq/bifrost/plugins/maxim v1.5.39/go.mod h1:M1W+v+jti2Vk05UYNPkvxkcYjvD9+spChOK5G3fQXRY= +github.com/maximhq/bifrost/plugins/mocker v1.4.38 h1:GsSv8EdF0+A5w4jCO7NtwPFs7fNeMthWjLvhzfe6qqQ= +github.com/maximhq/bifrost/plugins/mocker v1.4.38/go.mod h1:CadNUhWUUuudzYvF2YiaQEDmijporxWiLDNTXYw9CBA= 
+github.com/maximhq/bifrost/plugins/otel v1.1.38 h1:1uV2pXxFdKhO7eIGwnB/oqOThTGExBQojpkeNyaE0lg=
+github.com/maximhq/bifrost/plugins/otel v1.1.38/go.mod h1:ipCwIeVbhoOlax6Gnydfu89L+jg3iBDCokh7xDhd510=
+github.com/maximhq/bifrost/plugins/semanticcache v1.4.37 h1:3EugTM+UxRH3H+Zusl7L81SOM4EwClX4qvmwiu1Rn+8=
+github.com/maximhq/bifrost/plugins/semanticcache v1.4.37/go.mod h1:RuI7C1XUjrcGjao0Y0SypYPU05ahpDcXFUGekUQlvF0=
+github.com/maximhq/bifrost/plugins/telemetry v1.4.39 h1:uw2Kmq2JL9ByiJWx6dge0LzlF+yBasnuh+La3/IFPEI=
+github.com/maximhq/bifrost/plugins/telemetry v1.4.39/go.mod h1:yVHkJSt3ENR0Epol7twHelu5PhbLNPdZsZJcSzsC6p8=
 github.com/maximhq/maxim-go v0.2.1 h1:hCp8dQ4HsyyNC+y5HCUuY/HFD0sOnGkjL5MdYCHkgEQ=
 github.com/maximhq/maxim-go v0.2.1/go.mod h1:nwFznXy0Dn4mxXGU4X+BCnE3VP68L+FPEaW0yUgk96o=
 github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
diff --git a/transports/version b/transports/version
index d6e2616797..580ef4b6c1 100644
--- a/transports/version
+++ b/transports/version
@@ -1 +1 @@
-1.4.22
+1.4.23
diff --git a/ui/app/pprof/page.tsx b/ui/app/pprof/page.tsx
index a61781989e..4d939f0d38 100644
--- a/ui/app/pprof/page.tsx
+++ b/ui/app/pprof/page.tsx
@@ -16,7 +16,7 @@ import {
   RotateCcw,
   TrendingUp,
 } from "lucide-react";
-import React, { useCallback, useEffect, useMemo, useState } from "react";
+import React, { useCallback, useEffect, useMemo, useRef, useState } from "react";
 import { Area, AreaChart, CartesianGrid, ResponsiveContainer, Tooltip, XAxis, YAxis } from "recharts";
 
 // ============================================================================
@@ -102,6 +102,135 @@ function saveSkippedGoroutineFiles(skipped: Set<string>): void {
 type AllocationSortField = "function" | "file" | "bytes" | "count";
 type SortDirection = "asc" | "desc";
+type AllocationSortState = { field: AllocationSortField; direction: SortDirection };
+type LeakSeverity = "high" | "medium" | "low";
+
+interface LeakCandidate {
+  key: string;
+  function: string;
+  file: string;
+  line: number;
+  stack: string[];
+  liveBytes: number;
+  cumulativeBytes: number;
+  retention: number;
+  liveCount: number;
+  samples: number[];
+  isGrowing: boolean;
+  growthBytes: number;
+  severity: LeakSeverity;
+}
+
+// ~60 seconds of history at 10s polling interval
+const LEAK_MAX_SAMPLES = 6;
+const LEAK_MIN_GROWTH_SAMPLES = 3;
+const LEAK_SEVERITY_RANK: Record<LeakSeverity, number> = { high: 0, medium: 1, low: 2 };
+
+function makeStackKey(stack: string[]): string {
+  return stack.join("\n");
+}
+
+function isMonotonicGrowing(samples: number[]): boolean {
+  if (samples.length < LEAK_MIN_GROWTH_SAMPLES) return false;
+  for (let i = 1; i < samples.length; i++) {
+    if (samples[i] < samples[i - 1]) return false;
+  }
+  return samples[samples.length - 1] > samples[0];
+}
+
+function classifyLeakSeverity(retention: number, liveBytes: number, isGrowing: boolean): LeakSeverity | null {
+  const MB = 1024 * 1024;
+  if (isGrowing && retention >= 0.5 && liveBytes >= MB) return "high";
+  if (retention >= 0.8 && liveBytes >= 10 * MB) return "high";
+  if (retention >= 0.5 && liveBytes >= MB) return "medium";
+  if (retention >= 0.3 && liveBytes >= 100 * 1024) return "low";
+  return null;
+}
+
+function detectLeaks(
+  cumulative: AllocationInfo[],
+  live: AllocationInfo[],
+  inuseHistory: Map<string, number[]>,
+): LeakCandidate[] {
+  const cumMap = new Map<string, AllocationInfo>();
+  for (const c of cumulative) {
+    cumMap.set(makeStackKey(c.stack), c);
+  }
+
+  const candidates: LeakCandidate[] = [];
+  for (const l of live) {
+    const key =
makeStackKey(l.stack); + const cum = cumMap.get(key); + if (!cum || cum.bytes === 0) continue; + const retention = l.bytes / cum.bytes; + const samples = inuseHistory.get(key) ?? []; + const isGrowing = isMonotonicGrowing(samples); + const growthBytes = samples.length >= 2 ? samples[samples.length - 1] - samples[0] : 0; + const severity = classifyLeakSeverity(retention, l.bytes, isGrowing); + if (!severity) continue; + candidates.push({ + key, + function: l.function, + file: l.file, + line: l.line, + stack: l.stack, + liveBytes: l.bytes, + cumulativeBytes: cum.bytes, + retention, + liveCount: l.count, + samples: [...samples], + isGrowing, + growthBytes, + severity, + }); + } + + candidates.sort((a, b) => { + if (a.severity !== b.severity) return LEAK_SEVERITY_RANK[a.severity] - LEAK_SEVERITY_RANK[b.severity]; + return b.liveBytes - a.liveBytes; + }); + return candidates; +} + +function getLeakSeverityClasses(severity: LeakSeverity): string { + switch (severity) { + case "high": + return "text-red-400 bg-red-400/10 border-red-400/20"; + case "medium": + return "text-amber-400 bg-amber-400/10 border-amber-400/20"; + case "low": + return "text-zinc-400 bg-zinc-400/10 border-zinc-400/20"; + } +} + +function getRetentionColor(retention: number): string { + if (retention >= 0.8) return "text-red-400"; + if (retention >= 0.5) return "text-amber-400"; + return "text-zinc-400"; +} + +function sortAllocations(list: AllocationInfo[], sort: AllocationSortState): AllocationInfo[] { + const sorted = [...list]; + sorted.sort((a, b) => { + let cmp = 0; + switch (sort.field) { + case "function": + cmp = a.function.localeCompare(b.function); + break; + case "file": + cmp = a.file.localeCompare(b.file); + break; + case "bytes": + cmp = a.bytes - b.bytes; + break; + case "count": + cmp = a.count - b.count; + break; + } + return sort.direction === "asc" ? cmp : -cmp; + }); + return sorted; +} // ============================================================================ // Components @@ -139,17 +268,25 @@ function AllocationTable({ sortField, sortDirection, onSort, + expandedKeys, + onToggle, + bytesColorClass = "text-rose-400", + testIdPrefix = "pprof-sort", }: { allocations: AllocationInfo[]; sortField: AllocationSortField; sortDirection: SortDirection; onSort: (field: AllocationSortField) => void; + expandedKeys: Set; + onToggle: (key: string) => void; + bytesColorClass?: string; + testIdPrefix?: string; }) { const SortIcon = sortDirection === "asc" ? ArrowUp : ArrowDown; const SortHeader = ({ field, children }: { field: AllocationSortField; children: React.ReactNode }) => ( - @@ -161,6 +298,7 @@ function AllocationTable({ + - {allocations.map((alloc, i) => ( - - - - - - - ))} + {allocations.map((alloc) => { + const hasStack = alloc.stack && alloc.stack.length > 0; + const key = hasStack ? makeStackKey(alloc.stack) : `${alloc.function}:${alloc.file}:${alloc.line}`; + const isExpanded = expandedKeys.has(key); + return ( + + onToggle(key) : undefined} + onKeyDown={ + hasStack + ? (e) => { + if (e.key === "Enter" || e.key === " ") { + e.preventDefault(); + onToggle(key); + } + } + : undefined + } + data-testid="pprof-alloc-row" + className={`border-b border-zinc-800/50 hover:bg-zinc-800/30 ${hasStack ? 
"cursor-pointer" : ""}`} + > + + + + + + + {isExpanded && hasStack && ( + + + + )} + + ); + })} {allocations.length === 0 && ( - @@ -199,6 +381,147 @@ function AllocationTable({ ); } +// Leak Candidates Table +function LeakTable({ + candidates, + expandedKeys, + onToggle, +}: { + candidates: LeakCandidate[]; + expandedKeys: Set; + onToggle: (key: string) => void; +}) { + return ( +
+
Function File:Line Bytes @@ -168,27 +306,71 @@ function AllocationTable({
- {alloc.function} - - - {alloc.file}:{alloc.line} - - - {formatBytes(alloc.bytes)} - - {alloc.count.toLocaleString()} -
+ {hasStack ? ( + isExpanded ? : + ) : null} + + {alloc.function} + + + {alloc.file}:{alloc.line} + + + {formatBytes(alloc.bytes)} + + {alloc.count.toLocaleString()} +
+ +
Stack Trace
+
+ {alloc.stack.map((line, j) => ( +
+ {line} +
+ ))} +
+
+ No allocations data available
+ + + + + + + + + + + + + {candidates.map((c) => { + const rowKey = c.key; + const isExpanded = expandedKeys.has(rowKey); + return ( + + onToggle(rowKey)} + onKeyDown={(e) => { + if (e.key === "Enter" || e.key === " ") { + e.preventDefault(); + onToggle(rowKey); + } + }} + data-testid="pprof-leak-row" + className="cursor-pointer border-b border-zinc-800/50 hover:bg-zinc-800/30" + > + + + + + + + + + + {isExpanded && ( + + + + )} + + ); + })} + {candidates.length === 0 && ( + + + + )} + +
+ + Severity + + Function + + File:Line + + Live + + Retention + + Trend + + Live Count +
+ {isExpanded ? : } + + + {c.severity} + + + {c.function} + + + {c.file}:{c.line} + + + {formatBytes(c.liveBytes)} + + + {(c.retention * 100).toFixed(0)}% + + + {c.isGrowing ? ( + + +{formatBytes(c.growthBytes)} + + ) : ( + stable + )} + + {c.liveCount.toLocaleString()} +
+ +
+ + Cumulative: {formatBytes(c.cumulativeBytes)} + + + Retained: {(c.retention * 100).toFixed(1)}% + + {c.samples.length >= 2 && ( + + Last {c.samples.length * 10}s:{" "} + {c.samples.map((b) => formatBytes(b)).join(" → ")} + + )} +
+
Stack Trace
+
+ {c.stack.map((line, j) => ( +
+ {line} +
+ ))} +
+
+ No obvious leak signatures — all live allocations have normal retention ratios. +
+
+  );
+}
+
 // Goroutine Group Component
 function GoroutineGroupRow({
   group,
@@ -287,10 +610,14 @@ export default function PprofPage() {
   const [expandedGoroutines, setExpandedGoroutines] = useState<Set<string>>(new Set());
   const [skippedGoroutines, setSkippedGoroutines] = useState<Set<string>>(new Set());
   const [hasLoadedSkipped, setHasLoadedSkipped] = useState(false);
-  const [allocationSort, setAllocationSort] = useState<{
-    field: AllocationSortField;
-    direction: SortDirection;
-  }>({ field: "bytes", direction: "desc" });
+  const [allocationSort, setAllocationSort] = useState<AllocationSortState>({ field: "bytes", direction: "desc" });
+  const [inuseSort, setInuseSort] = useState<AllocationSortState>({ field: "bytes", direction: "desc" });
+  const [expandedAlloc, setExpandedAlloc] = useState<Set<string>>(new Set());
+  const [expandedInuse, setExpandedInuse] = useState<Set<string>>(new Set());
+  const [expandedLeaks, setExpandedLeaks] = useState<Set<string>>(new Set());
+  const inuseHistoryRef = useRef<Map<string, number[]>>(new Map());
+  const lastInuseSnapshotRef = useRef(null);
+  const [historyVersion, setHistoryVersion] = useState(0);
 
   // Load skipped goroutines from localStorage on client
   useEffect(() => {
@@ -333,29 +660,55 @@
   }, [data?.history]);
 
   // Sort allocations
-  const sortedAllocations = useMemo(() => {
-    if (!data?.top_allocations) return [];
-    const sorted = [...data.top_allocations];
-    sorted.sort((a, b) => {
-      let cmp = 0;
-      switch (allocationSort.field) {
-        case "function":
-          cmp = a.function.localeCompare(b.function);
-          break;
-        case "file":
-          cmp = a.file.localeCompare(b.file);
-          break;
-        case "bytes":
-          cmp = a.bytes - b.bytes;
-          break;
-        case "count":
-          cmp = a.count - b.count;
-          break;
-      }
-      return allocationSort.direction === "asc" ? cmp : -cmp;
-    });
-    return sorted;
-  }, [data?.top_allocations, allocationSort]);
+  const sortedAllocations = useMemo(
+    () => sortAllocations(data?.top_allocations ?? [], allocationSort),
+    [data?.top_allocations, allocationSort],
+  );
+  const sortedInuseAllocations = useMemo(
+    () => sortAllocations(data?.inuse_allocations ?? [], inuseSort),
+    [data?.inuse_allocations, inuseSort],
+  );
+
+  // Roll a ~60s window of inuse bytes per stack signature so we can detect
+  // sites whose live memory grows monotonically across polls. Dedupe on
+  // data.timestamp (stamped fresh by the backend each poll) rather than
+  // array identity: RTK Query's default structural sharing reuses the
+  // inuse_allocations reference when the snapshot is deep-equal, which
+  // would silently skip samples on idle polls and shrink the window.
+  useEffect(() => {
+    const inuse = data?.inuse_allocations;
+    const snapshotTs = data?.timestamp;
+    if (!inuse || !snapshotTs || lastInuseSnapshotRef.current === snapshotTs) return;
+    lastInuseSnapshotRef.current = snapshotTs;
+    const map = inuseHistoryRef.current;
+    const seen = new Set<string>();
+    for (const l of inuse) {
+      const key = makeStackKey(l.stack);
+      seen.add(key);
+      const samples = map.get(key) ?? [];
+      samples.push(l.bytes);
+      while (samples.length > LEAK_MAX_SAMPLES) samples.shift();
+      map.set(key, samples);
+    }
+    // Drop sites absent from the latest snapshot (either freed or evicted
+    // from top-N) so the map stays bounded.
+    for (const key of [...map.keys()]) {
+      if (!seen.has(key)) map.delete(key);
+    }
+    setHistoryVersion((v) => v + 1);
+  }, [data?.timestamp, data?.inuse_allocations]);
+
+  const leakCandidates = useMemo(
+    () => detectLeaks(data?.top_allocations ?? [], data?.inuse_allocations ?? [], inuseHistoryRef.current),
+    // historyVersion bumps when the ref is mutated; top/inuse refs change per poll
+    [data?.top_allocations, data?.inuse_allocations, historyVersion],
+  );
+
+  const leakSummary = useMemo(() => {
+    const counts: Record<LeakSeverity, number> = { high: 0, medium: 0, low: 0 };
+    for (const c of leakCandidates) counts[c.severity]++;
+    return counts;
+  }, [leakCandidates]);
 
   // Detect goroutine count trend
   const goroutineTrend = useMemo(() => {
@@ -394,6 +747,49 @@
     }));
   }, []);
 
+  const handleInuseSort = useCallback((field: AllocationSortField) => {
+    setInuseSort((prev) => ({
+      field,
+      direction: prev.field === field && prev.direction === "desc" ? "asc" : "desc",
+    }));
+  }, []);
+
+  const toggleAllocExpand = useCallback((key: string) => {
+    setExpandedAlloc((prev) => {
+      const next = new Set(prev);
+      if (next.has(key)) {
+        next.delete(key);
+      } else {
+        next.add(key);
+      }
+      return next;
+    });
+  }, []);
+
+  const toggleInuseExpand = useCallback((key: string) => {
+    setExpandedInuse((prev) => {
+      const next = new Set(prev);
+      if (next.has(key)) {
+        next.delete(key);
+      } else {
+        next.add(key);
+      }
+      return next;
+    });
+  }, []);
+
+  const toggleLeakExpand = useCallback((key: string) => {
+    setExpandedLeaks((prev) => {
+      const next = new Set(prev);
+      if (next.has(key)) {
+        next.delete(key);
+      } else {
+        next.add(key);
+      }
+      return next;
+    });
+  }, []);
+
   const toggleGoroutineExpand = useCallback((id: string) => {
     setExpandedGoroutines((prev) => {
       const next = new Set(prev);
@@ -635,18 +1031,81 @@
-          {/* Allocations Table */}
+          {/* Potential Leaks — stacks accumulating live memory without being freed */}
-
- - Memory Allocations - ({sortedAllocations.length} allocations) +
+
+ + Potential Leaks + ({leakCandidates.length} suspicious) + {leakSummary.high > 0 && ( + + {leakSummary.high} high + + )} + {leakSummary.medium > 0 && ( + + {leakSummary.medium} medium + + )} + {leakSummary.low > 0 && ( + + {leakSummary.low} low + + )} +
+

+ Stacks whose live bytes remain a large fraction of what they ever allocated (retention), optionally with + live bytes trending upward over the last minute. Growth + high retention together is the strongest leak + signal. +

+
+ +
+ + {/* Live Heap Allocations — what's currently consuming the heap */} +
+
+
+ + Live Heap Allocations + ({sortedInuseAllocations.length} sites) +
+

+ Call stacks currently holding memory on the heap right now — expand a row to see the full stack. +

+
+ +
+ + {/* Cumulative Memory Allocations — total since process start */} +
+
+
+ + Cumulative Memory Allocations + ({sortedAllocations.length} sites) +
+

+ Total bytes allocated since process start (includes memory already freed) — expand a row to see the full stack. +

diff --git a/ui/app/workspace/observability/fragments/otelFormFragment.tsx b/ui/app/workspace/observability/fragments/otelFormFragment.tsx index 7208ba8377..b35b6baa5c 100644 --- a/ui/app/workspace/observability/fragments/otelFormFragment.tsx +++ b/ui/app/workspace/observability/fragments/otelFormFragment.tsx @@ -21,7 +21,7 @@ interface OtelFormFragmentProps { service_name?: string; collector_url?: string; headers?: Record; - trace_type?: "otel" | "genai_extension" | "vercel" | "arize_otel"; + trace_type?: "genai_extension" | "vercel" | "open_inference"; protocol?: "http" | "grpc"; // TLS configuration tls_ca_cert?: string; @@ -37,7 +37,13 @@ interface OtelFormFragmentProps { isLoading?: boolean; } -export function OtelFormFragment({ currentConfig: initialConfig, onSave, onDelete, isDeleting = false, isLoading = false }: OtelFormFragmentProps) { +export function OtelFormFragment({ + currentConfig: initialConfig, + onSave, + onDelete, + isDeleting = false, + isLoading = false, +}: OtelFormFragmentProps) { const hasOtelAccess = useRbac(RbacResource.Observability, RbacOperation.Update); const [isSaving, setIsSaving] = useState(false); const form = useForm({ @@ -50,7 +56,7 @@ export function OtelFormFragment({ currentConfig: initialConfig, onSave, onDelet service_name: initialConfig?.service_name ?? "bifrost", collector_url: initialConfig?.collector_url ?? "", headers: initialConfig?.headers ?? {}, - trace_type: initialConfig?.trace_type ?? "otel", + trace_type: initialConfig?.trace_type ?? "genai_extension", protocol: initialConfig?.protocol ?? "http", tls_ca_cert: initialConfig?.tls_ca_cert ?? "", insecure: initialConfig?.insecure ?? true, @@ -94,7 +100,7 @@ export function OtelFormFragment({ currentConfig: initialConfig, onSave, onDelet service_name: initialConfig?.service_name ?? "bifrost", collector_url: initialConfig?.collector_url || "", headers: initialConfig?.headers || {}, - trace_type: initialConfig?.trace_type || "otel", + trace_type: initialConfig?.trace_type || "genai_extension", protocol: initialConfig?.protocol || "http", tls_ca_cert: initialConfig?.tls_ca_cert ?? "", insecure: initialConfig?.insecure ?? true, @@ -106,7 +112,9 @@ export function OtelFormFragment({ currentConfig: initialConfig, onSave, onDelet }, [form, initialConfig]); const traceTypeOptions: { value: string; label: string; disabled?: boolean; disabledReason?: string }[] = [ - { value: "otel", label: "OTEL - GenAI Extension" }, + { value: "genai_extension", label: "OTel GenAI Extension (Recommended)" }, + { value: "vercel", label: "Vercel AI SDK", disabled: true, disabledReason: "Coming soon" }, + { value: "open_inference", label: "Arize OpenInference", disabled: true, disabledReason: "Coming soon" }, ]; const protocolOptions: { value: string; label: string; disabled?: boolean; disabledReason?: string }[] = [ { value: "http", label: "HTTP" }, @@ -406,7 +414,7 @@ export function OtelFormFragment({ currentConfig: initialConfig, onSave, onDelet service_name: initialConfig?.service_name ?? "bifrost", collector_url: initialConfig?.collector_url ?? "", headers: initialConfig?.headers ?? {}, - trace_type: initialConfig?.trace_type ?? "otel", + trace_type: initialConfig?.trace_type ?? "genai_extension", protocol: initialConfig?.protocol ?? "http", tls_ca_cert: initialConfig?.tls_ca_cert ?? "", insecure: initialConfig?.insecure ?? 
true, diff --git a/ui/components/ui/select.tsx b/ui/components/ui/select.tsx index deb430fdb4..66ab3db864 100644 --- a/ui/components/ui/select.tsx +++ b/ui/components/ui/select.tsx @@ -50,9 +50,9 @@ function SelectContent({ className, children, position = "popper", ...props }: R @@ -88,11 +88,12 @@ interface SelectItemProps extends React.ComponentProps=14.0.0" } }, + "node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/@emnapi/core": { + "version": "1.8.1", + "dev": true, + "inBundle": true, + "license": "MIT", + "optional": true, + "dependencies": { + "@emnapi/wasi-threads": "1.1.0", + "tslib": "^2.4.0" + } + }, + "node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/@emnapi/runtime": { + "version": "1.8.1", + "dev": true, + "inBundle": true, + "license": "MIT", + "optional": true, + "dependencies": { + "tslib": "^2.4.0" + } + }, + "node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/@emnapi/wasi-threads": { + "version": "1.1.0", + "dev": true, + "inBundle": true, + "license": "MIT", + "optional": true, + "dependencies": { + "tslib": "^2.4.0" + } + }, + "node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/@napi-rs/wasm-runtime": { + "version": "1.1.1", + "dev": true, + "inBundle": true, + "license": "MIT", + "optional": true, + "dependencies": { + "@emnapi/core": "^1.7.1", + "@emnapi/runtime": "^1.7.1", + "@tybys/wasm-util": "^0.10.1" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/Brooooooklyn" + } + }, + "node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/@tybys/wasm-util": { + "version": "0.10.1", + "dev": true, + "inBundle": true, + "license": "MIT", + "optional": true, + "dependencies": { + "tslib": "^2.4.0" + } + }, + "node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/tslib": { + "version": "2.8.1", + "dev": true, + "inBundle": true, + "license": "0BSD", + "optional": true + }, "node_modules/@tailwindcss/oxide-win32-arm64-msvc": { "version": "4.2.1", "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-win32-arm64-msvc/-/oxide-win32-arm64-msvc-4.2.1.tgz", @@ -6681,9 +6745,9 @@ } }, "node_modules/dompurify": { - "version": "3.3.3", - "resolved": "https://registry.npmjs.org/dompurify/-/dompurify-3.3.3.tgz", - "integrity": "sha512-Oj6pzI2+RqBfFG+qOaOLbFXLQ90ARpcGG6UePL82bJLtdsa6CYJD7nmiU8MW9nQNOtCHV3lZ/Bzq1X0QYbBZCA==", + "version": "3.4.0", + "resolved": "https://registry.npmjs.org/dompurify/-/dompurify-3.4.0.tgz", + "integrity": "sha512-nolgK9JcaUXMSmW+j1yaSvaEaoXYHwWyGJlkoCTghc97KgGDDSnpoU/PlEnw63Ah+TGKFOyY+X5LnxaWbCSfXg==", "license": "(MPL-2.0 OR Apache-2.0)", "optional": true, "optionalDependencies": { @@ -7762,9 +7826,9 @@ "license": "ISC" }, "node_modules/follow-redirects": { - "version": "1.15.11", - "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.11.tgz", - "integrity": "sha512-deG2P0JfjrTxl50XGCDyfI97ZGVCxIpfKYmfyrQ54n5FO/0gfIES8C/Psl6kWVDolizcaaxZJnTS0QSMxvnsBQ==", + "version": "1.16.0", + "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.16.0.tgz", + "integrity": "sha512-y5rN/uOsadFT/JfYwhxRS5R7Qce+g3zG97+JrtFZlC9klX/W5hD7iiLzScI4nZqUS7DNUdhPgw4xI8W2LuXlUw==", "funding": [ { "type": "individual", diff --git a/ui/package.json b/ui/package.json index 86aba861a4..42a26d6178 100644 --- a/ui/package.json +++ b/ui/package.json @@ -100,6 +100,8 @@ "vitest": "4.0.18" }, "overrides": { - "tar": "7.5.3" + "tar": "7.5.3", + "dompurify": "3.4.0", + "follow-redirects": "1.16.0" } }
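
Some of the schema changes above are easier to see with concrete values. A minimal sketch of an `mcp_client_config` entry that passes the tightened validation — the new `if`/`then` block makes `oauth_config_id` mandatory whenever `auth_type` is `oauth` or `per_user_oauth`, and `allowed_extra_headers` / `allow_on_all_virtual_keys` are new optional fields. Field names come from the schema; the server name, URL, and IDs below are hypothetical.

```ts
// Sketch of an MCP client entry (mirrors one element of the config's MCP client list).
// All identifiers and the URL are invented for illustration.
const mcpClient = {
  name: "github-tools",                                // required
  connection_type: "http",                             // enum: "stdio" | "http" | "sse" | "inprocess"
  connection_string: "https://mcp.example.com/stream", // for "http", anyOf accepts http_config or connection_string
  auth_type: "oauth",                                  // "oauth" / "per_user_oauth" trigger the if/then branch...
  oauth_config_id: "oauth-cfg-1",                      // ...which makes this field required
  allowed_extra_headers: ["x-request-id"],             // caller-forwardable header allowlist; ["*"] allows all
  allow_on_all_virtual_keys: true,                     // expose to all virtual keys without per-key assignment
};
```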
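Likewise, a sketch of the new `provider_pricing_override` definition. The field names and enum values come from the schema; the IDs, pattern, and price are invented. Note that `pricing_patch` is a JSON-encoded string, not a nested object.

```ts
// Sketch of a pricing override. required: id, name, scope_kind, match_type, pattern, request_types.
const pricingOverride = {
  id: "po-1",                                          // hypothetical
  name: "Discounted GPT-4o family",                    // hypothetical
  scope_kind: "provider",                              // provider* scopes also require provider_id
  provider_id: "prov-123",                             // hypothetical
  match_type: "wildcard",                              // "exact" | "wildcard"
  pattern: "gpt-4o*",                                  // wildcard patterns end with *
  request_types: ["chat_completion", "responses"],     // minItems: 1
  pricing_patch: JSON.stringify({ input_cost_per_token: 0.000001 }), // JSON-encoded string
};
```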
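And a sketch of `custom_provider_config`, where only `base_provider_type` is required. The override path and the choice of flags are illustrative assumptions; whether `request_path_overrides` keys must match the `allowed_requests` property names is not stated by the schema itself.

```ts
// Sketch of a custom provider extending the OpenAI base behavior.
const customProvider = {
  base_provider_type: "openai",          // must be one of the enum values in the schema
  is_key_less: false,                    // this custom provider still requires a key
  request_path_overrides: {
    chat_completion: "/v1/custom/chat",  // hypothetical path override for one request type
  },
  allowed_requests: {
    chat_completion: true,
    chat_completion_stream: true,
    embedding: true,
  },
};
```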
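On the UI side, the leak heuristics added to `ui/app/pprof/page.tsx` are easiest to sanity-check with numbers. The two functions below are copied from the diff; the sample values are invented. A site that retains 75% of everything it ever allocated, with live bytes growing monotonically over the last three polls, classifies as `high` via the growth branch.

```ts
// Copied from the diff's leak-detection helpers; sample data below is invented.
const LEAK_MIN_GROWTH_SAMPLES = 3;

function isMonotonicGrowing(samples: number[]): boolean {
  if (samples.length < LEAK_MIN_GROWTH_SAMPLES) return false;
  for (let i = 1; i < samples.length; i++) {
    if (samples[i] < samples[i - 1]) return false; // any dip disqualifies
  }
  return samples[samples.length - 1] > samples[0]; // net positive growth required
}

function classifyLeakSeverity(
  retention: number,
  liveBytes: number,
  isGrowing: boolean,
): "high" | "medium" | "low" | null {
  const MB = 1024 * 1024;
  if (isGrowing && retention >= 0.5 && liveBytes >= MB) return "high";
  if (retention >= 0.8 && liveBytes >= 10 * MB) return "high";
  if (retention >= 0.5 && liveBytes >= MB) return "medium";
  if (retention >= 0.3 && liveBytes >= 100 * 1024) return "low";
  return null;
}

const samples = [1.0e6, 1.2e6, 1.5e6];        // inuse bytes over the last three polls (invented)
const growing = isMonotonicGrowing(samples);  // true
const retention = 1.5e6 / 2.0e6;              // live / cumulative = 0.75
console.log(classifyLeakSeverity(retention, 1.5e6, growing)); // "high" via the growth branch
```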