diff --git a/core/providers/anthropic/anthropic.go b/core/providers/anthropic/anthropic.go index cd49b3a212..355c68bcf4 100644 --- a/core/providers/anthropic/anthropic.go +++ b/core/providers/anthropic/anthropic.go @@ -202,7 +202,7 @@ func (provider *AnthropicProvider) completeRequest(ctx *schemas.BifrostContext, } req.Header.Set("anthropic-version", provider.apiVersion) - if betaHeaders := FilterBetaHeadersForProvider(MergeBetaHeaders(provider.networkConfig.ExtraHeaders, ctx), schemas.Anthropic, provider.networkConfig.BetaHeaderOverrides); len(betaHeaders) > 0 { + if betaHeaders := FilterBetaHeadersForProvider(MergeBetaHeaders(ctx, provider.networkConfig.ExtraHeaders), schemas.Anthropic, provider.networkConfig.BetaHeaderOverrides); len(betaHeaders) > 0 { req.Header.Set(AnthropicBetaHeader, strings.Join(betaHeaders, ",")) } else { req.Header.Del(AnthropicBetaHeader) @@ -615,7 +615,7 @@ func HandleAnthropicChatCompletionStreaming( providerUtils.SetExtraHeaders(ctx, req, extraHeaders, []string{AnthropicBetaHeader}) - if betaHeaders := FilterBetaHeadersForProvider(MergeBetaHeaders(extraHeaders, ctx), providerName, betaHeaderOverrides); len(betaHeaders) > 0 { + if betaHeaders := FilterBetaHeadersForProvider(MergeBetaHeaders(ctx, extraHeaders), providerName, betaHeaderOverrides); len(betaHeaders) > 0 { req.Header.Set(AnthropicBetaHeader, strings.Join(betaHeaders, ",")) } else { req.Header.Del(AnthropicBetaHeader) @@ -1080,7 +1080,7 @@ func HandleAnthropicResponsesStream( providerUtils.SetExtraHeaders(ctx, req, extraHeaders, []string{AnthropicBetaHeader}) - if betaHeaders := FilterBetaHeadersForProvider(MergeBetaHeaders(extraHeaders, ctx), providerName, betaHeaderOverrides); len(betaHeaders) > 0 { + if betaHeaders := FilterBetaHeadersForProvider(MergeBetaHeaders(ctx, extraHeaders), providerName, betaHeaderOverrides); len(betaHeaders) > 0 { req.Header.Set(AnthropicBetaHeader, strings.Join(betaHeaders, ",")) } else { req.Header.Del(AnthropicBetaHeader) diff --git 
a/core/providers/anthropic/request_builder.go b/core/providers/anthropic/request_builder.go index 70c7037649..e64195218a 100644 --- a/core/providers/anthropic/request_builder.go +++ b/core/providers/anthropic/request_builder.go @@ -310,7 +310,7 @@ func BuildAnthropicResponsesRequestBody(ctx *schemas.BifrostContext, request *sc } if cfg.InjectBetaHeadersIntoBody { - if betaHeaders := FilterBetaHeadersForProvider(MergeBetaHeaders(cfg.ProviderExtraHeaders, ctx), cfg.Provider, cfg.BetaHeaderOverrides); len(betaHeaders) > 0 { + if betaHeaders := FilterBetaHeadersForProvider(MergeBetaHeaders(ctx, cfg.ProviderExtraHeaders), cfg.Provider, cfg.BetaHeaderOverrides); len(betaHeaders) > 0 { jsonBody, err = providerUtils.SetJSONField(jsonBody, "anthropic_beta", betaHeaders) if err != nil { return nil, newErr(schemas.ErrProviderRequestMarshal, err, jsonBody) diff --git a/core/providers/anthropic/types.go b/core/providers/anthropic/types.go index 2d33d0b701..e97b64f04b 100644 --- a/core/providers/anthropic/types.go +++ b/core/providers/anthropic/types.go @@ -80,8 +80,10 @@ const ( AnthropicContext1MBetaHeaderPrefix = "context-1m-" AnthropicFastModeBetaHeaderPrefix = "fast-mode-" AnthropicRedactThinkingBetaHeaderPrefix = "redact-thinking-" - AnthropicTaskBudgetsBetaHeaderPrefix = "task-budgets-" - AnthropicEagerInputStreamingBetaHeaderPrefix = "fine-grained-tool-streaming-" + AnthropicTaskBudgetsBetaHeaderPrefix = "task-budgets-" + AnthropicEagerInputStreamingBetaHeaderPrefix = "fine-grained-tool-streaming-" + AnthropicContextManagementBetaHeaderPrefix = "context-management-" + AnthropicCompactionBetaHeaderPrefix = "compact-" ) // ProviderFeatureSupport defines which Anthropic features a given provider supports. @@ -114,8 +116,9 @@ type ProviderFeatureSupport struct { InputExamples bool // tool.input_examples standalone — tool-examples-2025-10-29. Bedrock supports this independently of the AdvancedToolUse bundle (cite: B-header). 
On Anthropic / Azure the bundle implicitly covers it. StructuredOutputs bool // strict tool validation / output_format (cite: A) PromptCachingScope bool // cache_control.scope — prompt-caching-scope-2026-01-05 (cite: A) - Compaction bool // compact_20260112 (cite: A, B-header) - ContextEditing bool // clear_tool_uses / clear_thinking (cite: A, B-header) + Compaction bool // compact_20260112 (cite: A, B-header) + ContextEditing bool // clear_tool_uses / clear_thinking (cite: A, B-header) + ContextManagementField bool // provider accepts the context_management JSON body field at all; false → entire field dropped regardless of edit types FilesAPI bool // files-api-2025-04-14, file_id source (cite: A) InterleavedThinking bool // interleaved thinking between tool calls (cite: A, B-header; fails on non-allowlisted models on Bedrock/Vertex) Skills bool // Agent Skills — container.skills object (cite: A) @@ -142,7 +145,7 @@ var ProviderFeatures = map[schemas.ModelProvider]ProviderFeatureSupport{ WebSearch: true, WebSearchDynamic: true, WebFetch: true, CodeExecution: true, ComputerUse: true, Bash: true, Memory: true, TextEditor: true, ToolSearch: true, MCP: true, AdvancedToolUse: true, InputExamples: true, StructuredOutputs: true, PromptCachingScope: true, - Compaction: true, ContextEditing: true, FilesAPI: true, + Compaction: true, ContextEditing: true, ContextManagementField: true, FilesAPI: true, InterleavedThinking: true, Skills: true, ContainerBasic: true, Context1M: true, FastMode: true, RedactThinking: true, TaskBudgets: true, InferenceGeo: true, EagerInputStreaming: true, AdvisorTool: true, @@ -151,13 +154,22 @@ var ProviderFeatures = map[schemas.ModelProvider]ProviderFeatureSupport{ // Notably NOT supported: MCP (MCP-excl), Skills/container.skills, // InferenceGeo, FastMode, TaskBudgets, AdvisorTool, StructuredOutputs, // PromptCachingScope (400 "unexpected beta header" per LiteLLM #19984), + // ContextEditing (400 "unexpected beta header" per live API error), + // 
ContextManagementField (400 "Extra inputs are not permitted" per live API error + // when the request body carries a context_management object). + // Compaction IS supported on Vertex via the compact-2026-01-12 beta header even + // though Anthropic's compaction docs don't list Vertex (verified by live + // testing). The header passes through FilterBetaHeadersForProvider because + // Compaction: true; the body-field stripper at utils.go:460 removes any + // client-side context_management payload (gated by ContextManagementField: + // false) so the request still succeeds. The two flags are intentionally + // independent: one controls header forwarding, the other controls body shape. // FilesAPI, WebFetch, CodeExecution, AdvancedToolUse, RedactThinking. schemas.Vertex: { WebSearch: true, // web search GA on Vertex per A; earlier code restricted to web_search_20250305 — A doesn't qualify ComputerUse: true, Bash: true, Memory: true, TextEditor: true, ToolSearch: true, ContainerBasic: true, Compaction: true, - ContextEditing: true, InterleavedThinking: true, // V-platform confirms; fails on non-allowlisted 4-series Context1M: true, EagerInputStreaming: true, // fine-grained-tool-streaming GA per A @@ -178,6 +190,7 @@ var ProviderFeatures = map[schemas.ModelProvider]ProviderFeatureSupport{ StructuredOutputs: true, Compaction: true, // compact-2026-01-12 per B-header ContextEditing: true, // context-management-2025-06-27 per B-header (bundles memory) + ContextManagementField: true, // Bedrock accepts context_management body field InterleavedThinking: true, // per B-header; model-allowlisted Context1M: true, // Opus 4.6 / Sonnet 4.6 per A EagerInputStreaming: true, // fine-grained-tool-streaming-2025-05-14 per B-header @@ -193,7 +206,7 @@ var ProviderFeatures = map[schemas.ModelProvider]ProviderFeatureSupport{ WebSearch: true, WebSearchDynamic: true, WebFetch: true, CodeExecution: true, ComputerUse: true, Bash: true, Memory: true, TextEditor: true, ToolSearch: true, MCP: 
true, AdvancedToolUse: true, InputExamples: true, StructuredOutputs: true, PromptCachingScope: true, - Compaction: true, ContextEditing: true, FilesAPI: true, + Compaction: true, ContextEditing: true, ContextManagementField: true, FilesAPI: true, InterleavedThinking: true, Skills: true, ContainerBasic: true, Context1M: true, RedactThinking: true, TaskBudgets: true, EagerInputStreaming: true, diff --git a/core/providers/anthropic/utils.go b/core/providers/anthropic/utils.go index 268a332a2c..1cec0590be 100644 --- a/core/providers/anthropic/utils.go +++ b/core/providers/anthropic/utils.go @@ -5,6 +5,7 @@ import ( "context" "encoding/json" "fmt" + "slices" "strings" "github.com/bytedance/sonic" @@ -453,36 +454,44 @@ func StripUnsupportedFieldsFromRawBody(jsonBody []byte, provider schemas.ModelPr } } - // context_management.edits[] — gate per edit.type. - if editsResult := providerUtils.GetJSONField(jsonBody, "context_management.edits"); editsResult.Exists() && editsResult.IsArray() { - edits := editsResult.Array() - // Collect indices to drop (iterate forwards, delete in reverse). - dropIndices := []int{} - for i, edit := range edits { - editType := edit.Get("type").String() - keep := true - switch editType { - case string(ContextManagementEditTypeCompact): - keep = features.Compaction - case string(ContextManagementEditTypeClearToolUses), string(ContextManagementEditTypeClearThinking): - keep = features.ContextEditing - } - if !keep { - dropIndices = append(dropIndices, i) - } - } - if len(dropIndices) == len(edits) && len(edits) > 0 { - // All edits unsupported — drop the whole context_management. + // context_management — if the provider doesn't accept the field at all (e.g. Vertex), + // drop it entirely. Otherwise gate per edit.type. 
+ if providerUtils.JSONFieldExists(jsonBody, "context_management") { + if !features.ContextManagementField { jsonBody, err = providerUtils.DeleteJSONField(jsonBody, "context_management") if err != nil { return nil, fmt.Errorf("strip raw context_management: %w", err) } - } else { - for i := len(dropIndices) - 1; i >= 0; i-- { - path := fmt.Sprintf("context_management.edits.%d", dropIndices[i]) - jsonBody, err = providerUtils.DeleteJSONField(jsonBody, path) + } else if editsResult := providerUtils.GetJSONField(jsonBody, "context_management.edits"); editsResult.Exists() && editsResult.IsArray() { + edits := editsResult.Array() + // Collect indices to drop (iterate forwards, delete in reverse). + dropIndices := []int{} + for i, edit := range edits { + editType := edit.Get("type").String() + keep := true + switch editType { + case string(ContextManagementEditTypeCompact): + keep = features.Compaction + case string(ContextManagementEditTypeClearToolUses), string(ContextManagementEditTypeClearThinking): + keep = features.ContextEditing + } + if !keep { + dropIndices = append(dropIndices, i) + } + } + if len(dropIndices) == len(edits) { + // No edits to keep (either empty input or all unsupported) — drop the whole context_management. 
+ jsonBody, err = providerUtils.DeleteJSONField(jsonBody, "context_management") if err != nil { - return nil, fmt.Errorf("strip raw context_management.edits[%d]: %w", dropIndices[i], err) + return nil, fmt.Errorf("strip raw context_management: %w", err) + } + } else { + for i := len(dropIndices) - 1; i >= 0; i-- { + path := fmt.Sprintf("context_management.edits.%d", dropIndices[i]) + jsonBody, err = providerUtils.DeleteJSONField(jsonBody, path) + if err != nil { + return nil, fmt.Errorf("strip raw context_management.edits[%d]: %w", dropIndices[i], err) + } } } } @@ -1138,11 +1147,11 @@ var betaHeaderPrefixToFeature = map[string]func(ProviderFeatureSupport) bool{ // MergeBetaHeaders collects anthropic-beta values from provider ExtraHeaders and // per-request context headers, deduplicating them. -func MergeBetaHeaders(providerExtraHeaders map[string]string, ctx context.Context) []string { +func MergeBetaHeaders(ctx context.Context, providerExtraHeaders map[string]string) []string { seen := make(map[string]bool) var all []string add := func(v string) { - for _, part := range strings.Split(v, ",") { + for part := range strings.SplitSeq(v, ",") { if t := strings.TrimSpace(part); t != "" && !seen[t] { seen[t] = true all = append(all, t) @@ -1185,8 +1194,7 @@ func FilterBetaHeadersForProvider(headers []string, provider schemas.ModelProvid filtered := make([]string, 0, len(headers)) for _, h := range headers { - tokens := strings.Split(h, ",") - for _, token := range tokens { + for token := range strings.SplitSeq(h, ",") { token = strings.TrimSpace(token) if token == "" { @@ -1252,10 +1260,8 @@ func FilterBetaHeadersForProvider(headers []string, provider schemas.ModelProvid // appendUniqueHeader adds a header to the slice if not already present func appendUniqueHeader(slice []string, item string) []string { - for _, s := range slice { - if s == item { - return slice - } + if slices.Contains(slice, item) { + return slice } return append(slice, item) } @@ -1952,6 +1958,12 @@ 
func filterEnumValuesByType(enumValues []interface{}, schemaType string) []inter return filtered } +// NormalizeSchemaForAnthropic is the exported entry point for normalizeSchemaForAnthropic, +// used by providers (e.g. Bedrock) that share Anthropic's schema validation rules. +func NormalizeSchemaForAnthropic(schema map[string]interface{}) map[string]interface{} { + return normalizeSchemaForAnthropic(schema) +} + // normalizeSchemaForAnthropic recursively normalizes a JSON schema to be compatible with Anthropic's API. // This handles cases where: // 1. type is an array like ["string", "null"] - converted to single type diff --git a/core/providers/anthropic/utils_test.go b/core/providers/anthropic/utils_test.go index b3fab1097b..312a29d0a5 100644 --- a/core/providers/anthropic/utils_test.go +++ b/core/providers/anthropic/utils_test.go @@ -969,7 +969,7 @@ func TestMergeBetaHeaders(t *testing.T) { ctx.SetValue(schemas.BifrostContextKeyExtraHeaders, map[string][]string{ "Anthropic-Beta": {"structured-outputs-2025-11-13"}, }) - got := MergeBetaHeaders(nil, ctx) + got := MergeBetaHeaders(ctx, nil) want := []string{"structured-outputs-2025-11-13"} if !slices.Equal(got, want) { t.Fatalf("got %v, want %v", got, want) @@ -978,9 +978,9 @@ func TestMergeBetaHeaders(t *testing.T) { t.Run("provider_extra_headers_case_insensitive_key", func(t *testing.T) { ctx := schemas.NewBifrostContext(context.Background(), time.Time{}) - got := MergeBetaHeaders(map[string]string{ + got := MergeBetaHeaders(ctx, map[string]string{ "Anthropic-Beta": "mcp-client-2025-04-04", - }, ctx) + }) want := []string{"mcp-client-2025-04-04"} if !slices.Equal(got, want) { t.Fatalf("got %v, want %v", got, want) @@ -992,9 +992,9 @@ func TestMergeBetaHeaders(t *testing.T) { ctx.SetValue(schemas.BifrostContextKeyExtraHeaders, map[string][]string{ "ANTHROPIC-BETA": {"foo,bar", "bar,baz"}, }) - got := MergeBetaHeaders(map[string]string{ + got := MergeBetaHeaders(ctx, map[string]string{ "anthropic-beta": "foo", - 
}, ctx) + }) sort.Strings(got) wantSorted := []string{"bar", "baz", "foo"} if !slices.Equal(got, wantSorted) { @@ -1048,6 +1048,7 @@ func TestFilterBetaHeadersForProvider(t *testing.T) { AnthropicSkillsBetaHeader, AnthropicFastModeBetaHeader, AnthropicRedactThinkingBetaHeader, + AnthropicContextManagementBetaHeader, } for _, h := range unsupported { result := FilterBetaHeadersForProvider([]string{h}, schemas.Vertex) @@ -1061,7 +1062,6 @@ func TestFilterBetaHeadersForProvider(t *testing.T) { supported := []string{ AnthropicComputerUseBetaHeader20251124, AnthropicCompactionBetaHeader, - AnthropicContextManagementBetaHeader, AnthropicInterleavedThinkingBetaHeader, AnthropicContext1MBetaHeader, AnthropicEagerInputStreamingBetaHeader, @@ -1253,6 +1253,93 @@ func TestFilterBetaHeadersForProvider(t *testing.T) { } } +// TestNetworkConfigBetaOverridesFlow proves the production sequence +// FilterBetaHeadersForProvider(MergeBetaHeaders(ctx, networkConfig.ExtraHeaders), provider, networkConfig.BetaHeaderOverrides) +// honours operator-configured BetaHeaderOverrides for each Anthropic-compatible provider. +// This is the exact call sequence used at anthropic.go:205, vertex.go:407, +// bedrock.go:208, and azure.go:259 — the wire layer where headers are set on the outbound request. 
+func TestNetworkConfigBetaOverridesFlow(t *testing.T) { + type pCase struct { + provider schemas.ModelProvider + droppedByDefault string + droppedByDefaultPfx string + allowedByDefault string + allowedByDefaultPfx string + } + cases := []pCase{ + {schemas.Anthropic, "interleaved-thinking-2025-05-14", AnthropicInterleavedThinkingBetaHeaderPrefix, + "prompt-caching-2024-07-31", "prompt-caching-"}, + {schemas.Vertex, "context-management-2025-06-27", AnthropicContextManagementBetaHeaderPrefix, + "interleaved-thinking-2025-05-14", AnthropicInterleavedThinkingBetaHeaderPrefix}, + {schemas.Bedrock, "files-api-2025-04-14", "files-api-", + "context-management-2025-06-27", AnthropicContextManagementBetaHeaderPrefix}, + {schemas.Azure, "fast-mode-2026-02-01", AnthropicFastModeBetaHeaderPrefix, + "context-management-2025-06-27", AnthropicContextManagementBetaHeaderPrefix}, + } + + for _, tc := range cases { + tc := tc + + t.Run(string(tc.provider)+"/override_enables_default_dropped", func(t *testing.T) { + if tc.provider == schemas.Anthropic { + t.Skip("Anthropic accepts all known betas by default") + } + ctx := schemas.NewBifrostContext(context.Background(), time.Time{}) + ctx.SetValue(schemas.BifrostContextKeyExtraHeaders, map[string][]string{ + AnthropicBetaHeader: {tc.droppedByDefault}, + }) + overrides := map[string]bool{tc.droppedByDefaultPfx: true} + got := FilterBetaHeadersForProvider(MergeBetaHeaders(ctx, nil), tc.provider, overrides) + if len(got) != 1 || got[0] != tc.droppedByDefault { + t.Fatalf("expected override to enable %q for %s, got %v", tc.droppedByDefault, tc.provider, got) + } + }) + + t.Run(string(tc.provider)+"/override_disables_default_allowed", func(t *testing.T) { + ctx := schemas.NewBifrostContext(context.Background(), time.Time{}) + ctx.SetValue(schemas.BifrostContextKeyExtraHeaders, map[string][]string{ + AnthropicBetaHeader: {tc.allowedByDefault}, + }) + overrides := map[string]bool{tc.allowedByDefaultPfx: false} + got := 
FilterBetaHeadersForProvider(MergeBetaHeaders(ctx, nil), tc.provider, overrides) + if len(got) != 0 { + t.Fatalf("expected override to disable %q for %s, got %v", tc.allowedByDefault, tc.provider, got) + } + }) + + t.Run(string(tc.provider)+"/override_only_affects_targeted_prefix", func(t *testing.T) { + const otherAllowed = "interleaved-thinking-2025-05-14" + if tc.allowedByDefaultPfx == AnthropicInterleavedThinkingBetaHeaderPrefix { + t.Skip("test fixture uses interleaved-thinking as the allowed beta") + } + ctx := schemas.NewBifrostContext(context.Background(), time.Time{}) + ctx.SetValue(schemas.BifrostContextKeyExtraHeaders, map[string][]string{ + AnthropicBetaHeader: {tc.allowedByDefault + "," + otherAllowed}, + }) + overrides := map[string]bool{tc.allowedByDefaultPfx: false} + got := FilterBetaHeadersForProvider(MergeBetaHeaders(ctx, nil), tc.provider, overrides) + if len(got) != 1 || got[0] != otherAllowed { + t.Fatalf("expected only %q to survive for %s, got %v", otherAllowed, tc.provider, got) + } + }) + + t.Run(string(tc.provider)+"/override_works_through_merge_with_provider_extra_headers", func(t *testing.T) { + ctx := schemas.NewBifrostContext(context.Background(), time.Time{}) + ctx.SetValue(schemas.BifrostContextKeyExtraHeaders, map[string][]string{ + AnthropicBetaHeader: {tc.allowedByDefault}, + }) + providerExtra := map[string]string{ + AnthropicBetaHeader: tc.allowedByDefault, + } + overrides := map[string]bool{tc.allowedByDefaultPfx: false} + got := FilterBetaHeadersForProvider(MergeBetaHeaders(ctx, providerExtra), tc.provider, overrides) + if len(got) != 0 { + t.Fatalf("expected override to drop %q from merged sources for %s, got %v", tc.allowedByDefault, tc.provider, got) + } + }) + } +} + func TestStripUnsupportedFieldsFromRawBody(t *testing.T) { t.Run("bedrock_strips_new_request_level_fields", func(t *testing.T) { // Raw body with every new typed field. 
Targeting Bedrock: speed (no FastMode), @@ -1283,6 +1370,38 @@ func TestStripUnsupportedFieldsFromRawBody(t *testing.T) { } }) + t.Run("vertex_strips_entire_context_management_field", func(t *testing.T) { + // Vertex rejects the context_management field entirely ("Extra inputs are not permitted"). + // This covers compact edits (Compaction:true keeps the beta header but not the body field) + // and clear edits (ContextEditing:false). + for _, editType := range []string{ + string(ContextManagementEditTypeCompact), + string(ContextManagementEditTypeClearToolUses), + string(ContextManagementEditTypeClearThinking), + } { + input := []byte(`{"model":"claude-sonnet-4-6","context_management":{"edits":[{"type":"` + editType + `"}]}}`) + result, err := StripUnsupportedFieldsFromRawBody(input, schemas.Vertex, "claude-sonnet-4-6") + if err != nil { + t.Fatalf("unexpected error for edit type %q: %v", editType, err) + } + if providerUtils.JSONFieldExists(result, "context_management") { + t.Errorf("expected context_management to be fully stripped for Vertex (edit type %q), got: %s", editType, string(result)) + } + } + }) + + t.Run("anthropic_keeps_context_management_per_edit_type", func(t *testing.T) { + // Anthropic supports context_management; compact edits are kept, clear edits are also kept. + input := []byte(`{"model":"claude-sonnet-4-6","context_management":{"edits":[{"type":"` + string(ContextManagementEditTypeCompact) + `"},{"type":"` + string(ContextManagementEditTypeClearToolUses) + `"}]}}`) + result, err := StripUnsupportedFieldsFromRawBody(input, schemas.Anthropic, "claude-sonnet-4-6") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if !providerUtils.JSONFieldExists(result, "context_management") { + t.Errorf("expected context_management to be kept for Anthropic, got: %s", string(result)) + } + }) + t.Run("vertex_strips_mcp_strict_and_input_examples_via_feature_check", func(t *testing.T) { // Vertex: no MCP, no InputExamples, no StructuredOutputs. 
// tool.strict stripped; tool.input_examples stripped; mcp_servers stripped. diff --git a/core/providers/azure/azure.go b/core/providers/azure/azure.go index 9f4bc17763..4c2181fcc5 100644 --- a/core/providers/azure/azure.go +++ b/core/providers/azure/azure.go @@ -256,7 +256,7 @@ func (provider *AzureProvider) completeRequest( url = fmt.Sprintf("%s/%s", endpoint, path) // Merge ExtraHeaders + context anthropic-beta, filter for Azure, then set as HTTP header - if betaHeaders := anthropic.FilterBetaHeadersForProvider(anthropic.MergeBetaHeaders(provider.networkConfig.ExtraHeaders, ctx), schemas.Azure, provider.networkConfig.BetaHeaderOverrides); len(betaHeaders) > 0 { + if betaHeaders := anthropic.FilterBetaHeadersForProvider(anthropic.MergeBetaHeaders(ctx, provider.networkConfig.ExtraHeaders), schemas.Azure, provider.networkConfig.BetaHeaderOverrides); len(betaHeaders) > 0 { req.Header.Set(anthropic.AnthropicBetaHeader, strings.Join(betaHeaders, ",")) } else { req.Header.Del(anthropic.AnthropicBetaHeader) diff --git a/core/providers/bedrock/bedrock.go b/core/providers/bedrock/bedrock.go index 7e64a13e36..1b181a6452 100644 --- a/core/providers/bedrock/bedrock.go +++ b/core/providers/bedrock/bedrock.go @@ -159,6 +159,10 @@ func isStreamTransportError(err error) bool { if errors.Is(err, io.ErrUnexpectedEOF) { return true } + var checksumErr eventstream.ChecksumError + if errors.As(err, &checksumErr) { + return true + } var opErr *net.OpError var dnsErr *net.DNSError return errors.As(err, &opErr) || errors.As(err, &dnsErr) @@ -201,7 +205,7 @@ func (provider *BedrockProvider) completeRequest(ctx *schemas.BifrostContext, js // Set any extra headers from network config providerUtils.SetExtraHeadersHTTP(ctx, req, provider.networkConfig.ExtraHeaders, nil) - if filtered := anthropic.FilterBetaHeadersForProvider(anthropic.MergeBetaHeaders(provider.networkConfig.ExtraHeaders, ctx), schemas.Bedrock, provider.networkConfig.BetaHeaderOverrides); len(filtered) > 0 { + if filtered := 
anthropic.FilterBetaHeadersForProvider(anthropic.MergeBetaHeaders(ctx, provider.networkConfig.ExtraHeaders), schemas.Bedrock, provider.networkConfig.BetaHeaderOverrides); len(filtered) > 0 { req.Header.Set(anthropic.AnthropicBetaHeader, strings.Join(filtered, ",")) } else { req.Header.Del(anthropic.AnthropicBetaHeader) @@ -428,7 +432,7 @@ func (provider *BedrockProvider) makeStreamingRequest(ctx *schemas.BifrostContex // Set any extra headers from network config providerUtils.SetExtraHeadersHTTP(ctx, req, provider.networkConfig.ExtraHeaders, nil) - if filtered := anthropic.FilterBetaHeadersForProvider(anthropic.MergeBetaHeaders(provider.networkConfig.ExtraHeaders, ctx), schemas.Bedrock, provider.networkConfig.BetaHeaderOverrides); len(filtered) > 0 { + if filtered := anthropic.FilterBetaHeadersForProvider(anthropic.MergeBetaHeaders(ctx, provider.networkConfig.ExtraHeaders), schemas.Bedrock, provider.networkConfig.BetaHeaderOverrides); len(filtered) > 0 { req.Header.Set(anthropic.AnthropicBetaHeader, strings.Join(filtered, ",")) } else { req.Header.Del(anthropic.AnthropicBetaHeader) diff --git a/core/providers/bedrock/responses.go b/core/providers/bedrock/responses.go index 41c2fc248c..d66633293f 100644 --- a/core/providers/bedrock/responses.go +++ b/core/providers/bedrock/responses.go @@ -1845,6 +1845,7 @@ func ToBedrockResponsesRequest(ctx *schemas.BifrostContext, bifrostReq *schemas. bedrockReq.AdditionalModelRequestFields = schemas.NewOrderedMap() } setOutputConfigField(bedrockReq.AdditionalModelRequestFields, "format", anthropicOutputFormat) + appendAnthropicBetaToFields(bedrockReq.AdditionalModelRequestFields, anthropic.AnthropicStructuredOutputsBetaHeader) } // Defer synthetic tool injection until after normal tool/tool_choice conversion // so the structured-output tool is not overwritten by the later pass. 
diff --git a/core/providers/bedrock/utils.go b/core/providers/bedrock/utils.go index 4eb48452a0..4bf9a304e7 100644 --- a/core/providers/bedrock/utils.go +++ b/core/providers/bedrock/utils.go @@ -83,6 +83,10 @@ func convertChatParameters(ctx *schemas.BifrostContext, bifrostReq *schemas.Bifr bedrockReq.AdditionalModelRequestFields = schemas.NewOrderedMap() } setOutputConfigField(bedrockReq.AdditionalModelRequestFields, "format", anthropicOutputFormat) + // The outer HTTP anthropic-beta header is consumed by Bedrock's edge and not forwarded + // to the underlying Claude model, so the beta value must also live in + // additionalModelRequestFields for the model to recognise output_config.format. + appendAnthropicBetaToFields(bedrockReq.AdditionalModelRequestFields, anthropic.AnthropicStructuredOutputsBetaHeader) } // Filter provider-unsupported server tools once; both convertToolConfig and @@ -114,8 +118,8 @@ func convertChatParameters(ctx *schemas.BifrostContext, bifrostReq *schemas.Bifr bedrockReq.AdditionalModelRequestFields = schemas.NewOrderedMap() } bedrockReq.AdditionalModelRequestFields.Set("tools", serverTools) - if len(betaHeaders) > 0 { - bedrockReq.AdditionalModelRequestFields.Set("anthropic_beta", betaHeaders) + for _, h := range betaHeaders { + appendAnthropicBetaToFields(bedrockReq.AdditionalModelRequestFields, h) } // Skip the tunneled tool_choice when response_format forces the synthetic // bf_so_* tool at lines 263-275 below; otherwise Bedrock receives two @@ -479,12 +483,52 @@ func mergeOrderedMapInto(dst, src *schemas.OrderedMap) { } func newAnthropicOutputFormatOrderedMap(schemaObj any) *schemas.OrderedMap { + // Normalize multi-type arrays (["string","null"], ["string","integer"]) into anyOf branches + // so Bedrock's schema validator accepts them. Pure in-memory map ops; no JSON round-trips. + // OrderedMap schemas are passed through unchanged. 
+ if m, ok := schemaObj.(map[string]interface{}); ok { + schemaObj = anthropic.NormalizeSchemaForAnthropic(m) + } return schemas.NewOrderedMapFromPairs( schemas.KV("type", "json_schema"), schemas.KV("schema", schemaObj), ) } +// appendAnthropicBetaToFields merges a single beta header value into +// additionalModelRequestFields.anthropic_beta without creating duplicates. +// This is needed for Bedrock: the outer HTTP anthropic-beta header is consumed +// by Bedrock's edge and NOT forwarded to the underlying Claude model; the value +// must live in additionalModelRequestFields so Bedrock passes it through. +func appendAnthropicBetaToFields(fields *schemas.OrderedMap, header string) { + if fields == nil || header == "" { + return + } + var existing []string + if raw, ok := fields.Get("anthropic_beta"); ok { + switch v := raw.(type) { + case []string: + existing = v + case []interface{}: + for _, item := range v { + if s, ok := item.(string); ok { + existing = append(existing, s) + } + } + case string: + if v != "" { + existing = []string{v} + } + } + } + for _, h := range existing { + if h == header { + return + } + } + fields.Set("anthropic_beta", append(existing, header)) +} + // ensureChatToolConfigForConversation ensures toolConfig is present when tool content exists func ensureChatToolConfigForConversation(bifrostReq *schemas.BifrostChatRequest, bedrockReq *BedrockConverseRequest) { if bedrockReq.ToolConfig != nil { diff --git a/core/providers/openai/openai.go b/core/providers/openai/openai.go index 2197de92e2..9c5997a648 100644 --- a/core/providers/openai/openai.go +++ b/core/providers/openai/openai.go @@ -983,9 +983,9 @@ func HandleOpenAIChatCompletionStreaming( } reqBody := ToOpenAIChatRequest(ctx, request) if reqBody != nil { - reqBody.Stream = schemas.Ptr(true) + reqBody.Stream = new(true) reqBody.StreamOptions = &schemas.ChatStreamOptions{ - IncludeUsage: schemas.Ptr(true), + IncludeUsage: new(true), } if postRequestConverter != nil { reqBody = 
postRequestConverter(reqBody) @@ -1149,7 +1149,6 @@ func HandleOpenAIChatCompletionStreaming( // Parse into bifrost response var response schemas.BifrostChatResponse - // TODO fix this if customResponseHandler != nil { rawRequest, rawResponse, handlerErr := customResponseHandler([]byte(jsonData), &response, nil, sendBackRawRequest, sendBackRawResponse) if handlerErr != nil { diff --git a/core/providers/vertex/utils.go b/core/providers/vertex/utils.go index 843db7d092..d882cc6cb8 100644 --- a/core/providers/vertex/utils.go +++ b/core/providers/vertex/utils.go @@ -13,7 +13,7 @@ import ( // Compared to the native Anthropic path, it strips model/region fields, remaps tool versions, injects beta headers // into the request body (rather than HTTP headers), and pins the Anthropic API version to DefaultVertexAnthropicVersion. func getRequestBodyForAnthropicResponses(ctx *schemas.BifrostContext, request *schemas.BifrostResponsesRequest, deployment string, isStreaming bool, isCountTokens bool, betaHeaderOverrides map[string]bool, providerExtraHeaders map[string]string, shouldSendBackRawRequest bool, shouldSendBackRawResponse bool) ([]byte, *schemas.BifrostError) { - return anthropic.BuildAnthropicResponsesRequestBody(ctx, request, anthropic.AnthropicRequestBuildConfig{ + jsonBody, buildErr := anthropic.BuildAnthropicResponsesRequestBody(ctx, request, anthropic.AnthropicRequestBuildConfig{ Provider: schemas.Vertex, Deployment: deployment, DeleteModelField: true, @@ -31,6 +31,14 @@ func getRequestBodyForAnthropicResponses(ctx *schemas.BifrostContext, request *s ShouldSendBackRawRequest: shouldSendBackRawRequest, ShouldSendBackRawResponse: shouldSendBackRawResponse, }) + if buildErr != nil { + return nil, buildErr + } + stripped, err := anthropic.StripUnsupportedFieldsFromRawBody(jsonBody, schemas.Vertex, deployment) + if err != nil { + return nil, providerUtils.NewBifrostOperationError(err.Error(), nil) + } + return stripped, nil } // getCompleteURLForGeminiEndpoint constructs 
the complete URL for the Gemini endpoint, for both streaming and non-streaming requests diff --git a/core/providers/vertex/vertex.go b/core/providers/vertex/vertex.go index 34e906d0bc..18515e9fdf 100644 --- a/core/providers/vertex/vertex.go +++ b/core/providers/vertex/vertex.go @@ -404,7 +404,7 @@ func (provider *VertexProvider) ChatCompletion(ctx *schemas.BifrostContext, key } } // Inject beta headers into body as anthropic_beta (Vertex uses body field, not HTTP header) - if betaHeaders := anthropic.FilterBetaHeadersForProvider(anthropic.MergeBetaHeaders(provider.networkConfig.ExtraHeaders, ctx), schemas.Vertex, provider.networkConfig.BetaHeaderOverrides); len(betaHeaders) > 0 { + if betaHeaders := anthropic.FilterBetaHeadersForProvider(anthropic.MergeBetaHeaders(ctx, provider.networkConfig.ExtraHeaders), schemas.Vertex, provider.networkConfig.BetaHeaderOverrides); len(betaHeaders) > 0 { rawBody, err = providerUtils.SetJSONField(rawBody, "anthropic_beta", betaHeaders) if err != nil { return nil, fmt.Errorf("failed to set anthropic_beta: %w", err) @@ -474,6 +474,13 @@ func (provider *VertexProvider) ChatCompletion(ctx *schemas.BifrostContext, key return nil, providerUtils.NewBifrostOperationError(remapErr.Error(), nil) } jsonBody = remappedBody + + // Strip unsupported body fields for Vertex — covers both structured and raw passthrough paths. 
+ var stripErr error + jsonBody, stripErr = anthropic.StripUnsupportedFieldsFromRawBody(jsonBody, schemas.Vertex, request.Model) + if stripErr != nil { + return nil, providerUtils.NewBifrostOperationError(stripErr.Error(), nil) + } } // Auth query is used for fine-tuned models to pass the API key in the query string @@ -726,7 +733,7 @@ func (provider *VertexProvider) ChatCompletionStream(ctx *schemas.BifrostContext } } // Inject beta headers into body as anthropic_beta (Vertex uses body field, not HTTP header) - if betaHeaders := anthropic.FilterBetaHeadersForProvider(anthropic.MergeBetaHeaders(provider.networkConfig.ExtraHeaders, ctx), schemas.Vertex, provider.networkConfig.BetaHeaderOverrides); len(betaHeaders) > 0 { + if betaHeaders := anthropic.FilterBetaHeadersForProvider(anthropic.MergeBetaHeaders(ctx, provider.networkConfig.ExtraHeaders), schemas.Vertex, provider.networkConfig.BetaHeaderOverrides); len(betaHeaders) > 0 { rawBody, err = providerUtils.SetJSONField(rawBody, "anthropic_beta", betaHeaders) if err != nil { return nil, fmt.Errorf("failed to set anthropic_beta: %w", err) @@ -756,6 +763,13 @@ func (provider *VertexProvider) ChatCompletionStream(ctx *schemas.BifrostContext if remapErr != nil { return nil, providerUtils.NewBifrostOperationError(remapErr.Error(), nil) } + + // Strip unsupported body fields for Vertex — covers both structured and raw passthrough paths. 
+ var stripErr error + jsonData, stripErr = anthropic.StripUnsupportedFieldsFromRawBody(jsonData, schemas.Vertex, request.Model) + if stripErr != nil { + return nil, providerUtils.NewBifrostOperationError(stripErr.Error(), nil) + } } var completeURL string diff --git a/core/schemas/bifrost.go b/core/schemas/bifrost.go index e43ba0ccbf..12ea4b2b07 100644 --- a/core/schemas/bifrost.go +++ b/core/schemas/bifrost.go @@ -315,9 +315,10 @@ type KeyAttemptRecord struct { // RoutingEngineLogEntry represents a log entry from a routing engine // format: [timestamp] [engine] - message type RoutingEngineLogEntry struct { - Engine string // e.g., "governance", "routing-rule", "openrouter" - Message string // Human-readable decision/action message - Timestamp int64 // Unix milliseconds + Engine string `json:"engine"` // e.g., "governance", "routing-rule", "openrouter" + Level LogLevel `json:"level"` + Message string `json:"message"` // Human-readable decision/action message + Timestamp int64 `json:"timestamp"` // Unix milliseconds } // PluginLogEntry represents a structured log entry emitted by a plugin via ctx.Log(). 
diff --git a/core/schemas/context.go b/core/schemas/context.go index 68ac7c435e..5c2760b065 100644 --- a/core/schemas/context.go +++ b/core/schemas/context.go @@ -359,9 +359,11 @@ func (bc *BifrostContext) GetParentCtxWithUserValues() context.Context { // - ctx: The Bifrost context // - engineName: Name of the routing engine (e.g., "governance", "routing-rule") +// - level: Severity level for the log entry // - message: Human-readable log message describing the decision/action -func (bc *BifrostContext) AppendRoutingEngineLog(engineName string, message string) { +func (bc *BifrostContext) AppendRoutingEngineLog(engineName string, level LogLevel, message string) { entry := RoutingEngineLogEntry{ Engine: engineName, + Level: level, Message: message, Timestamp: time.Now().UnixMilli(), } diff --git a/core/schemas/envvar.go b/core/schemas/envvar.go index 31c7892918..c306f22fe6 100644 --- a/core/schemas/envvar.go +++ b/core/schemas/envvar.go @@ -307,6 +307,20 @@ func (e *EnvVar) IsFromEnv() bool { return e.FromEnv } +// ShouldPreserveStored returns true when the EnvVar is a client-side placeholder +// that should not overwrite the stored encrypted credential. Returns true for a +// nil receiver, an empty non-env value, or a redacted non-env value. Returns false +// for env var references (always intentional) and plain non-empty values. +func (e *EnvVar) ShouldPreserveStored() bool { + if e == nil { + return true + } + if e.IsFromEnv() { + return false + } + return e.GetValue() == "" || e.IsRedacted() +} + +// IsSet returns true if the EnvVar has a resolved value or an environment variable reference.
// This should be used instead of GetValue() != "" when checking whether a field was configured, // because env var references may have an empty Val before resolution (e.g., when the env var diff --git a/docs/changelogs/v1.4.23.mdx b/docs/changelogs/v1.4.23.mdx index 2a5f8d2049..d66d82cbcc 100644 --- a/docs/changelogs/v1.4.23.mdx +++ b/docs/changelogs/v1.4.23.mdx @@ -2,17 +2,11 @@ title: "v1.4.23" description: "v1.4.23 changelog - 2026-04-18" --- + - - ```bash - npx -y @maximhq/bifrost --transport-version v1.4.23 - ``` - + ```bash npx -y @maximhq/bifrost --transport-version v1.4.23 ``` - ```bash - docker pull maximhq/bifrost:v1.4.23 - docker run -p 8080:8080 maximhq/bifrost:v1.4.23 - ``` + ```bash docker pull maximhq/bifrost:v1.4.23 docker run -p 8080:8080 maximhq/bifrost:v1.4.23 ``` diff --git a/docs/changelogs/v1.5.0-prerelease4.mdx b/docs/changelogs/v1.5.0-prerelease4.mdx index 553e857c4a..ac48c9e828 100644 --- a/docs/changelogs/v1.5.0-prerelease4.mdx +++ b/docs/changelogs/v1.5.0-prerelease4.mdx @@ -2,16 +2,11 @@ title: "v1.5.0-prerelease4" description: "v1.5.0-prerelease4 changelog - 2026-04-21" --- + - - ```bash - npx -y @maximhq/bifrost --transport-version v1.5.0-prerelease4 - ``` - + ```bash npx -y @maximhq/bifrost --transport-version v1.5.0-prerelease4 ``` - ```bash - docker pull maximhq/bifrost:v1.5.0-prerelease4 - docker run -p 8080:8080 maximhq/bifrost:v1.5.0-prerelease4 + ```bash docker pull maximhq/bifrost:v1.5.0-prerelease4 docker run -p 8080:8080 maximhq/bifrost:v1.5.0-prerelease4 ``` diff --git a/docs/deployment-guides/config-json/guardrails.mdx b/docs/deployment-guides/config-json/guardrails.mdx index 3b07cc3b8c..3848c61c2f 100644 --- a/docs/deployment-guides/config-json/guardrails.mdx +++ b/docs/deployment-guides/config-json/guardrails.mdx @@ -8,9 +8,9 @@ icon: "shield-halved" Guardrails are an **enterprise-only** feature and require the enterprise Bifrost image. 
- -`env.*` references are currently **not supported** inside `guardrails_config` (including provider `config` values). Use explicit values in `guardrails_config` for now. - + +Credential and endpoint fields in guardrail provider `config` blocks support `"env.VAR_NAME"` strings (e.g. `"env.AWS_SECRET_KEY"`). Bifrost resolves the value from the process environment at startup. See the [Environment Variable Support](#environment-variable-support) section for the complete per-provider field list. + Guardrails are configured under `guardrails_config` in `config.json`. The configuration has two parts: @@ -42,7 +42,7 @@ Runs entirely in-process with no external dependency. Patterns use RE2 syntax. S { "pattern": "AKIA[0-9A-Z]{16}", "description": "AWS access key" }, { "pattern": "gh[ps]_[A-Za-z0-9]{36}", "description": "GitHub token", "flags": "i" } ], - "mode": "block" + "sampling_rate": 100 } } ] @@ -53,6 +53,8 @@ Runs entirely in-process with no external dependency. Patterns use RE2 syntax. S +Supports three auth modes: `keys` (static credentials), `api_key` (Bedrock API key), and `iam_role` (ambient IAM/IRSA — no explicit credentials required). + ```json { "guardrails_config": { @@ -64,11 +66,16 @@ Runs entirely in-process with no external dependency. Patterns use RE2 syntax. S "enabled": true, "timeout": 15, "config": { - "guardrail_arn": "arn:aws:bedrock:us-east-1::guardrail/abc123", + "guardrail_arn": "env.BEDROCK_GUARDRAIL_ARN", "guardrail_version": "DRAFT", - "region": "us-east-1", - "access_key": "YOUR_AWS_ACCESS_KEY_ID", - "secret_key": "YOUR_AWS_SECRET_ACCESS_KEY" + "region": "env.AWS_REGION", + + "auth_type": "keys", + "access_key": "env.AWS_ACCESS_KEY_ID", + "secret_key": "env.AWS_SECRET_ACCESS_KEY", + "session_token": "env.AWS_SESSION_TOKEN", + + "sampling_rate": 100 } } ] @@ -76,9 +83,29 @@ Runs entirely in-process with no external dependency. Patterns use RE2 syntax. 
S } ``` +For `auth_type: "api_key"`: +```json +{ + "auth_type": "api_key", + "bedrock_api_key": "env.BEDROCK_API_KEY" +} +``` + +For `auth_type: "iam_role"` (no credentials — uses ambient IAM): +```json +{ + "auth_type": "iam_role", + "role_arn": "env.AWS_ROLE_ARN", + "external_id": "env.AWS_EXTERNAL_ID", + "session_name": "env.AWS_SESSION_NAME" +} +``` + +Supports three auth modes: `api_key`, `default_credential` (managed identity / Azure CLI), and `entra_id` (service principal). + ```json { "guardrails_config": { @@ -90,15 +117,19 @@ Runs entirely in-process with no external dependency. Patterns use RE2 syntax. S "enabled": true, "timeout": 10, "config": { - "endpoint": "https://your-resource.cognitiveservices.azure.com", - "api_key": "YOUR_AZURE_CONTENT_SAFETY_KEY", + "endpoint": "env.AZURE_CONTENT_SAFETY_ENDPOINT", + + "auth_type": "api_key", + "api_key": "env.AZURE_CONTENT_SAFETY_KEY", + "analyze_enabled": true, "analyze_severity_threshold": "medium", "jailbreak_shield_enabled": true, "indirect_attack_shield_enabled": true, "copyright_enabled": false, "text_blocklist_enabled": false, - "blocklist_names": [] + "blocklist_names": [], + "sampling_rate": 100 } } ] @@ -106,10 +137,27 @@ Runs entirely in-process with no external dependency. Patterns use RE2 syntax. S } ``` +For `auth_type: "entra_id"` (service principal): +```json +{ + "auth_type": "entra_id", + "client_id": "env.AZURE_CLIENT_ID", + "client_secret": "env.AZURE_CLIENT_SECRET", + "tenant_id": "env.AZURE_TENANT_ID" +} +``` + +For `auth_type: "default_credential"` (managed identity / Azure CLI — no credentials needed): +```json +{ + "auth_type": "default_credential" +} +``` + `analyze_severity_threshold` accepts `"low"`, `"medium"`, or `"high"`. - + ```json { @@ -117,17 +165,43 @@ Runs entirely in-process with no external dependency. Patterns use RE2 syntax. 
S "guardrail_providers": [ { "id": 4, + "provider_name": "patronus-ai", + "policy_name": "patronus-eval", + "enabled": true, + "timeout": 30, + "config": { + "api_key": "env.PATRONUS_API_KEY", + "environment": "production", + "sampling_rate": 100 + } + } + ] + } +} +``` + + + + +```json +{ + "guardrails_config": { + "guardrail_providers": [ + { + "id": 5, "provider_name": "grayswan", "policy_name": "grayswan-jailbreak", "enabled": true, "timeout": 15, "config": { - "api_key": "YOUR_GRAYSWAN_API_KEY", - "violation_threshold": 0.7, + "api_key": "env.GRAYSWAN_API_KEY", + "base_url": "env.GRAYSWAN_BASE_URL", "reasoning_mode": "standard", + "violation_threshold": 0.7, "policy_id": "", "policy_ids": [], - "rules": {} + "rules": {}, + "sampling_rate": 100 } } ] @@ -143,7 +217,7 @@ Runs entirely in-process with no external dependency. Patterns use RE2 syntax. S | Field | Required | Description | |-------|----------|-------------| | `id` | Yes | Unique integer ID — referenced by rules via `provider_config_ids` | -| `provider_name` | Yes | Backend: `"regex"`, `"bedrock"`, `"azure"`, `"grayswan"` | +| `provider_name` | Yes | Backend: `"regex"`, `"bedrock"`, `"azure"`, `"patronus-ai"`, `"grayswan"` | | `policy_name` | Yes | Human-readable policy label | | `enabled` | Yes | `true` to activate | | `timeout` | No | Execution timeout in seconds | @@ -151,6 +225,81 @@ Runs entirely in-process with no external dependency. Patterns use RE2 syntax. S --- +## Environment Variable Support + +Any field marked **env.\* supported** accepts a bare `"env.VAR_NAME"` string in addition to a literal value. Bifrost resolves the variable from the process environment at startup. Fields marked **plain only** must be a literal value (boolean, number, array, or string). 
+ +### AWS Bedrock + +| Field | Required | env.\* supported | Notes | +|-------|----------|-----------------|-------| +| `guardrail_arn` | Yes | Yes | ARN of the Bedrock guardrail | +| `guardrail_version` | Yes | Yes | `"DRAFT"` or a published version number | +| `region` | Yes | Yes | AWS region (e.g. `"us-east-1"`) | +| `auth_type` | No | Yes | `"keys"` (default) \| `"api_key"` \| `"iam_role"` | +| `access_key` | Conditional | Yes | Required when `auth_type="keys"` | +| `secret_key` | Conditional | Yes | Required when `auth_type="keys"` | +| `session_token` | No | Yes | Optional temporary session token | +| `bedrock_api_key` | Conditional | Yes | Required when `auth_type="api_key"` | +| `role_arn` | No | Yes | IAM role ARN to assume (optional, `auth_type="iam_role"`) | +| `external_id` | No | Yes | External ID for role assumption | +| `session_name` | No | Yes | Session name for role assumption | +| `sampling_rate` | No | **Plain only** | `0`–`100`; percentage of requests to evaluate (default: `100`) | +| `timeout` | No | **Plain only** | Execution timeout in seconds | + +### Azure Content Safety + +| Field | Required | env.\* supported | Notes | +|-------|----------|-----------------|-------| +| `endpoint` | Yes | Yes | Azure Content Safety resource URL | +| `auth_type` | No | Yes | `"api_key"` (default) \| `"default_credential"` \| `"entra_id"` | +| `api_key` | Conditional | Yes | Required when `auth_type="api_key"` | +| `client_id` | Conditional | Yes | Required when `auth_type="entra_id"` | +| `client_secret` | Conditional | Yes | Required when `auth_type="entra_id"` | +| `tenant_id` | Conditional | Yes | Required when `auth_type="entra_id"` | +| `analyze_severity_threshold` | No | Yes | `"low"` \| `"medium"` \| `"high"` (default: `"medium"`) | +| `analyze_enabled` | No | **Plain only** | Enable text analysis (default: `true`) | +| `jailbreak_shield_enabled` | No | **Plain only** | Enable jailbreak detection (default: `false`) | +| 
`indirect_attack_shield_enabled` | No | **Plain only** | Enable indirect attack detection (default: `false`) | +| `copyright_enabled` | No | **Plain only** | Enable copyright detection (default: `false`) | +| `text_blocklist_enabled` | No | **Plain only** | Enable custom blocklists (default: `false`) | +| `scopes` | No | **Plain only** | OAuth scopes (string array) | +| `blocklist_names` | No | **Plain only** | Blocklist names to apply (string array) | +| `sampling_rate` | No | **Plain only** | `0`–`100`; percentage of requests to evaluate (default: `100`) | +| `timeout` | No | **Plain only** | Execution timeout in seconds | + +### Patronus AI + +| Field | Required | env.\* supported | Notes | +|-------|----------|-----------------|-------| +| `api_key` | Yes | Yes | Patronus AI API key | +| `environment` | No | Yes | `"production"` (default) \| `"development"` | +| `sampling_rate` | No | **Plain only** | `0`–`100`; percentage of requests to evaluate (default: `100`) | +| `timeout` | No | **Plain only** | Execution timeout in seconds | + +### Gray Swan + +| Field | Required | env.\* supported | Notes | +|-------|----------|-----------------|-------| +| `api_key` | Yes | Yes | Gray Swan API key | +| `base_url` | No | Yes | Custom API base URL (uses Gray Swan default if unset) | +| `reasoning_mode` | No | Yes | `"standard"` \| `"fast"` \| `"off"` (default: `"standard"`) | +| `violation_threshold` | No | **Plain only** | `0.0`–`1.0`; higher = more permissive (default: `0.5`) | +| `policy_id` | No | **Plain only** | Single policy ID string | +| `policy_ids` | No | **Plain only** | Multiple policy IDs (string array) | +| `rules` | No | **Plain only** | Inline rule map (`{ "rule_name": "description" }`) | +| `sampling_rate` | No | **Plain only** | `0`–`100`; percentage of requests to evaluate (default: `100`) | +| `timeout` | No | **Plain only** | Execution timeout in seconds | + +### Regex + +| Field | Required | env.\* supported | Notes | 
+|-------|----------|-----------------|-------| +| `patterns` | Yes | **Plain only** | Array of `{ pattern, description?, flags? }` objects | +| `sampling_rate` | No | **Plain only** | `0`–`100`; percentage of requests to evaluate (default: `100`) | + +--- + ## Rules Rules are CEL expressions that fire when their condition matches. Available CEL variables: @@ -198,7 +347,7 @@ Rules are CEL expressions that fire when their condition matches. Available CEL "apply_to": "input", "sampling_rate": 50, "timeout": 20, - "provider_config_ids": [4] + "provider_config_ids": [5] } ] } @@ -246,8 +395,7 @@ Rules are CEL expressions that fire when their condition matches. Available CEL "patterns": [ { "pattern": "sk-[A-Za-z0-9]{20,}", "description": "OpenAI API key" }, { "pattern": "AKIA[0-9A-Z]{16}", "description": "AWS access key" } - ], - "mode": "block" + ] } }, { @@ -257,8 +405,8 @@ Rules are CEL expressions that fire when their condition matches. Available CEL "enabled": true, "timeout": 10, "config": { - "endpoint": "https://your-resource.cognitiveservices.azure.com", - "api_key": "YOUR_AZURE_CONTENT_SAFETY_KEY", + "endpoint": "env.AZURE_CONTENT_SAFETY_ENDPOINT", + "api_key": "env.AZURE_CONTENT_SAFETY_KEY", "analyze_enabled": true, "analyze_severity_threshold": "medium", "jailbreak_shield_enabled": true, diff --git a/docs/deployment-guides/helm/guardrails.mdx b/docs/deployment-guides/helm/guardrails.mdx index b4fef56741..f5bd13feb5 100644 --- a/docs/deployment-guides/helm/guardrails.mdx +++ b/docs/deployment-guides/helm/guardrails.mdx @@ -8,9 +8,9 @@ icon: "shield-halved" Guardrails are an **enterprise-only** feature. They require the enterprise Bifrost image. - -`env.*` references are currently **not supported** in `bifrost.guardrails` provider `config` values. Use explicit values there for now. - + +Credential and endpoint fields in guardrail provider `config` blocks support `env.*` references (e.g. `env.AWS_SECRET_ACCESS_KEY`). 
Bifrost resolves the value from the process environment at startup. See the [Environment Variable Support](#environment-variable-support) section for the complete per-provider field list. + Guardrails are configured under `bifrost.guardrails` in your values file. The configuration has two parts: @@ -44,11 +44,14 @@ bifrost: flags: "i" - pattern: "gh[ps]_[A-Za-z0-9]{36}" description: "GitHub token" + sampling_rate: 100 ``` +Supports three auth modes: `keys` (static credentials), `api_key` (Bedrock API key), and `iam_role` (ambient IAM/IRSA — no explicit credentials required). + ```yaml bifrost: guardrails: @@ -59,16 +62,35 @@ bifrost: enabled: true timeout: 15 config: - guardrail_arn: "arn:aws:bedrock:us-east-1::guardrail/abc123" - guardrail_version: "DRAFT" # or a published version number - region: "us-east-1" - access_key: "YOUR_AWS_ACCESS_KEY_ID" # omit to use instance role - secret_key: "YOUR_AWS_SECRET_ACCESS_KEY" + # Required fields + guardrail_arn: "env.BEDROCK_GUARDRAIL_ARN" + guardrail_version: "DRAFT" # or a published version number + region: "env.AWS_REGION" + + # Auth: keys (default) + auth_type: "keys" + access_key: "env.AWS_ACCESS_KEY_ID" + secret_key: "env.AWS_SECRET_ACCESS_KEY" + session_token: "env.AWS_SESSION_TOKEN" # optional + + # Auth: api_key (alternative) + # auth_type: "api_key" + # bedrock_api_key: "env.BEDROCK_API_KEY" + + # Auth: iam_role (ambient — no credentials needed) + # auth_type: "iam_role" + # role_arn: "env.AWS_ROLE_ARN" # optional: assume specific role + # external_id: "env.AWS_EXTERNAL_ID" # optional + # session_name: "env.AWS_SESSION_NAME" # optional + + sampling_rate: 100 ``` +Supports three auth modes: `api_key`, `default_credential` (managed identity / Azure CLI), and `entra_id` (service principal). 
+ ```yaml bifrost: guardrails: @@ -79,36 +101,74 @@ bifrost: enabled: true timeout: 10 config: - endpoint: "https://your-resource.cognitiveservices.azure.com" - api_key: "YOUR_AZURE_CONTENT_SAFETY_KEY" + # Required field + endpoint: "env.AZURE_CONTENT_SAFETY_ENDPOINT" + + # Auth: api_key (default) + auth_type: "api_key" + api_key: "env.AZURE_CONTENT_SAFETY_KEY" + + # Auth: default_credential (managed identity / Azure CLI — no credentials needed) + # auth_type: "default_credential" + + # Auth: entra_id (service principal) + # auth_type: "entra_id" + # client_id: "env.AZURE_CLIENT_ID" + # client_secret: "env.AZURE_CLIENT_SECRET" + # tenant_id: "env.AZURE_TENANT_ID" + + # Feature toggles (plain booleans — no env.* support) analyze_enabled: true - analyze_severity_threshold: "medium" # low | medium | high + analyze_severity_threshold: "medium" # low | medium | high (env.* supported) jailbreak_shield_enabled: true indirect_attack_shield_enabled: true copyright_enabled: false text_blocklist_enabled: false blocklist_names: [] + sampling_rate: 100 ``` - + ```yaml bifrost: guardrails: providers: - id: 4 + provider_name: "patronus-ai" + policy_name: "patronus-eval" + enabled: true + timeout: 30 + config: + api_key: "env.PATRONUS_API_KEY" + environment: "production" # production | development (env.* supported) + sampling_rate: 100 +``` + + + + +```yaml +bifrost: + guardrails: + providers: + - id: 5 provider_name: "grayswan" policy_name: "grayswan-jailbreak" enabled: true timeout: 15 config: - api_key: "YOUR_GRAYSWAN_API_KEY" - violation_threshold: 0.7 # 0.0–1.0; higher = more permissive - reasoning_mode: "standard" # standard | fast - policy_id: "" # optional: single policy ID - policy_ids: [] # optional: multiple policy IDs - rules: {} # optional: inline rule map + api_key: "env.GRAYSWAN_API_KEY" + base_url: "env.GRAYSWAN_BASE_URL" # optional custom endpoint (env.* supported) + reasoning_mode: "standard" # standard | fast | off (env.* supported) + + # Plain-value fields (no 
env.* support) + violation_threshold: 0.7 # 0.0–1.0; higher = more permissive + policy_id: "" # optional: single policy ID + policy_ids: [] # optional: multiple policy IDs + rules: {} # optional: inline rule map + sampling_rate: 100 ``` @@ -116,6 +176,81 @@ bifrost: --- +## Environment Variable Support + +Any field marked **env.\* supported** below accepts a bare `"env.VAR_NAME"` string in addition to a literal value. Bifrost resolves the variable from the process environment at startup. Fields marked **plain only** must be a literal value (boolean, number, array, or string). + +### AWS Bedrock + +| Field | Required | env.\* supported | Notes | +|-------|----------|-----------------|-------| +| `guardrail_arn` | Yes | Yes | ARN of the Bedrock guardrail | +| `guardrail_version` | Yes | Yes | `"DRAFT"` or a published version number | +| `region` | Yes | Yes | AWS region (e.g. `"us-east-1"`) | +| `auth_type` | No | Yes | `"keys"` (default) \| `"api_key"` \| `"iam_role"` | +| `access_key` | Conditional | Yes | Required when `auth_type="keys"` | +| `secret_key` | Conditional | Yes | Required when `auth_type="keys"` | +| `session_token` | No | Yes | Optional temporary session token | +| `bedrock_api_key` | Conditional | Yes | Required when `auth_type="api_key"` | +| `role_arn` | No | Yes | IAM role ARN to assume (optional, `auth_type="iam_role"`) | +| `external_id` | No | Yes | External ID for role assumption | +| `session_name` | No | Yes | Session name for role assumption | +| `sampling_rate` | No | **Plain only** | `0`–`100`; percentage of requests to evaluate (default: `100`) | +| `timeout` | No | **Plain only** | Execution timeout in seconds | + +### Azure Content Safety + +| Field | Required | env.\* supported | Notes | +|-------|----------|-----------------|-------| +| `endpoint` | Yes | Yes | Azure Content Safety resource URL | +| `auth_type` | No | Yes | `"api_key"` (default) \| `"default_credential"` \| `"entra_id"` | +| `api_key` | Conditional | Yes | Required 
when `auth_type="api_key"` | +| `client_id` | Conditional | Yes | Required when `auth_type="entra_id"` | +| `client_secret` | Conditional | Yes | Required when `auth_type="entra_id"` | +| `tenant_id` | Conditional | Yes | Required when `auth_type="entra_id"` | +| `analyze_severity_threshold` | No | Yes | `"low"` \| `"medium"` \| `"high"` (default: `"medium"`) | +| `analyze_enabled` | No | **Plain only** | Enable text analysis (default: `true`) | +| `jailbreak_shield_enabled` | No | **Plain only** | Enable jailbreak detection (default: `false`) | +| `indirect_attack_shield_enabled` | No | **Plain only** | Enable indirect attack detection (default: `false`) | +| `copyright_enabled` | No | **Plain only** | Enable copyright detection (default: `false`) | +| `text_blocklist_enabled` | No | **Plain only** | Enable custom blocklists (default: `false`) | +| `scopes` | No | **Plain only** | OAuth scopes (string array) | +| `blocklist_names` | No | **Plain only** | Blocklist names to apply (string array) | +| `sampling_rate` | No | **Plain only** | `0`–`100`; percentage of requests to evaluate (default: `100`) | +| `timeout` | No | **Plain only** | Execution timeout in seconds | + +### Patronus AI + +| Field | Required | env.\* supported | Notes | +|-------|----------|-----------------|-------| +| `api_key` | Yes | Yes | Patronus AI API key | +| `environment` | No | Yes | `"production"` (default) \| `"development"` | +| `sampling_rate` | No | **Plain only** | `0`–`100`; percentage of requests to evaluate (default: `100`) | +| `timeout` | No | **Plain only** | Execution timeout in seconds | + +### Gray Swan + +| Field | Required | env.\* supported | Notes | +|-------|----------|-----------------|-------| +| `api_key` | Yes | Yes | Gray Swan API key | +| `base_url` | No | Yes | Custom API base URL (uses Gray Swan default if unset) | +| `reasoning_mode` | No | Yes | `"standard"` \| `"fast"` \| `"off"` (default: `"standard"`) | +| `violation_threshold` | No | **Plain only** | 
`0.0`–`1.0`; higher = more permissive (default: `0.5`) | +| `policy_id` | No | **Plain only** | Single policy ID string | +| `policy_ids` | No | **Plain only** | Multiple policy IDs (string array) | +| `rules` | No | **Plain only** | Inline rule map (`{ "rule_name": "description" }`) | +| `sampling_rate` | No | **Plain only** | `0`–`100`; percentage of requests to evaluate (default: `100`) | +| `timeout` | No | **Plain only** | Execution timeout in seconds | + +### Regex + +| Field | Required | env.\* supported | Notes | +|-------|----------|-----------------|-------| +| `patterns` | Yes | **Plain only** | Array of `{ pattern, description?, flags? }` objects | +| `sampling_rate` | No | **Plain only** | `0`–`100`; percentage of requests to evaluate (default: `100`) | + +--- + ## Rules Rules are CEL expressions that fire when their condition is met. Available CEL variables: @@ -175,7 +310,7 @@ bifrost: apply_to: "input" sampling_rate: 50 timeout: 20 - provider_config_ids: [4] + provider_config_ids: [5] - id: 104 name: "strict-team-check" @@ -223,8 +358,8 @@ bifrost: enabled: true timeout: 10 config: - endpoint: "https://your-resource.cognitiveservices.azure.com" - api_key: "YOUR_AZURE_CONTENT_SAFETY_KEY" + endpoint: "env.AZURE_CONTENT_SAFETY_ENDPOINT" + api_key: "env.AZURE_CONTENT_SAFETY_KEY" analyze_enabled: true analyze_severity_threshold: "medium" jailbreak_shield_enabled: true @@ -256,4 +391,4 @@ bifrost: ```bash helm install bifrost bifrost/bifrost -f guardrails-values.yaml -``` \ No newline at end of file +``` diff --git a/docs/enterprise/user-provisioning.mdx b/docs/enterprise/user-provisioning.mdx index 14a7e182b8..ea39979d0a 100644 --- a/docs/enterprise/user-provisioning.mdx +++ b/docs/enterprise/user-provisioning.mdx @@ -85,7 +85,7 @@ All providers share the same outer config shape in `config.json`: { "scim_config": { "enabled": true, - "provider": "okta | entra | zitadel | keycloak | google", + "provider": "okta | entra | zitadel | keycloak | google | 
sailpoint", "config": { "...": "provider-specific fields — see each IdP guide" } @@ -112,6 +112,108 @@ Provider-specific fields (domain, tenant ID, server URL, service-account credent --- +## Environment variable support + +Fields marked **env.\* supported** accept `"env.VAR_NAME"` in addition to a literal value — Bifrost resolves the variable from the process environment at startup. Attribute mapping arrays are always plain JSON (they cannot reference env vars). + +### Okta + +| Field | JSON key | Required | env.\* supported | Notes | +| --- | --- | --- | --- | --- | +| Issuer URL | `issuerUrl` | Yes | Yes | Org server: `https://domain.okta.com`; Custom: `…/oauth2/default` | +| Client ID | `clientId` | Yes | Yes | Application Client ID | +| Client Secret | `clientSecret` | No | Yes | Required for token revocation | +| API Token | `apiToken` | No | Yes | Required for bulk user / team sync | +| Audience | `audience` | No | Yes | Only applies to Custom Authorization Server | +| Team IDs field | `teamIdsField` | No | Yes | JWT claim for group IDs (default: `"groups"`) | +| Role mappings | `attributeRoleMappings` | No | **Plain only** | Array of `{ attribute, value, role }` objects | +| Team mappings | `attributeTeamMappings` | No | **Plain only** | Array of `{ attribute, value, team }` objects | +| Business unit mappings | `attributeBusinessUnitMappings` | No | **Plain only** | Array of `{ attribute, value, businessUnit }` objects | + +### Microsoft Entra ID + +| Field | JSON key | Required | env.\* supported | Notes | +| --- | --- | --- | --- | --- | +| Tenant ID | `tenantId` | Yes | Yes | Azure tenant ID or `"common"` for multi-tenant | +| Client ID | `clientId` | Yes | Yes | Application (client) ID | +| Client Secret | `clientSecret` | Yes | Yes | Client secret for OAuth authentication | +| Cloud | `cloud` | No | Yes | `"commercial"` (default) \| `"gcc-high"` \| `"dod"` | +| Audience | `audience` | No | Yes | JWT audience override (default: `clientId`) | +| App ID 
URI | `appIdUri` | No | Yes | App ID URI for v1.0 tokens (e.g. `api://{clientId}`) | +| Team IDs field | `teamIdsField` | No | Yes | JWT claim for group IDs (default: `"groups"`) | +| Role mappings | `attributeRoleMappings` | No | **Plain only** | Array of `{ attribute, value, role }` objects | +| Team mappings | `attributeTeamMappings` | No | **Plain only** | Array of `{ attribute, value, team }` objects | +| Business unit mappings | `attributeBusinessUnitMappings` | No | **Plain only** | Array of `{ attribute, value, businessUnit }` objects | + +### Keycloak + +| Field | JSON key | Required | env.\* supported | Notes | +| --- | --- | --- | --- | --- | +| Server URL | `serverUrl` | Yes | Yes | Base URL, e.g. `https://keycloak.company.com` (no `/realms/…`) | +| Realm | `realm` | Yes | Yes | e.g. `"master"` or `"my-app"` | +| Client ID | `clientId` | Yes | Yes | Application client ID | +| Client Secret | `clientSecret` | No | Yes | For confidential clients | +| Audience | `audience` | No | Yes | JWT audience for token validation | +| Team IDs field | `teamIdsField` | No | Yes | JWT claim for group IDs (default: `"groups"`) | +| Role mappings | `attributeRoleMappings` | No | **Plain only** | Array of `{ attribute, value, role }` objects | +| Team mappings | `attributeTeamMappings` | No | **Plain only** | Array of `{ attribute, value, team }` objects | +| Business unit mappings | `attributeBusinessUnitMappings` | No | **Plain only** | Array of `{ attribute, value, businessUnit }` objects | + +### Zitadel + +| Field | JSON key | Required | env.\* supported | Notes | +| --- | --- | --- | --- | --- | +| Domain | `domain` | Yes | Yes | e.g. 
`"my-instance.zitadel.cloud"` or `"auth.company.com"` | +| Client ID | `clientId` | Yes | Yes | Application client ID | +| Client Secret | `clientSecret` | No | Yes | For confidential clients | +| Project ID | `projectId` | No | Yes | For project-scoped role claims | +| Audience | `audience` | No | Yes | Access-token audience override | +| Service account client ID | `serviceAccountClientId` | No | Yes | Service account for provisioning API access | +| Service account client secret | `serviceAccountClientSecret` | No | Yes | Service account secret | +| Team IDs field | `teamIdsField` | No | Yes | JWT claim for group IDs | +| Role mappings | `attributeRoleMappings` | No | **Plain only** | Array of `{ attribute, value, role }` objects | +| Team mappings | `attributeTeamMappings` | No | **Plain only** | Array of `{ attribute, value, team }` objects | +| Business unit mappings | `attributeBusinessUnitMappings` | No | **Plain only** | Array of `{ attribute, value, businessUnit }` objects | + +### Google Workspace + +| Field | JSON key | Required | env.\* supported | Notes | +| --- | --- | --- | --- | --- | +| Domain | `domain` | Yes | Yes | Google Workspace domain (e.g. 
`"company.com"`) | +| Client ID | `clientId` | Yes | Yes | Google OAuth2 client ID | +| Client Secret | `clientSecret` | No | Yes | For token revocation | +| Credential mode | `credentialMode` | No | Yes | `"inherit"` (ADC) \| `"env"` \| `"file"` | +| Service account JSON | `serviceAccountJson` | No | Yes | Raw service account JSON string | +| Service account env var | `serviceAccountEnvVar` | No | Yes | Env var containing the service account JSON | +| Service account file | `serviceAccountFile` | No | Yes | Path to the service account JSON key file | +| Admin email | `adminEmail` | Conditional | Yes | Required for Directory API / domain-wide delegation | +| Impersonate service account | `impersonateServiceAccount` | No | Yes | GCP SA email to impersonate when using ADC + Workload Identity | +| Audience | `audience` | No | Yes | Optional JWT audience override | +| Team IDs field | `teamIdsField` | No | Yes | Claim field for group IDs (default: `"groups"`) | +| Role mappings | `attributeRoleMappings` | No | **Plain only** | Array of `{ attribute, value, role }` objects | +| Team mappings | `attributeTeamMappings` | No | **Plain only** | Array of `{ attribute, value, team }` objects | +| Business unit mappings | `attributeBusinessUnitMappings` | No | **Plain only** | Array of `{ attribute, value, businessUnit }` objects | + +### SailPoint + +SailPoint fields are **plain values only** — `env.*` references are not supported for any SailPoint config field. + +| Field | JSON key | Required | Notes | +| --- | --- | --- | --- | +| Product | `product` | Yes | `"isc"` (SailPoint Identity Security Cloud) \| `"iiq"` (IdentityIQ) | +| Tenant | `tenant` | Conditional | ISC only: tenant name (e.g. 
`"acme"`) | +| Client ID | `clientId` | Conditional | Required for ISC; optional for IIQ OAuth | +| Client Secret | `clientSecret` | Conditional | Required for ISC; optional for IIQ OAuth | +| Base URL | `baseUrl` | Conditional | IIQ only: SCIM base URL | +| Username | `username` | Conditional | IIQ basic auth username | +| Password | `password` | Conditional | IIQ basic auth password | +| Team IDs field | `teamIdsField` | No | JWT claim for group IDs | +| Role mappings | `attributeRoleMappings` | No | Array of `{ attribute, value, role }` objects | +| Team mappings | `attributeTeamMappings` | No | Array of `{ attribute, value, team }` objects | +| Business unit mappings | `attributeBusinessUnitMappings` | No | Array of `{ attribute, value, businessUnit }` objects | + +--- + ## Configuring from the dashboard 1. Navigate to **Governance → User Provisioning** in the Bifrost dashboard. diff --git a/plugins/governance/allow_on_all_virtual_keys_test.go b/plugins/governance/allowonallvirtualkeys_test.go similarity index 100% rename from plugins/governance/allow_on_all_virtual_keys_test.go rename to plugins/governance/allowonallvirtualkeys_test.go diff --git a/plugins/governance/http_transport_prehook_test.go b/plugins/governance/httptransportprehook_test.go similarity index 100% rename from plugins/governance/http_transport_prehook_test.go rename to plugins/governance/httptransportprehook_test.go diff --git a/plugins/governance/main.go b/plugins/governance/main.go index 1a244b9ebf..cd08eb37fa 100644 --- a/plugins/governance/main.go +++ b/plugins/governance/main.go @@ -346,6 +346,10 @@ func (p *GovernancePlugin) HTTPTransportPreHook(ctx *schemas.BifrostContext, req virtualKeyValue := parseVirtualKeyFromHTTPRequest(req) hasRoutingRules := p.store.HasRoutingRules(ctx) + if strings.Contains(req.Path, "passthrough") { + return nil, nil + } + // If no virtual key and no routing rules configured, skip all processing if virtualKeyValue == nil && !hasRoutingRules { return nil, 
nil @@ -652,12 +656,12 @@ func (p *GovernancePlugin) loadBalanceProvider(ctx *schemas.BifrostContext, req } } - ctx.AppendRoutingEngineLog(schemas.RoutingEngineGovernance, fmt.Sprintf("Loading balance provider for model %s", modelStr)) + ctx.AppendRoutingEngineLog(schemas.RoutingEngineGovernance, schemas.LogLevelInfo, fmt.Sprintf("Loading balance provider for model %s", modelStr)) // Get provider configs for this virtual key providerConfigs := virtualKey.ProviderConfigs if len(providerConfigs) == 0 { - ctx.AppendRoutingEngineLog(schemas.RoutingEngineGovernance, fmt.Sprintf("No provider configs on virtual key %s for model %s, skipping load balancing", virtualKey.Name, modelStr)) + ctx.AppendRoutingEngineLog(schemas.RoutingEngineGovernance, schemas.LogLevelWarn, fmt.Sprintf("No provider configs on virtual key %s for model %s, skipping load balancing", virtualKey.Name, modelStr)) // No provider configs, continue without modification return body, nil } @@ -667,7 +671,7 @@ func (p *GovernancePlugin) loadBalanceProvider(ctx *schemas.BifrostContext, req configuredProviders = append(configuredProviders, pc.Provider) } p.logger.Debug("[Governance] Virtual key has %d provider configs: %v", len(providerConfigs), configuredProviders) - ctx.AppendRoutingEngineLog(schemas.RoutingEngineGovernance, fmt.Sprintf("Load balancing model %s across %d configured providers: %v", modelStr, len(providerConfigs), configuredProviders)) + ctx.AppendRoutingEngineLog(schemas.RoutingEngineGovernance, schemas.LogLevelInfo, fmt.Sprintf("Load balancing model %s across %d configured providers: %v", modelStr, len(providerConfigs), configuredProviders)) allowedProviderConfigs := make([]configstoreTables.TableVirtualKeyProviderConfig, 0) for _, config := range providerConfigs { @@ -692,16 +696,16 @@ func (p *GovernancePlugin) loadBalanceProvider(ctx *schemas.BifrostContext, req if isProviderAllowed { // Check if the provider's budget or rate limits are violated using resolver helper methods if 
p.resolver.isProviderBudgetViolated(ctx, virtualKey, config) { - ctx.AppendRoutingEngineLog(schemas.RoutingEngineGovernance, fmt.Sprintf("Provider %s excluded: budget limit violated", config.Provider)) + ctx.AppendRoutingEngineLog(schemas.RoutingEngineGovernance, schemas.LogLevelInfo, fmt.Sprintf("Provider %s excluded: budget limit violated", config.Provider)) continue } if p.resolver.isProviderRateLimitViolated(ctx, virtualKey, config) { - ctx.AppendRoutingEngineLog(schemas.RoutingEngineGovernance, fmt.Sprintf("Provider %s excluded: rate limit violated", config.Provider)) + ctx.AppendRoutingEngineLog(schemas.RoutingEngineGovernance, schemas.LogLevelInfo, fmt.Sprintf("Provider %s excluded: rate limit violated", config.Provider)) continue } allowedProviderConfigs = append(allowedProviderConfigs, config) } else { - ctx.AppendRoutingEngineLog(schemas.RoutingEngineGovernance, fmt.Sprintf("Provider %s excluded: model %s not in allowed models list", config.Provider, modelStr)) + ctx.AppendRoutingEngineLog(schemas.RoutingEngineGovernance, schemas.LogLevelInfo, fmt.Sprintf("Provider %s excluded: model %s not in allowed models list", config.Provider, modelStr)) } } @@ -710,10 +714,10 @@ func (p *GovernancePlugin) loadBalanceProvider(ctx *schemas.BifrostContext, req allowedProviders = append(allowedProviders, pc.Provider) } p.logger.Debug("[Governance] Allowed providers after filtering: %v", allowedProviders) - ctx.AppendRoutingEngineLog(schemas.RoutingEngineGovernance, fmt.Sprintf("Allowed providers after filtering: %v", allowedProviders)) + ctx.AppendRoutingEngineLog(schemas.RoutingEngineGovernance, schemas.LogLevelInfo, fmt.Sprintf("Allowed providers after filtering: %v", allowedProviders)) if len(allowedProviderConfigs) == 0 { - ctx.AppendRoutingEngineLog(schemas.RoutingEngineGovernance, fmt.Sprintf("No eligible providers remaining after filtering for model %s, skipping load balancing", modelStr)) + ctx.AppendRoutingEngineLog(schemas.RoutingEngineGovernance, 
schemas.LogLevelInfo, fmt.Sprintf("No eligible providers remaining after filtering for model %s, skipping load balancing", modelStr)) // TODO: Send proper error if (overall VK budget/rate limit) or (all provider budgets/rate limits) are violated // No allowed provider configs, continue without modification return body, nil @@ -755,7 +759,7 @@ func (p *GovernancePlugin) loadBalanceProvider(ctx *schemas.BifrostContext, req } p.logger.Debug("[Governance] Selected provider: %s", selectedProvider) - ctx.AppendRoutingEngineLog(schemas.RoutingEngineGovernance, fmt.Sprintf("Selected provider %s for model %s (from %d eligible: %v)", selectedProvider, modelStr, len(allowedProviderConfigs), allowedProviders)) + ctx.AppendRoutingEngineLog(schemas.RoutingEngineGovernance, schemas.LogLevelInfo, fmt.Sprintf("Selected provider %s for model %s (from %d eligible: %v)", selectedProvider, modelStr, len(allowedProviderConfigs), allowedProviders)) // For genai integration, model is present in URL path instead of the request body if isGeminiPath { @@ -810,7 +814,7 @@ func (p *GovernancePlugin) loadBalanceProvider(ctx *schemas.BifrostContext, req // Add fallbacks to request body body["fallbacks"] = fallbacks - ctx.AppendRoutingEngineLog(schemas.RoutingEngineGovernance, fmt.Sprintf("Added %d fallback providers: %v", len(fallbacks), fallbacks)) + ctx.AppendRoutingEngineLog(schemas.RoutingEngineGovernance, schemas.LogLevelInfo, fmt.Sprintf("Added %d fallback providers: %v", len(fallbacks), fallbacks)) } return body, nil @@ -896,13 +900,13 @@ func (p *GovernancePlugin) applyRoutingRules(ctx *schemas.BifrostContext, req *s p.logger.Debug("[HTTPTransport] Built routing context: provider=%s, model=%s, requestType=%s, vk=%v, headerCount=%d, paramCount=%d", provider, model, requestType, virtualKey != nil, len(req.Headers), len(req.Query)) - ctx.AppendRoutingEngineLog(schemas.RoutingEngineRoutingRule, fmt.Sprintf("Evaluating routing rules for model=%s, provider=%s, requestType=%s", model, provider, 
requestType)) + ctx.AppendRoutingEngineLog(schemas.RoutingEngineRoutingRule, schemas.LogLevelInfo, fmt.Sprintf("Evaluating routing rules for model=%s, provider=%s, requestType=%s", model, provider, requestType)) // Evaluate routing rules decision, err := p.engine.EvaluateRoutingRules(ctx, routingCtx) if err != nil { p.logger.Error("failed to evaluate routing rules: %v", err) - ctx.AppendRoutingEngineLog(schemas.RoutingEngineRoutingRule, fmt.Sprintf("Routing rule evaluation error: %v", err)) + ctx.AppendRoutingEngineLog(schemas.RoutingEngineRoutingRule, schemas.LogLevelError, fmt.Sprintf("Routing rule evaluation error: %v", err)) return body, nil, nil } diff --git a/plugins/governance/routing.go b/plugins/governance/routing.go index 4df039f12d..4638f70cab 100644 --- a/plugins/governance/routing.go +++ b/plugins/governance/routing.go @@ -3,7 +3,9 @@ package governance import ( "fmt" "math/rand/v2" + "regexp" "strings" + "sync" "github.com/google/cel-go/cel" "github.com/maximhq/bifrost/core/schemas" @@ -102,12 +104,12 @@ func (re *RoutingEngine) EvaluateRoutingRules(ctx *schemas.BifrostContext, routi maxDepth := *re.chainMaxDepth if chainStep >= maxDepth { re.logger.Warn("[RoutingEngine] Routing rule chain exceeded max depth (%d), stopping", maxDepth) - ctx.AppendRoutingEngineLog(schemas.RoutingEngineRoutingRule, fmt.Sprintf("Chain exceeded max depth (%d) at step %d, stopping. Final resolved: provider=%s, model=%s", maxDepth, chainStep, currentProvider, currentModel)) + ctx.AppendRoutingEngineLog(schemas.RoutingEngineRoutingRule, schemas.LogLevelWarn, fmt.Sprintf("Chain exceeded max depth (%d) at step %d, stopping. 
Final resolved: provider=%s, model=%s", maxDepth, chainStep, currentProvider, currentModel)) break } if chainStep > 0 { - ctx.AppendRoutingEngineLog(schemas.RoutingEngineRoutingRule, fmt.Sprintf("Chain step %d: re-evaluating with provider=%s, model=%s", chainStep, currentProvider, currentModel)) + ctx.AppendRoutingEngineLog(schemas.RoutingEngineRoutingRule, schemas.LogLevelInfo, fmt.Sprintf("Chain step %d: re-evaluating with provider=%s, model=%s", chainStep, currentProvider, currentModel)) } // Build CEL variables for the current chain step's provider/model. @@ -127,7 +129,7 @@ func (re *RoutingEngine) EvaluateRoutingRules(ctx *schemas.BifrostContext, routi scopeChain := buildScopeChain(routingCtx.VirtualKey) re.logger.Debug("[RoutingEngine] Scope chain (step=%d): %v", chainStep, scopeChainToStrings(scopeChain)) if chainStep == 0 { - ctx.AppendRoutingEngineLog(schemas.RoutingEngineRoutingRule, fmt.Sprintf("Scope chain: %v", scopeChainToStrings(scopeChain))) + ctx.AppendRoutingEngineLog(schemas.RoutingEngineRoutingRule, schemas.LogLevelInfo, fmt.Sprintf("Scope chain: %v", scopeChainToStrings(scopeChain))) } var stepDecision *RoutingDecision @@ -149,7 +151,7 @@ func (re *RoutingEngine) EvaluateRoutingRules(ctx *schemas.BifrostContext, routi for _, r := range rules { ruleNames = append(ruleNames, r.Name) } - ctx.AppendRoutingEngineLog(schemas.RoutingEngineRoutingRule, fmt.Sprintf("Evaluating scope %s: %d rules [%s]", scope.ScopeName, len(rules), strings.Join(ruleNames, ", "))) + ctx.AppendRoutingEngineLog(schemas.RoutingEngineRoutingRule, schemas.LogLevelInfo, fmt.Sprintf("Evaluating scope %s: %d rules [%s]", scope.ScopeName, len(rules), strings.Join(ruleNames, ", "))) for _, rule := range rules { re.logger.Debug("[RoutingEngine] Evaluating rule: name=%s, expression=%s", rule.Name, rule.CelExpression) @@ -157,28 +159,29 @@ func (re *RoutingEngine) EvaluateRoutingRules(ctx *schemas.BifrostContext, routi program, err := re.store.GetRoutingProgram(ctx, rule) if err != 
nil { re.logger.Warn("[RoutingEngine] Failed to compile rule %s: %v", rule.Name, err) - ctx.AppendRoutingEngineLog(schemas.RoutingEngineRoutingRule, fmt.Sprintf("Rule '%s' skipped: compile error: %v", rule.Name, err)) + ctx.AppendRoutingEngineLog(schemas.RoutingEngineRoutingRule, schemas.LogLevelError, fmt.Sprintf("Rule '%s' skipped: compile error: %v", rule.Name, err)) continue } matched, err := evaluateCELExpression(program, variables) if err != nil { re.logger.Warn("[RoutingEngine] Failed to evaluate rule %s: %v", rule.Name, err) - ctx.AppendRoutingEngineLog(schemas.RoutingEngineRoutingRule, fmt.Sprintf("Rule '%s' skipped: eval error: %v", rule.Name, err)) + ctx.AppendRoutingEngineLog(schemas.RoutingEngineRoutingRule, schemas.LogLevelError, fmt.Sprintf("Rule '%s' skipped: eval error: %v", rule.Name, err)) continue } re.logger.Debug("[RoutingEngine] Rule %s evaluation result: matched=%v", rule.Name, matched) if !matched { - ctx.AppendRoutingEngineLog(schemas.RoutingEngineRoutingRule, fmt.Sprintf("Rule '%s' [%s] → no match", rule.Name, rule.CelExpression)) + ctx.AppendRoutingEngineLog(schemas.RoutingEngineRoutingRule, schemas.LogLevelInfo, + fmt.Sprintf("Rule '%s' [%s] → no match (%s)", rule.Name, rule.CelExpression, buildNoMatchContext(rule.CelExpression, variables))) continue } target, ok := selectWeightedTarget(rule.Targets) if !ok { re.logger.Debug("[RoutingEngine] Rule %s matched but has no valid targets (empty list or all-negative weights), skipping — note: all-zero weights use uniform selection and would not reach here", rule.Name) - ctx.AppendRoutingEngineLog(schemas.RoutingEngineRoutingRule, fmt.Sprintf("Rule '%s' [%s] → matched but no valid targets (empty or all-negative weights), skipping", rule.Name, rule.CelExpression)) + ctx.AppendRoutingEngineLog(schemas.RoutingEngineRoutingRule, schemas.LogLevelError, fmt.Sprintf("Rule '%s' [%s] → matched but no valid targets (empty or all-negative weights), skipping", rule.Name, rule.CelExpression)) continue } @@ 
-226,7 +229,7 @@ func (re *RoutingEngine) EvaluateRoutingRules(ctx *schemas.BifrostContext, routi chainSuffix = " [chain_rule=true, continuing]" } re.logger.Debug("[RoutingEngine] Rule matched! Selected target (weight=%.2f): provider=%s, model=%s, fallbacks=%v%s", matchedTargetWeight, stepDecision.Provider, stepDecision.Model, stepDecision.Fallbacks, chainSuffix) - ctx.AppendRoutingEngineLog(schemas.RoutingEngineRoutingRule, fmt.Sprintf("Rule '%s' [%s] → matched, selected target (weight=%.2f): provider=%s, model=%s, fallbacks=%v%s", matchedRule.Name, matchedRule.CelExpression, matchedTargetWeight, stepDecision.Provider, stepDecision.Model, stepDecision.Fallbacks, chainSuffix)) + ctx.AppendRoutingEngineLog(schemas.RoutingEngineRoutingRule, schemas.LogLevelInfo, fmt.Sprintf("Rule '%s' [%s] → matched, selected target (weight=%.2f): provider=%s, model=%s, fallbacks=%v%s", matchedRule.Name, matchedRule.CelExpression, matchedTargetWeight, stepDecision.Provider, stepDecision.Model, stepDecision.Fallbacks, chainSuffix)) // TERMINATION 2: Rule is terminal (chain_rule=false, the default). if !matchedRule.ChainRule { @@ -237,7 +240,7 @@ func (re *RoutingEngine) EvaluateRoutingRules(ctx *schemas.BifrostContext, routi nextState := fmt.Sprintf("%s|%s", stepDecision.Provider, stepDecision.Model) if _, seen := visited[nextState]; seen { re.logger.Debug("[RoutingEngine] Chain cycle detected at step=%d (state=%s already visited), stopping", chainStep, nextState) - ctx.AppendRoutingEngineLog(schemas.RoutingEngineRoutingRule, fmt.Sprintf("Chain cycle detected at step %d (provider=%s, model=%s already visited), stopping. Final resolved: provider=%s, model=%s", chainStep, stepDecision.Provider, stepDecision.Model, stepDecision.Provider, stepDecision.Model)) + ctx.AppendRoutingEngineLog(schemas.RoutingEngineRoutingRule, schemas.LogLevelInfo, fmt.Sprintf("Chain cycle detected at step %d (provider=%s, model=%s already visited), stopping. 
Final resolved: provider=%s, model=%s", chainStep, stepDecision.Provider, stepDecision.Model, stepDecision.Provider, stepDecision.Model)) break } visited[nextState] = struct{}{} @@ -348,7 +351,7 @@ func buildScopeChain(virtualKey *configstoreTables.TableVirtualKey) []ScopeLevel } // evaluateCELExpression evaluates a compiled CEL program with given variables -func evaluateCELExpression(program cel.Program, variables map[string]interface{}) (bool, error) { +func evaluateCELExpression(program cel.Program, variables map[string]any) (bool, error) { if program == nil { return false, fmt.Errorf("CEL program is nil") } @@ -469,6 +472,68 @@ func scopeChainToStrings(chain []ScopeLevel) []string { return scopes } +// buildNoMatchContext builds a compact debug string of scalar variables plus +// only the headers/params keys actually referenced in the CEL expression. +func buildNoMatchContext(expr string, variables map[string]any) string { + parts := []string{ + fmt.Sprintf("model=%q", variables["model"]), + fmt.Sprintf("provider=%q", variables["provider"]), + fmt.Sprintf("request_type=%q", variables["request_type"]), + fmt.Sprintf("budget_used=%.1f%%", variables["budget_used"]), + fmt.Sprintf("tokens_used=%.1f%%", variables["tokens_used"]), + fmt.Sprintf("request=%.1f%%", variables["request"]), + } + for _, mapName := range []string{"headers", "params"} { + keys := extractMapKeysFromCEL(expr, mapName) + if len(keys) == 0 { + continue + } + if m, ok := variables[mapName].(map[string]string); ok { + kvs := make([]string, 0, len(keys)) + for _, k := range keys { + if _, exists := m[k]; exists { + kvs = append(kvs, k+"=") + } else { + kvs = append(kvs, k+"=") + } + } + parts = append(parts, mapName+"("+strings.Join(kvs, ", ")+")") + } + } + return strings.Join(parts, ", ") +} + +// celMapKeyRegexCache caches one *regexp.Regexp per mapName to avoid +// recompiling on every call. 
Lazy and concurrent-safe via sync.Map's +// LoadOrStore atomicity; benign duplicate compiles on first concurrent miss. +var celMapKeyRegexCache sync.Map // map[string]*regexp.Regexp + +// extractMapKeysFromCEL extracts unique map access keys for mapName from a CEL expression. +// Handles mapName["key"], mapName['key'], and mapName.key patterns. +func extractMapKeysFromCEL(expr, mapName string) []string { + v, ok := celMapKeyRegexCache.Load(mapName) + if !ok { + quoted := regexp.QuoteMeta(mapName) + compiled := regexp.MustCompile(quoted + `\["([^"]+)"\]|` + quoted + `\['([^']+)'\]|` + quoted + `\.([a-zA-Z_][a-zA-Z0-9_]*)`) + v, _ = celMapKeyRegexCache.LoadOrStore(mapName, compiled) + } + re := v.(*regexp.Regexp) + seen := map[string]struct{}{} + var keys []string + for _, m := range re.FindAllStringSubmatch(expr, -1) { + for _, cap := range m[1:] { + if cap != "" { + if _, dup := seen[cap]; !dup { + seen[cap] = struct{}{} + keys = append(keys, cap) + } + break + } + } + } + return keys +} + // createCELEnvironment creates a new CEL environment for routing rules func createCELEnvironment() (*cel.Env, error) { return cel.NewEnv( diff --git a/plugins/governance/store_concurrency_test.go b/plugins/governance/storeconcurrency_test.go similarity index 99% rename from plugins/governance/store_concurrency_test.go rename to plugins/governance/storeconcurrency_test.go index 67e1f081ff..f316895426 100644 --- a/plugins/governance/store_concurrency_test.go +++ b/plugins/governance/storeconcurrency_test.go @@ -121,4 +121,3 @@ func TestResetBudgetAt_ConcurrentResettersCollapse(t *testing.T) { assert.Equal(t, 0.0, final.CurrentUsage) assert.True(t, final.LastReset.Equal(newLastReset)) } - diff --git a/plugins/semanticcache/main.go b/plugins/semanticcache/main.go index 39b121ee18..148bbfef1a 100644 --- a/plugins/semanticcache/main.go +++ b/plugins/semanticcache/main.go @@ -117,16 +117,16 @@ type StreamChunk struct { // StreamAccumulator manages accumulation of streaming chunks for 
caching type StreamAccumulator struct { - RequestID string // The request ID - StorageID string // The final cache entry ID - Chunks []*StreamChunk // All chunks for this stream - IsComplete bool // Whether the stream is complete - HasError bool // Whether any chunk in the stream had an error - FinalTimestamp time.Time // When the stream completed - Embedding []float32 // Embedding for the original request - Metadata map[string]interface{} // Metadata for caching - TTL time.Duration // TTL for this cache entry - mu sync.Mutex // Protects chunk operations + RequestID string // The request ID + StorageID string // The final cache entry ID + Chunks []*StreamChunk // All chunks for this stream + IsComplete bool // Whether the stream is complete + HasError bool // Whether any chunk in the stream had an error + FinalTimestamp time.Time // When the stream completed + Embedding []float32 // Embedding for the original request + Metadata map[string]any // Metadata for caching + TTL time.Duration // TTL for this cache entry + mu sync.Mutex // Protects chunk operations } // Plugin implements the schemas.LLMPlugin interface for semantic caching. @@ -308,10 +308,10 @@ func Init(ctx context.Context, config *Config, logger schemas.Logger, store vect // Set cache behavior defaults if config.CacheByModel == nil { - config.CacheByModel = bifrost.Ptr(true) + config.CacheByModel = new(true) } if config.CacheByProvider == nil { - config.CacheByProvider = bifrost.Ptr(true) + config.CacheByProvider = new(true) } plugin := &Plugin{ diff --git a/transports/bifrost-http/integrations/anthropic.go b/transports/bifrost-http/integrations/anthropic.go index da9fff1da0..e21e54d2f5 100644 --- a/transports/bifrost-http/integrations/anthropic.go +++ b/transports/bifrost-http/integrations/anthropic.go @@ -203,58 +203,11 @@ func hasFastModeBetaHeader(headers map[string][]string) bool { return false } -// filterVertexUnsupportedBetaHeaders removes beta headers that Vertex AI doesn't support. 
-// Vertex AI doesn't support: structured-outputs, advanced-tool-use, prompt-caching-scope, mcp-client. -func filterVertexUnsupportedBetaHeaders(headers map[string][]string) map[string][]string { - var betaHeaderKey string - var betaHeaders []string - var found bool - for k, v := range headers { - if strings.ToLower(k) == anthropic.AnthropicBetaHeader { - betaHeaderKey = k - betaHeaders = v - found = true - break - } - } - - if found { - var filteredBetas []string - for _, headerValue := range betaHeaders { - // Split comma-separated beta headers - for beta := range strings.SplitSeq(headerValue, ",") { - beta = strings.TrimSpace(beta) - if beta == "" { - continue - } - // Skip unsupported headers for Vertex. - // Use prefix matching so that future date bumps - // (e.g. structured-outputs-2025-12-15) are still caught. - if strings.HasPrefix(beta, anthropic.AnthropicAdvancedToolUseBetaHeaderPrefix) || - strings.HasPrefix(beta, anthropic.AnthropicStructuredOutputsBetaHeaderPrefix) || - strings.HasPrefix(beta, anthropic.AnthropicPromptCachingScopeBetaHeaderPrefix) || - strings.HasPrefix(beta, anthropic.AnthropicMCPClientBetaHeaderPrefix) || - strings.HasPrefix(beta, anthropic.AnthropicSkillsBetaHeaderPrefix) || - strings.HasPrefix(beta, anthropic.AnthropicFastModeBetaHeaderPrefix) || - strings.HasPrefix(beta, anthropic.AnthropicRedactThinkingBetaHeaderPrefix) { - continue - } - filteredBetas = append(filteredBetas, beta) - } - } - if len(filteredBetas) > 0 { - headers[betaHeaderKey] = []string{strings.Join(filteredBetas, ",")} - } else { - delete(headers, betaHeaderKey) - } - } - - return headers -} - // extractPassthroughHeaders filters headers to only include those in the safe whitelist. -// Header matching is case-insensitive. -func extractPassthroughHeaders(allHeaders map[string][]string, provider schemas.ModelProvider) map[string][]string { +// Header matching is case-insensitive. 
Provider-aware beta-header filtering happens +// downstream at each provider's wire layer (e.g. anthropic.go, vertex.go), where +// networkConfig.BetaHeaderOverrides is in scope. +func extractPassthroughHeaders(allHeaders map[string][]string) map[string][]string { filtered := make(map[string][]string) for k, v := range allHeaders { if passthroughSafeHeaders[strings.ToLower(k)] { @@ -371,7 +324,7 @@ func checkAnthropicPassthrough(ctx *fasthttp.RequestCtx, bifrostCtx *schemas.Bif bifrostCtx.SetValue(schemas.BifrostContextKeySkipKeySelection, true) } else { // API key flow: pass only whitelisted safe headers (like anthropic-beta for feature detection) - passthroughHeaders := extractPassthroughHeaders(headers, provider) + passthroughHeaders := extractPassthroughHeaders(headers) if len(passthroughHeaders) > 0 { bifrostCtx.SetValue(schemas.BifrostContextKeyExtraHeaders, passthroughHeaders) } diff --git a/transports/bifrost-http/integrations/anthropic_test.go b/transports/bifrost-http/integrations/anthropic_test.go deleted file mode 100644 index e9ef5bbac2..0000000000 --- a/transports/bifrost-http/integrations/anthropic_test.go +++ /dev/null @@ -1,87 +0,0 @@ -package integrations - -import ( - "testing" - - "github.com/stretchr/testify/assert" -) - -func TestFilterVertexUnsupportedBetaHeaders(t *testing.T) { - t.Run("filters known exact header values", func(t *testing.T) { - headers := map[string][]string{ - "anthropic-beta": {"advanced-tool-use-2025-11-20,structured-outputs-2025-11-13,mcp-client-2025-04-04,prompt-caching-scope-2026-01-05"}, - } - result := filterVertexUnsupportedBetaHeaders(headers) - _, ok := result["anthropic-beta"] - assert.False(t, ok, "all unsupported beta headers should be removed, leaving no anthropic-beta key") - }) - - t.Run("filters bumped date variants", func(t *testing.T) { - // Simulate Anthropic bumping version dates in the future - headers := map[string][]string{ - "anthropic-beta": 
{"structured-outputs-2025-12-15,advanced-tool-use-2026-03-01,mcp-client-2026-01-01,prompt-caching-scope-2027-06-30"}, - } - result := filterVertexUnsupportedBetaHeaders(headers) - _, ok := result["anthropic-beta"] - assert.False(t, ok, "bumped-date variants of unsupported headers should also be filtered") - }) - - t.Run("passes through unrelated beta headers", func(t *testing.T) { - headers := map[string][]string{ - "anthropic-beta": {"interleaved-thinking-2025-05-14,files-api-2025-04-14"}, - } - result := filterVertexUnsupportedBetaHeaders(headers) - vals, ok := result["anthropic-beta"] - assert.True(t, ok, "unrelated beta headers should be preserved") - assert.Equal(t, []string{"interleaved-thinking-2025-05-14,files-api-2025-04-14"}, vals) - }) - - t.Run("filters unsupported and keeps supported in mixed list", func(t *testing.T) { - headers := map[string][]string{ - "anthropic-beta": {"interleaved-thinking-2025-05-14,structured-outputs-2025-11-13,files-api-2025-04-14,mcp-client-2025-04-04"}, - } - result := filterVertexUnsupportedBetaHeaders(headers) - vals, ok := result["anthropic-beta"] - assert.True(t, ok, "supported beta headers should be preserved") - assert.Equal(t, []string{"interleaved-thinking-2025-05-14,files-api-2025-04-14"}, vals) - }) - - t.Run("filters bumped unsupported mixed with supported", func(t *testing.T) { - // Future-proof: bumped dates should still be filtered - headers := map[string][]string{ - "anthropic-beta": {"structured-outputs-2026-01-01,interleaved-thinking-2025-05-14,advanced-tool-use-2026-06-15"}, - } - result := filterVertexUnsupportedBetaHeaders(headers) - vals, ok := result["anthropic-beta"] - assert.True(t, ok, "supported beta headers should be preserved even when mixed with bumped unsupported ones") - assert.Equal(t, []string{"interleaved-thinking-2025-05-14"}, vals) - }) - - t.Run("returns headers unchanged when no anthropic-beta key present", func(t *testing.T) { - headers := map[string][]string{ - "content-type": 
{"application/json"}, - } - result := filterVertexUnsupportedBetaHeaders(headers) - assert.Equal(t, headers, result) - }) - - t.Run("handles empty anthropic-beta value gracefully", func(t *testing.T) { - headers := map[string][]string{ - "anthropic-beta": {""}, - } - result := filterVertexUnsupportedBetaHeaders(headers) - // Empty string after trimming is not an unsupported header, but it is also empty — key should be removed - _, ok := result["anthropic-beta"] - assert.False(t, ok, "empty beta header list should result in key removal") - }) - - t.Run("case-insensitive key matching for Anthropic-Beta header", func(t *testing.T) { - headers := map[string][]string{ - "Anthropic-Beta": {"structured-outputs-2025-11-13,interleaved-thinking-2025-05-14"}, - } - result := filterVertexUnsupportedBetaHeaders(headers) - vals, ok := result["Anthropic-Beta"] - assert.True(t, ok, "header key casing should be preserved and matching should be case-insensitive") - assert.Equal(t, []string{"interleaved-thinking-2025-05-14"}, vals) - }) -} diff --git a/transports/bifrost-http/lib/config.go b/transports/bifrost-http/lib/config.go index 9e059ccfe1..388cf7b3b2 100644 --- a/transports/bifrost-http/lib/config.go +++ b/transports/bifrost-http/lib/config.go @@ -780,9 +780,7 @@ func loadProviders(ctx context.Context, config *Config, configData *ConfigData) } else if len(providersInConfigStore) == 0 { // No providers in file and none in DB — auto-detect from environment config.autoDetectProviders(ctx) - for k, v := range config.Providers { - providersInConfigStore[k] = v - } + maps.Copy(providersInConfigStore, config.Providers) } // Update store and config if config.ConfigStore != nil { @@ -797,7 +795,7 @@ func loadProviders(ctx context.Context, config *Config, configData *ConfigData) // processProvider processes a single provider configuration from config file func processProvider( - config *Config, + _ *Config, providerName string, providerCfgInFile configstore.ProviderConfig, 
providersInConfigStore map[schemas.ModelProvider]configstore.ProviderConfig, diff --git a/ui/app/workspace/logs/sheets/logDetailView.tsx b/ui/app/workspace/logs/sheets/logDetailView.tsx index b48699ee1c..55a7d0551b 100644 --- a/ui/app/workspace/logs/sheets/logDetailView.tsx +++ b/ui/app/workspace/logs/sheets/logDetailView.tsx @@ -19,6 +19,7 @@ import { Button } from "@/components/ui/button"; import { CodeEditor } from "@/components/ui/codeEditor"; import { DropdownMenu, + DropdownMenuCheckboxItem, DropdownMenuContent, DropdownMenuItem, DropdownMenuSeparator, @@ -53,6 +54,7 @@ import { AlertCircle, ChevronDown, Clipboard, + Copy, Download, Loader2, MoreVertical, @@ -297,6 +299,53 @@ const messageRoleLabel: Record = { tool: "Tool", }; +function RoutingDecisionLogs({ logs }: { logs: string }) { + const { copy } = useCopyToClipboard({ successMessage: "Copied" }); + return ( +
+
+
Routing Decision Logs
+ +
+
+ {logs + .split("\n") + .filter((l) => l.trim()) + .map((line, i) => { + const m = line.match(/^\[(\d+)\]\s+\[([^\]]+)\]\s+-\s+(.*)$/); + const ts = m ? Number(m[1]) : null; + const scope = m ? m[2] : null; + const message = m ? m[3] : line; + return ( +
+ {ts != null ? ( + + {format(new Date(ts), "HH:mm:ss.SSS")} + + ) : null} + {scope ? ( + + {scope} + + ) : null} + {message} +
+ ); + })} +
+
+ ); +} + function CollapsibleCode({ text, preview = 3, @@ -413,6 +462,8 @@ export function LogDetailView({ successMessage: "Request body copied to clipboard", errorMessage: "Failed to copy request body", }); + const allRoles: MessageRole[] = ["system", "user", "assistant", "tool", "reasoning"]; + const [visibleRoles, setVisibleRoles] = useState>(new Set(allRoles)); if (!log) return null; @@ -650,6 +701,7 @@ export function LogDetailView({ mono value={log.model || "—"} sub={log.provider?.toLowerCase() || ""} + valueClass="whitespace-normal overflow-visible break-all" hasRightBorder /> +
+ + + + + + + setVisibleRoles(checked ? new Set(allRoles) : new Set()) + } + > + Show all messages + + + {( + [ + ["system", "System"], + ["user", "User"], + ["assistant", "Assistant"], + ["tool", "Tool"], + ["reasoning", "Reasoning"], + ] as [MessageRole, string][] + ).map(([role, label]) => ( + + setVisibleRoles((prev) => { + const next = new Set(prev); + checked ? next.add(role) : next.delete(role); + return next; + }) + } + > + + {label} + + ))} + + setVisibleRoles(new Set())} + className="text-muted-foreground justify-center text-[12px]" + > + Clear all + + + +
{(log.ocr_input || log.ocr_output) && ( )} @@ -1540,7 +1662,12 @@ export function LogDetailView({ {!isPassthrough && ((log.input_history && log.input_history.length > 0) || (log.output_message && !log.error_details?.error.message)) && (
- {log.input_history?.map((message, index) => { + {(visibleRoles.size < allRoles.length + ? log.input_history?.filter((m) => + visibleRoles.has(((m.role as string) || "user") as MessageRole) + ) + : log.input_history + )?.map((message, index) => { const role = ((message.role as string) || "user") as MessageRole; const text = extractMessageText(message); @@ -1611,6 +1738,7 @@ export function LogDetailView({ })} {log.output_message && !log.error_details?.error.message && + visibleRoles.has("assistant") && (() => { const text = extractMessageText(log.output_message); const lineCount = text ? text.split("\n").length : 0; @@ -1639,11 +1767,17 @@ export function LogDetailView({ )} {(() => { - const inputMsgs = log.responses_input_history ?? []; - const outputMsgs = + const rawInput = log.responses_input_history ?? []; + const inputMsgs = visibleRoles.size < allRoles.length + ? rawInput.filter((m) => visibleRoles.has(getResponsesRole(m))) + : rawInput; + const rawOutput = log.status !== "processing" && !log.error_details?.error.message ? (log.responses_output ?? []) : []; + const outputMsgs = visibleRoles.size < allRoles.length + ? rawOutput.filter((m) => visibleRoles.has(getResponsesRole(m))) + : rawOutput; const all: ResponsesMessage[] = [...inputMsgs, ...outputMsgs]; if (all.length === 0) return null; return ( @@ -1953,44 +2087,7 @@ export function LogDetailView({ )} {log.routing_engine_logs && ( - log.routing_engine_logs || ""} - > -
- {log.routing_engine_logs - .split("\n") - .filter((l) => l.trim()) - .map((line, i) => { - const m = line.match( - /^\[(\d+)\]\s+\[([^\]]+)\]\s+-\s+(.*)$/, - ); - const ts = m ? Number(m[1]) : null; - const scope = m ? m[2] : null; - const message = m ? m[3] : line; - return ( -
- {ts != null ? ( - - {format(new Date(ts), "HH:mm:ss.SSS")} - - ) : null} - {scope ? ( - - {scope} - - ) : null} - - {message} - -
- ); - })} -
-
+ )} {!log.attempt_trail?.length && !log.routing_engine_logs && (
diff --git a/ui/components/sidebar.tsx b/ui/components/sidebar.tsx index 469834a065..62b361b03f 100644 --- a/ui/components/sidebar.tsx +++ b/ui/components/sidebar.tsx @@ -167,6 +167,8 @@ const getSidebarItemHref = (item: Pick) => { return item.queryParam ? `${item.url}?tab=${item.queryParam}` : item.url; }; +const slug = (s: string) => s.toLowerCase().replace(/\s+/g, "-"); + const TIME_FILTER_PAGES = new Set(["/workspace/dashboard", "/workspace/logs", "/workspace/mcp-logs"]); const SidebarItemView = ({ @@ -194,6 +196,24 @@ const SidebarItemView = ({ expandSidebar: () => void; highlightedUrl?: string; }) => { + const [flyoutOpen, setFlyoutOpen] = useState(false); + const flyoutCloseTimer = useRef | null>(null); + const openFlyout = () => { + if (flyoutCloseTimer.current) clearTimeout(flyoutCloseTimer.current); + setFlyoutOpen(true); + }; + const closeFlyout = () => { + if (flyoutCloseTimer.current) clearTimeout(flyoutCloseTimer.current); + flyoutCloseTimer.current = setTimeout(() => { + setFlyoutOpen(false); + flyoutCloseTimer.current = null; + }, 80); + }; + useEffect(() => { + return () => { + if (flyoutCloseTimer.current) clearTimeout(flyoutCloseTimer.current); + }; + }, []); const hasSubItems = "subItems" in item && item.subItems && item.subItems.length > 0; const isRouteMatch = (url: string) => { if (url === "/workspace/custom-pricing") return pathname === url; @@ -266,7 +286,7 @@ const SidebarItemView = ({ let menuButton: React.ReactNode; if (hasSubItems) { menuButton = ( - + {innerContent} ); @@ -296,7 +316,72 @@ const SidebarItemView = ({ return ( - {menuButton} + {isSidebarCollapsed && hasSubItems ? ( + + +
{menuButton}
+
+ +
{item.title}
+ {item.subItems?.map((subItem) => { + const href = getSidebarItemHref(subItem); + const isSubItemActive = subItem.queryParam ? pathname === subItem.url : pathname.startsWith(subItem.url); + const SubItemIcon = subItem.icon; + const subSlug = slug(subItem.title); + const inner = ( +
+ {SubItemIcon && ( + + )} + + {subItem.title} + + {subItem.tag && ( + + {subItem.tag} + + )} +
+ ); + return ( +
setFlyoutOpen(false)} + > + {subItem.hasAccess === false ? ( +
+ {inner} +
+ ) : ( + + {inner} + + )} +
+ ); + })} +
+
+ ) : ( + menuButton + )} {hasSubItems && isExpanded && ( {item.subItems?.map((subItem: SidebarItem) => {