diff --git a/plugins/governance/changelog.md b/plugins/governance/changelog.md index e69de29bb2..140698b1c3 100644 --- a/plugins/governance/changelog.md +++ b/plugins/governance/changelog.md @@ -0,0 +1 @@ +- fix: routing rule chain no longer halts when a chain_rule resolves to the same provider/model (self-loop); subsequent rules now continue to evaluate correctly diff --git a/plugins/governance/routing.go b/plugins/governance/routing.go index 4638f70cab..4c8b33eec4 100644 --- a/plugins/governance/routing.go +++ b/plugins/governance/routing.go @@ -79,7 +79,7 @@ func NewRoutingEngine(store GovernanceStore, logger schemas.Logger, chainMaxDept // and the full scope chain is re-evaluated with the updated context. This repeats until: // 1. No rule matches the current context // 2. A terminal rule matches (chain_rule=false, the default) -// 3. A cycle is detected (a provider/model state was already visited) +// 3. Every chain-rule that could match has already fired once (all candidates exhausted) // 4. The chain exceeds the configured max depth (chainMaxDepth, default 10) func (re *RoutingEngine) EvaluateRoutingRules(ctx *schemas.BifrostContext, routingCtx *RoutingContext) (*RoutingDecision, error) { if routingCtx == nil { @@ -92,10 +92,10 @@ func (re *RoutingEngine) EvaluateRoutingRules(ctx *schemas.BifrostContext, routi currentProvider := routingCtx.Provider currentModel := routingCtx.Model - // Track visited provider/model states to detect cycles (e.g. A→B→A). - visited := map[string]struct{}{ - fmt.Sprintf("%s|%s", currentProvider, currentModel): {}, - } + // Track which rule IDs have already fired to prevent a rule from matching more than once per chain. + // This allows a self-looping rule (target == current state) to fire once and then let subsequent + // rules in the chain run, rather than halting with a cycle error. + visitedRuleIDs := map[string]struct{}{} var finalDecision *RoutingDecision @@ -154,6 +154,11 @@ func (re *RoutingEngine) EvaluateRoutingRules(ctx *schemas.BifrostContext, routi ctx.AppendRoutingEngineLog(schemas.RoutingEngineRoutingRule, schemas.LogLevelInfo, fmt.Sprintf("Evaluating scope %s: %d rules [%s]", scope.ScopeName, len(rules), strings.Join(ruleNames, ", "))) for _, rule := range rules { + if _, fired := visitedRuleIDs[rule.ID]; fired { + re.logger.Debug("[RoutingEngine] Skipping rule %s (already fired this chain)", rule.Name) + ctx.AppendRoutingEngineLog(schemas.RoutingEngineRoutingRule, schemas.LogLevelInfo, fmt.Sprintf("Rule '%s' skipped: already fired in this chain", rule.Name)) + continue + } re.logger.Debug("[RoutingEngine] Evaluating rule: name=%s, expression=%s", rule.Name, rule.CelExpression) program, err := re.store.GetRoutingProgram(ctx, rule) @@ -174,7 +179,7 @@ func (re *RoutingEngine) EvaluateRoutingRules(ctx *schemas.BifrostContext, routi if !matched { ctx.AppendRoutingEngineLog(schemas.RoutingEngineRoutingRule, schemas.LogLevelInfo, - fmt.Sprintf("Rule '%s' [%s] → no match (%s)", rule.Name, rule.CelExpression, buildNoMatchContext(rule.CelExpression, variables))) + fmt.Sprintf("Rule '%s' [%s] → no match (%s)", rule.Name, rule.CelExpression, buildNoMatchContext(rule.CelExpression, variables))) continue } @@ -236,14 +241,8 @@ func (re *RoutingEngine) EvaluateRoutingRules(ctx *schemas.BifrostContext, routi break } - // TERMINATION 3: Cycle detection — if the next state was already visited, continuing would loop forever. - nextState := fmt.Sprintf("%s|%s", stepDecision.Provider, stepDecision.Model) - if _, seen := visited[nextState]; seen { - re.logger.Debug("[RoutingEngine] Chain cycle detected at step=%d (state=%s already visited), stopping", chainStep, nextState) - ctx.AppendRoutingEngineLog(schemas.RoutingEngineRoutingRule, schemas.LogLevelInfo, fmt.Sprintf("Chain cycle detected at step %d (provider=%s, model=%s already visited), stopping. Final resolved: provider=%s, model=%s", chainStep, stepDecision.Provider, stepDecision.Model, stepDecision.Provider, stepDecision.Model)) - break - } - visited[nextState] = struct{}{} + // Mark this chain-rule as fired; it will be skipped in all subsequent chain steps. + visitedRuleIDs[matchedRule.ID] = struct{}{} // Advance context for next chain iteration. currentProvider = schemas.ModelProvider(stepDecision.Provider) diff --git a/plugins/governance/routing_test.go b/plugins/governance/routing_test.go index b1816d8dd4..f8125a975c 100644 --- a/plugins/governance/routing_test.go +++ b/plugins/governance/routing_test.go @@ -813,9 +813,9 @@ func TestEvaluateRoutingRules_TerminalRuleStopsChain(t *testing.T) { assert.Equal(t, "terminal-a", decision.MatchedRuleID) } -// TestEvaluateRoutingRules_ConvergenceStopsChain tests that the cycle-detection mechanism stops -// the chain when a chain_rule=true rule resolves to a provider/model already visited (no-op loop). -func TestEvaluateRoutingRules_ConvergenceStopsChain(t *testing.T) { +// TestEvaluateRoutingRules_SelfLoopContinuesToNextRule tests that a chain_rule=true rule which +// resolves to the same provider/model (self-loop) fires once and then allows the next rule to run. +func TestEvaluateRoutingRules_SelfLoopContinuesToNextRule(t *testing.T) { store, err := NewLocalGovernanceStore(context.Background(), NewMockLogger(), nil, &configstore.GovernanceConfig{}, nil) require.NoError(t, err) bgCtx := schemas.NewBifrostContext(context.Background(), time.Now()) @@ -823,10 +823,67 @@ func TestEvaluateRoutingRules_ConvergenceStopsChain(t *testing.T) { engine, err := NewRoutingEngine(store, NewMockLogger(), schemas.Ptr(10)) require.NoError(t, err) - // Rule A: chain_rule=true but resolves back to the initial provider/model — creates a cycle. + // Rule A: matches gpt-4o, chain_rule=true but resolves back to openai/gpt-4o (self-loop). + // Should fire once and then be skipped so Rule B can run. ruleA := &configstoreTables.TableRoutingRule{ - ID: "converge-a", - Name: "Convergence Rule A", + ID: "self-loop-a", + Name: "Self-Loop Rule A", + CelExpression: "model == 'gpt-4o'", + Targets: []configstoreTables.TableRoutingTarget{ + {Provider: bifrost.Ptr("openai"), Model: bifrost.Ptr("gpt-4o"), Weight: 1.0}, + }, + Enabled: true, + Scope: "global", + Priority: 0, + ChainRule: true, + } + require.NoError(t, store.UpdateRoutingRuleInMemory(context.Background(), ruleA)) + + // Rule B: also matches gpt-4o, terminal — should be reached after Rule A fires once. + ruleB := &configstoreTables.TableRoutingRule{ + ID: "self-loop-b", + Name: "Self-Loop Rule B", + CelExpression: "model == 'gpt-4o'", + Targets: []configstoreTables.TableRoutingTarget{ + {Provider: bifrost.Ptr("anthropic"), Model: bifrost.Ptr("claude-3"), Weight: 1.0}, + }, + Enabled: true, + Scope: "global", + Priority: 1, + ChainRule: false, + } + require.NoError(t, store.UpdateRoutingRuleInMemory(context.Background(), ruleB)) + + ctx := &RoutingContext{ + Provider: schemas.OpenAI, + Model: "gpt-4o", + Headers: map[string]string{}, + QueryParams: map[string]string{}, + } + + decision, err := engine.EvaluateRoutingRules(bgCtx, ctx) + require.NoError(t, err) + require.NotNil(t, decision) + + // Rule A fired once (self-loop), then was skipped. Rule B matched on the second step. + assert.Equal(t, "anthropic", decision.Provider) + assert.Equal(t, "claude-3", decision.Model) + assert.Equal(t, "self-loop-b", decision.MatchedRuleID) +} + +// TestEvaluateRoutingRules_SelfLoopAloneTerminates tests that a self-looping chain rule with no +// other rules terminates cleanly after firing once (TERMINATION 1: no remaining rule matches). +func TestEvaluateRoutingRules_SelfLoopAloneTerminates(t *testing.T) { + store, err := NewLocalGovernanceStore(context.Background(), NewMockLogger(), nil, &configstore.GovernanceConfig{}, nil) + require.NoError(t, err) + bgCtx := schemas.NewBifrostContext(context.Background(), time.Now()) + + engine, err := NewRoutingEngine(store, NewMockLogger(), schemas.Ptr(10)) + require.NoError(t, err) + + ruleA := &configstoreTables.TableRoutingRule{ + ID: "solo-self-loop", + Name: "Solo Self-Loop", CelExpression: "model == 'gpt-4o'", Targets: []configstoreTables.TableRoutingTarget{ {Provider: bifrost.Ptr("openai"), Model: bifrost.Ptr("gpt-4o"), Weight: 1.0}, @@ -849,10 +906,10 @@ func TestEvaluateRoutingRules_ConvergenceStopsChain(t *testing.T) { require.NoError(t, err) require.NotNil(t, decision) - // Cycle detected after the first match; the last matched decision (openai/gpt-4o) is returned. + // Rule A fired once, then was skipped. No other rules → terminates with Rule A's decision. assert.Equal(t, "openai", decision.Provider) assert.Equal(t, "gpt-4o", decision.Model) - assert.Equal(t, "converge-a", decision.MatchedRuleID) + assert.Equal(t, "solo-self-loop", decision.MatchedRuleID) } // TestEvaluateRoutingRules_MaxDepthCutoff tests that the chain stops once chainMaxDepth is reached, diff --git a/transports/changelog.md b/transports/changelog.md index 960d3d740b..53550c479e 100644 --- a/transports/changelog.md +++ b/transports/changelog.md @@ -1,3 +1,4 @@ +- fix: routing rule chain no longer halts when a chain_rule resolves to the same provider/model (self-loop); subsequent rules now continue to evaluate correctly - fix: response extra fields request type corruption for streaming requests on high concurrency - feat: added support for per-request content logging toggle via `x-bf-disable-content-logging` header - feat: auto-resolve provider when model string has no provider prefix