diff --git a/internal/controller/ai_gateway_route.go b/internal/controller/ai_gateway_route.go index 814ad4f166..4e01a19a73 100644 --- a/internal/controller/ai_gateway_route.go +++ b/internal/controller/ai_gateway_route.go @@ -127,6 +127,10 @@ func defaultHTTPRouteFilters(ns string) []*egv1a1.HTTPRouteFilter { // syncAIGatewayRoute is the main logic for reconciling the AIGatewayRoute resource. // This is decoupled from the Reconcile method to centralize the error handling and status updates. func (c *AIGatewayRouteController) syncAIGatewayRoute(ctx context.Context, aiGatewayRoute *aigv1a1.AIGatewayRoute) error { + if handleFinalizer(ctx, c.client, c.logger, aiGatewayRoute, c.syncGateways) { // Propagate the AIGatewayRoute deletion all the way up to relevant Gateways. + return nil + } + // Check if the static default HTTPRouteFilters exist in the namespace. filters := defaultHTTPRouteFilters(aiGatewayRoute.Namespace) for _, base := range filters { diff --git a/internal/controller/ai_gateway_route_test.go b/internal/controller/ai_gateway_route_test.go index 8a3b18f24c..0f0b8aa9c8 100644 --- a/internal/controller/ai_gateway_route_test.go +++ b/internal/controller/ai_gateway_route_test.go @@ -43,7 +43,9 @@ func TestAIGatewayRouteController_Reconcile(t *testing.T) { // Do it for the second time with a slightly different configuration. var current aigv1a1.AIGatewayRoute err = fakeClient.Get(t.Context(), types.NamespacedName{Namespace: "default", Name: "myroute"}, ¤t) + // Make sure the finalizer is added. require.NoError(t, err) + require.Contains(t, current.ObjectMeta.Finalizers, aiGatewayControllerFinalizer, "Finalizer should be added") current.Spec.APISchema = aigv1a1.VersionedAPISchema{Name: aigv1a1.APISchemaOpenAI, Version: ptr.To("v123")} current.Spec.TargetRefs = []gwapiv1a2.LocalPolicyTargetReferenceWithSectionName{ {LocalPolicyTargetReference: gwapiv1a2.LocalPolicyTargetReference{Name: "mytarget"}}, diff --git a/internal/controller/ai_service_backend.go b/internal/controller/ai_service_backend.go index 014aa19a5e..8a59d0d978 100644 --- a/internal/controller/ai_service_backend.go +++ b/internal/controller/ai_service_backend.go @@ -60,9 +60,12 @@ func (c *AIBackendController) Reconcile(ctx context.Context, req reconcile.Reque return ctrl.Result{}, nil } -// syncAIGatewayRoute is the main logic for reconciling the AIServiceBackend resource. +// syncAIServiceBackend is the main logic for reconciling the AIServiceBackend resource. // This is decoupled from the Reconcile method to centralize the error handling and status updates. func (c *AIBackendController) syncAIServiceBackend(ctx context.Context, aiBackend *aigv1a1.AIServiceBackend) error { + // Propagate the bsp events all the way up to relevant Gateways regardless of being deleted or not. + _ = handleFinalizer(ctx, c.client, c.logger, aiBackend, nil) + // Notify the AI Gateway Route controller about the AIServiceBackend change. key := fmt.Sprintf("%s.%s", aiBackend.Name, aiBackend.Namespace) var aiGatewayRoutes aigv1a1.AIGatewayRouteList err := c.client.List(ctx, &aiGatewayRoutes, client.MatchingFields{k8sClientIndexBackendToReferencingAIGatewayRoute: key}) diff --git a/internal/controller/ai_service_backend_test.go b/internal/controller/ai_service_backend_test.go index e463bbaac6..5b4e14ece4 100644 --- a/internal/controller/ai_service_backend_test.go +++ b/internal/controller/ai_service_backend_test.go @@ -81,6 +81,7 @@ func TestAIServiceBackendController_Reconcile(t *testing.T) { require.Len(t, backend.Status.Conditions, 1) require.Equal(t, aigv1a1.ConditionTypeAccepted, backend.Status.Conditions[0].Type) require.Equal(t, "AIServiceBackend reconciled successfully", backend.Status.Conditions[0].Message) + require.Contains(t, backend.ObjectMeta.Finalizers, aiGatewayControllerFinalizer, "Finalizer should be set") // Test the case where the AIServiceBackend is being deleted. err = fakeClient.Delete(t.Context(), &aigv1a1.AIServiceBackend{ObjectMeta: metav1.ObjectMeta{Name: "mybackend", Namespace: "default"}}) diff --git a/internal/controller/backend_security_policy.go b/internal/controller/backend_security_policy.go index b78bcc807d..7453b6e118 100644 --- a/internal/controller/backend_security_policy.go +++ b/internal/controller/backend_security_policy.go @@ -81,6 +81,9 @@ func (c *BackendSecurityPolicyController) Reconcile(ctx context.Context, req ctr // reconcile reconciles BackendSecurityPolicy but extracted from Reconcile to centralize error handling. func (c *BackendSecurityPolicyController) reconcile(ctx context.Context, bsp *aigv1a1.BackendSecurityPolicy) (res ctrl.Result, err error) { + if handleFinalizer(ctx, c.client, c.logger, bsp, c.syncBackendSecurityPolicy) { // Propagate the bsp deletion all the way to relevant Gateways. + return res, nil + } if bsp.Spec.Type != aigv1a1.BackendSecurityPolicyTypeAPIKey { res, err = c.rotateCredential(ctx, bsp) if err != nil { diff --git a/internal/controller/backend_security_policy_test.go b/internal/controller/backend_security_policy_test.go index 5c635ac45b..648e548eb5 100644 --- a/internal/controller/backend_security_policy_test.go +++ b/internal/controller/backend_security_policy_test.go @@ -83,6 +83,7 @@ func TestBackendSecurityController_Reconcile(t *testing.T) { require.Len(t, bsp.Status.Conditions, 1) require.Equal(t, aigv1a1.ConditionTypeAccepted, bsp.Status.Conditions[0].Type) require.Equal(t, "BackendSecurityPolicy reconciled successfully", bsp.Status.Conditions[0].Message) + require.Contains(t, bsp.Finalizers, aiGatewayControllerFinalizer, "Finalizer should be added") // Test the case where the BackendSecurityPolicy is being deleted. err = fakeClient.Delete(t.Context(), &aigv1a1.BackendSecurityPolicy{ObjectMeta: metav1.ObjectMeta{Name: backendSecurityPolicyName, Namespace: namespace}}) diff --git a/internal/controller/controller.go b/internal/controller/controller.go index a15139562a..c7570b2371 100644 --- a/internal/controller/controller.go +++ b/internal/controller/controller.go @@ -21,6 +21,7 @@ import ( "k8s.io/client-go/rest" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" + ctrlutil "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" "sigs.k8s.io/controller-runtime/pkg/event" "sigs.k8s.io/controller-runtime/pkg/handler" "sigs.k8s.io/controller-runtime/pkg/manager" @@ -290,3 +291,48 @@ func newConditions(conditionType, message string) []metav1.Condition { } return []metav1.Condition{condition} } + +// aiGatewayControllerFinalizer is the name of the finalizer added to various AI Gateway resources. +const aiGatewayControllerFinalizer = "aigateway.envoyproxy.io/finalizer" + +// handleFinalizer checks if the object has a deletion timestamp. If it does, it removes the finalizer and +// calls the onDeletionFn if provided. Otherwise, it adds the finalizer to the object and updates it +// so that the finalizer is persisted. +// +// onDeletionFn can be nil, in which case it will not be called. The function can return an error but should not +// be a recoverable error. For example, onDeletionFn only propagates the deletion of the object to other resources. +// See the call sites of this function for examples. +func handleFinalizer[objType client.Object]( + ctx context.Context, client client.Client, + logger logr.Logger, + o objType, + onDeletionFn func(ctx context.Context, o objType) error, +) (onDelete bool) { + if o.GetDeletionTimestamp().IsZero() { + if !ctrlutil.ContainsFinalizer(o, aiGatewayControllerFinalizer) { + ctrlutil.AddFinalizer(o, aiGatewayControllerFinalizer) + if err := client.Update(ctx, o); err != nil { + // This shouldn't happen in normal operation, but if it does, we log the error. + logger.Error(err, "Failed to add finalizer to object", + "namespace", o.GetNamespace(), "name", o.GetName()) + } + } + return false + } + if ctrlutil.ContainsFinalizer(o, aiGatewayControllerFinalizer) { + ctrlutil.RemoveFinalizer(o, aiGatewayControllerFinalizer) + if onDeletionFn != nil { + if err := onDeletionFn(ctx, o); err != nil { + // onDeletionFn can return an error, but it should not be a recoverable error. + logger.Error(err, "Failed to handle finalizer deletion", + "namespace", o.GetNamespace(), "name", o.GetName()) + } + } + if err := client.Update(ctx, o); err != nil { + // This shouldn't happen in normal operation, but if it does, we log the error. + logger.Error(err, "Failed to remove finalizer from object", + "namespace", o.GetNamespace(), "name", o.GetName()) + } + } + return true +} diff --git a/internal/controller/controller_test.go b/internal/controller/controller_test.go index e86d92f08e..379bbe696e 100644 --- a/internal/controller/controller_test.go +++ b/internal/controller/controller_test.go @@ -6,8 +6,11 @@ package controller import ( + "context" + "fmt" "testing" + "github.com/go-logr/logr" "github.com/stretchr/testify/require" "go.uber.org/goleak" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -162,3 +165,112 @@ func Test_getSecretNameAndNamespace(t *testing.T) { secretRef.Namespace = nil require.Equal(t, "mysecret.foo", getSecretNameAndNamespace(secretRef, "foo")) } + +func Test_handleFinalizer(t *testing.T) { + tests := []struct { + name string + hasFinalizer bool + hasDeletionTS bool + clientUpdateError bool + onDeletionFnError bool + expectedOnDelete bool + expectedFinalizers []string + expectCallback bool + }{ + { + name: "add finalizer to new object", + hasFinalizer: false, + hasDeletionTS: false, + expectedOnDelete: false, + expectedFinalizers: []string{aiGatewayControllerFinalizer}, + }, + { + name: "add finalizer to new object witt update error", + hasFinalizer: false, + hasDeletionTS: false, + clientUpdateError: true, + expectedOnDelete: false, + expectedFinalizers: []string{aiGatewayControllerFinalizer}, + }, + { + name: "object already has finalizer", + hasFinalizer: true, + hasDeletionTS: false, + expectedOnDelete: false, + expectedFinalizers: []string{aiGatewayControllerFinalizer}, + }, + { + name: "object being deleted, remove finalizer", + hasFinalizer: true, + hasDeletionTS: true, + expectedOnDelete: true, + expectedFinalizers: []string{}, + expectCallback: true, + }, + { + name: "object being deleted, callback error", + hasFinalizer: true, + hasDeletionTS: true, + onDeletionFnError: true, + expectedOnDelete: true, + expectedFinalizers: []string{}, + expectCallback: true, + }, + { + name: "object being deleted, client update error", + hasFinalizer: true, + hasDeletionTS: true, + clientUpdateError: true, + expectedOnDelete: true, + expectedFinalizers: []string{}, + expectCallback: true, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + obj := &aigv1a1.AIGatewayRoute{ + ObjectMeta: metav1.ObjectMeta{Name: "test-object", Namespace: "test-namespace"}, + } + + if tc.hasFinalizer { + obj.Finalizers = []string{aiGatewayControllerFinalizer} + } + + if tc.hasDeletionTS { + obj.DeletionTimestamp = ptr.To(metav1.Now()) + } + + callbackExecuted := false + var onDeletionFn func(context.Context, *aigv1a1.AIGatewayRoute) error + if tc.expectCallback { + onDeletionFn = func(context.Context, *aigv1a1.AIGatewayRoute) error { + callbackExecuted = true + if tc.onDeletionFnError { + return fmt.Errorf("mock deletion error") + } + return nil + } + } + onDelete := handleFinalizer(context.Background(), + &mockClient{updateErr: tc.clientUpdateError}, logr.Discard(), obj, onDeletionFn) + require.Equal(t, tc.expectedOnDelete, onDelete) + require.Equal(t, tc.expectedFinalizers, obj.Finalizers) + require.Equal(t, tc.expectCallback, callbackExecuted) + }) + } +} + +// mockClients implements client.Client with a custom Update method for testing purposes. +type mockClient struct { + client.Client + updateErr bool +} + +// Updates implements the client.Client interface for the mock client. +func (m *mockClient) Update(context.Context, client.Object, ...client.UpdateOption) error { + if m.updateErr { + return fmt.Errorf("mock update error") + } + return nil +} diff --git a/tests/controller/controller_test.go b/tests/controller/controller_test.go index d7131ff42a..5dd9e9a13e 100644 --- a/tests/controller/controller_test.go +++ b/tests/controller/controller_test.go @@ -335,17 +335,40 @@ func TestAIGatewayRouteController(t *testing.T) { require.NoError(t, err) }) - t.Run("check statuses", func(t *testing.T) { + t.Run("check finalizer and status", func(t *testing.T) { require.Eventually(t, func() bool { var r aigv1a1.AIGatewayRoute err := c.Get(t.Context(), client.ObjectKey{Name: "myroute", Namespace: "default"}, &r) require.NoError(t, err) + // Check if the finalizer is set. + if len(r.ObjectMeta.Finalizers) == 0 || r.ObjectMeta.Finalizers[0] != "aigateway.envoyproxy.io/finalizer" { + t.Logf("expected finalizer not found: %v", r.ObjectMeta.Finalizers) + return false + } if len(r.Status.Conditions) != 1 { return false } return r.Status.Conditions[0].Type == aigv1a1.ConditionTypeAccepted }, 30*time.Second, 200*time.Millisecond) }) + + t.Run("delete route", func(t *testing.T) { + err := c.Delete(t.Context(), origin) + require.NoError(t, err) + + require.Eventually(t, func() bool { + var r aigv1a1.AIGatewayRoute + err = c.Get(t.Context(), client.ObjectKey{Name: "myroute", Namespace: "default"}, &r) + if err == nil || client.IgnoreNotFound(err) != nil { + t.Logf("expected not found error, got: %v", err) + return false + } + // On deletion, the event should be sent to the event channel to propagate the deletion to the Gateway. + events := eventCh.RequireItemsEventually(t, 1) + require.Equal(t, gatewayName, events[0].Name) + return true + }, 30*time.Second, 200*time.Millisecond) + }) } func TestBackendSecurityPolicyController(t *testing.T) { @@ -362,7 +385,7 @@ func TestBackendSecurityPolicyController(t *testing.T) { require.NoError(t, err) go func() { - err := mgr.Start(t.Context()) + err = mgr.Start(t.Context()) require.NoError(t, err) }() @@ -431,7 +454,7 @@ func TestBackendSecurityPolicyController(t *testing.T) { }) t.Run("update security policy", func(t *testing.T) { - err := retry.RetryOnConflict(retry.DefaultRetry, func() error { + err = retry.RetryOnConflict(retry.DefaultRetry, func() error { origin := aigv1a1.BackendSecurityPolicy{} require.NoError(t, c.Get(t.Context(), client.ObjectKey{Name: backendSecurityPolicyName, Namespace: backendSecurityPolicyNamespace}, &origin)) origin.Spec.APIKey = nil @@ -460,17 +483,50 @@ func TestBackendSecurityPolicyController(t *testing.T) { require.Equal(t, originals, backends) }) - t.Run("check statuses", func(t *testing.T) { + t.Run("check finalizer and status", func(t *testing.T) { require.Eventually(t, func() bool { var r aigv1a1.BackendSecurityPolicy - err := c.Get(t.Context(), client.ObjectKey{Name: backendSecurityPolicyName, Namespace: backendSecurityPolicyNamespace}, &r) + err = c.Get(t.Context(), client.ObjectKey{Name: backendSecurityPolicyName, Namespace: backendSecurityPolicyNamespace}, &r) require.NoError(t, err) + // Check if the finalizer is set. + if len(r.ObjectMeta.Finalizers) == 0 || r.ObjectMeta.Finalizers[0] != "aigateway.envoyproxy.io/finalizer" { + t.Logf("expected finalizer not found: %v", r.ObjectMeta.Finalizers) + return false + } if len(r.Status.Conditions) != 1 { return false } return r.Status.Conditions[0].Type == aigv1a1.ConditionTypeAccepted }, 30*time.Second, 200*time.Millisecond) }) + + t.Run("delete bsp", func(t *testing.T) { + err = c.Delete(t.Context(), &aigv1a1.BackendSecurityPolicy{ + ObjectMeta: metav1.ObjectMeta{ + Name: backendSecurityPolicyName, + Namespace: backendSecurityPolicyNamespace, + }, + }) + require.NoError(t, err) + + require.Eventually(t, func() bool { + var bsp aigv1a1.BackendSecurityPolicy + err = c.Get(t.Context(), client.ObjectKey{Name: backendSecurityPolicyName, Namespace: backendSecurityPolicyNamespace}, &bsp) + if err == nil || client.IgnoreNotFound(err) != nil { + t.Logf("expected not found error, got: %v", err) + return false + } + // On deletion, the event should be sent to the event channel to propagate the deletion to the Gateway. + backends := eventCh.RequireItemsEventually(t, 2) + sort.Slice(backends, func(i, j int) bool { + backends[i].TypeMeta = metav1.TypeMeta{} + backends[j].TypeMeta = metav1.TypeMeta{} + return backends[i].Name < backends[j].Name + }) + require.Equal(t, originals, backends) + return true + }, 30*time.Second, 200*time.Millisecond) + }) } func TestAIServiceBackendController(t *testing.T) { @@ -581,17 +637,51 @@ func TestAIServiceBackendController(t *testing.T) { require.Equal(t, originals, routes) }) - t.Run("check statuses", func(t *testing.T) { + t.Run("check finalizer and status", func(t *testing.T) { require.Eventually(t, func() bool { var r aigv1a1.AIServiceBackend - err := c.Get(t.Context(), client.ObjectKey{Name: aiServiceBackendName, Namespace: aiServiceBackendNamespace}, &r) + err = c.Get(t.Context(), client.ObjectKey{Name: aiServiceBackendName, Namespace: aiServiceBackendNamespace}, &r) require.NoError(t, err) + // Check if the finalizer is set. + if len(r.ObjectMeta.Finalizers) == 0 || r.ObjectMeta.Finalizers[0] != "aigateway.envoyproxy.io/finalizer" { + t.Logf("expected finalizer not found: %v", r.ObjectMeta.Finalizers) + return false + } + if len(r.Status.Conditions) != 1 { return false } return r.Status.Conditions[0].Type == aigv1a1.ConditionTypeAccepted }, 30*time.Second, 200*time.Millisecond) }) + + t.Run("delete backend", func(t *testing.T) { + err = c.Delete(t.Context(), &aigv1a1.AIServiceBackend{ + ObjectMeta: metav1.ObjectMeta{ + Name: aiServiceBackendName, + Namespace: aiServiceBackendNamespace, + }, + }) + require.NoError(t, err) + + require.Eventually(t, func() bool { + var r aigv1a1.AIServiceBackend + err = c.Get(t.Context(), client.ObjectKey{Name: aiServiceBackendName, Namespace: aiServiceBackendNamespace}, &r) + if err == nil || client.IgnoreNotFound(err) != nil { + t.Logf("expected not found error, got: %v", err) + return false + } + // On deletion, the event should be sent to the event channel to propagate the deletion to the Gateway. + routes := eventCh.RequireItemsEventually(t, 2) + sort.Slice(routes, func(i, j int) bool { + routes[i].TypeMeta = metav1.TypeMeta{} + routes[j].TypeMeta = metav1.TypeMeta{} + return routes[i].Name < routes[j].Name + }) + require.Equal(t, originals, routes) + return true + }, 30*time.Second, 200*time.Millisecond) + }) } func TestSecretController(t *testing.T) {