From 60dfef0290b6e532af1ce7d3c088124512fe8e26 Mon Sep 17 00:00:00 2001 From: Patryk Rostkowski Date: Thu, 12 Jun 2025 19:30:24 +0200 Subject: [PATCH 01/12] fix: add isTransientError helper to classify retryable errors Introduces isTransientError to detect transient Kubernetes errors and enable proper reconciliation retries. Signed-off-by: Patryk Rostkowski --- internal/provider/kubernetes/controller.go | 49 +++++++++++++++++++ .../provider/kubernetes/controller_test.go | 31 ++++++++++++ 2 files changed, 80 insertions(+) diff --git a/internal/provider/kubernetes/controller.go b/internal/provider/kubernetes/controller.go index 448ab5a107..edad83f4f4 100644 --- a/internal/provider/kubernetes/controller.go +++ b/internal/provider/kubernetes/controller.go @@ -179,6 +179,15 @@ func byNamespaceSelectorEnabled(eg *egv1a1.EnvoyGateway) bool { } } +func isTransientError(err error) bool { + return kerrors.IsServerTimeout(err) || + kerrors.IsTimeout(err) || + kerrors.IsTooManyRequests(err) || + kerrors.IsServiceUnavailable(err) || + kerrors.IsStoreReadError(err) || + kerrors.IsUnexpectedServerError(err) +} + // Reconcile handles reconciling all resources in a single call. Any resource event should enqueue the // same reconcile.Request containing the gateway controller name. This allows multiple resource updates to // be handled by a single call to Reconcile. The reconcile.Request DOES NOT map to a specific resource. @@ -221,6 +230,10 @@ func (r *gatewayAPIReconciler) Reconcile(ctx context.Context, _ reconcile.Reques // This should run before processGateways and processBackendRefs if managedGC.Spec.ParametersRef != nil && managedGC.DeletionTimestamp == nil { if err := r.processGatewayClassParamsRef(ctx, managedGC, resourceMappings, gwcResource); err != nil { + if isTransientError(err) { + return reconcile.Result{}, err + } + msg := fmt.Sprintf("%s: %v", status.MsgGatewayClassInvalidParams, err) gc := status.SetGatewayClassAccepted( managedGC.DeepCopy(), @@ -234,6 +247,9 @@ func (r *gatewayAPIReconciler) Reconcile(ctx context.Context, _ reconcile.Reques // Add all Gateways, their associated Routes, and referenced resources to the resourceTree if err = r.processGateways(ctx, managedGC, resourceMappings, gwcResource); err != nil { + if isTransientError(err) { + return reconcile.Result{}, err + } r.log.Error(err, fmt.Sprintf("failed processGateways for gatewayClass %s, skipping it", managedGC.Name)) continue } @@ -241,6 +257,9 @@ func (r *gatewayAPIReconciler) Reconcile(ctx context.Context, _ reconcile.Reques if r.eppCRDExists { // Add all EnvoyPatchPolicies to the resourceTree if err = r.processEnvoyPatchPolicies(ctx, gwcResource, resourceMappings); err != nil { + if isTransientError(err) { + return reconcile.Result{}, err + } r.log.Error(err, fmt.Sprintf("failed processEnvoyPatchPolicies for gatewayClass %s, skipping it", managedGC.Name)) continue } @@ -248,6 +267,9 @@ func (r *gatewayAPIReconciler) Reconcile(ctx context.Context, _ reconcile.Reques if r.ctpCRDExists { // Add all ClientTrafficPolicies and their referenced resources to the resourceTree if err = r.processClientTrafficPolicies(ctx, gwcResource, resourceMappings); err != nil { + if isTransientError(err) { + return reconcile.Result{}, err + } r.log.Error(err, fmt.Sprintf("failed processClientTrafficPolicies for gatewayClass %s, skipping it", managedGC.Name)) continue } @@ -256,6 +278,9 @@ func (r *gatewayAPIReconciler) Reconcile(ctx context.Context, _ reconcile.Reques if r.btpCRDExists { // Add all BackendTrafficPolicies to the resourceTree if err = r.processBackendTrafficPolicies(ctx, gwcResource, resourceMappings); err != nil { + if isTransientError(err) { + return reconcile.Result{}, err + } r.log.Error(err, fmt.Sprintf("failed processBackendTrafficPolicies for gatewayClass %s, skipping it", managedGC.Name)) continue } @@ -264,6 +289,9 @@ func (r *gatewayAPIReconciler) Reconcile(ctx context.Context, _ reconcile.Reques if r.spCRDExists { // Add all SecurityPolicies and their referenced resources to the resourceTree if err = r.processSecurityPolicies(ctx, gwcResource, resourceMappings); err != nil { + if isTransientError(err) { + return reconcile.Result{}, err + } r.log.Error(err, fmt.Sprintf("failed processSecurityPolicies for gatewayClass %s, skipping it", managedGC.Name)) continue } @@ -272,6 +300,9 @@ func (r *gatewayAPIReconciler) Reconcile(ctx context.Context, _ reconcile.Reques if r.bTLSPolicyCRDExists { // Add all BackendTLSPolies to the resourceTree if err = r.processBackendTLSPolicies(ctx, gwcResource, resourceMappings); err != nil { + if isTransientError(err) { + return reconcile.Result{}, err + } r.log.Error(err, fmt.Sprintf("failed processBackendTLSPolicies for gatewayClass %s, skipping it", managedGC.Name)) continue } @@ -280,18 +311,27 @@ func (r *gatewayAPIReconciler) Reconcile(ctx context.Context, _ reconcile.Reques if r.eepCRDExists { // Add all EnvoyExtensionPolicies and their referenced resources to the resourceTree if err = r.processEnvoyExtensionPolicies(ctx, gwcResource, resourceMappings); err != nil { + if isTransientError(err) { + return reconcile.Result{}, err + } r.log.Error(err, fmt.Sprintf("failed processEnvoyExtensionPolicies for gatewayClass %s, skipping it", managedGC.Name)) continue } } if err = r.processExtensionServerPolicies(ctx, gwcResource); err != nil { + if isTransientError(err) { + return reconcile.Result{}, err + } r.log.Error(err, fmt.Sprintf("failed processExtensionServerPolicies for gatewayClass %s, skipping it", managedGC.Name)) continue } if r.backendCRDExists { if err = r.processBackends(ctx, gwcResource); err != nil { + if isTransientError(err) { + return reconcile.Result{}, err + } r.log.Error(err, fmt.Sprintf("failed processBackends for gatewayClass %s, skipping it", managedGC.Name)) continue } @@ -307,6 +347,9 @@ func (r *gatewayAPIReconciler) Reconcile(ctx context.Context, _ reconcile.Reques for ns := range resourceMappings.allAssociatedNamespaces { namespace, err := r.getNamespace(ctx, ns) if err != nil { + if isTransientError(err) { + return reconcile.Result{}, err + } r.log.Error(err, "unable to find the namespace") if kerrors.IsNotFound(err) { continue @@ -339,6 +382,9 @@ func (r *gatewayAPIReconciler) Reconcile(ctx context.Context, _ reconcile.Reques // If needed, remove the finalizer from the accepted GatewayClass. if err := r.removeFinalizer(ctx, managedGC); err != nil { + if isTransientError(err) { + return reconcile.Result{}, err + } r.log.Error(err, fmt.Sprintf("failed to remove finalizer from gatewayClass %s", managedGC.Name)) continue @@ -346,6 +392,9 @@ func (r *gatewayAPIReconciler) Reconcile(ctx context.Context, _ reconcile.Reques } else { // finalize the accepted GatewayClass. if err := r.addFinalizer(ctx, managedGC); err != nil { + if isTransientError(err) { + return reconcile.Result{}, err + } r.log.Error(err, fmt.Sprintf("failed adding finalizer to gatewayClass %s", managedGC.Name)) continue diff --git a/internal/provider/kubernetes/controller_test.go b/internal/provider/kubernetes/controller_test.go index 86eba77554..c0e2d61155 100644 --- a/internal/provider/kubernetes/controller_test.go +++ b/internal/provider/kubernetes/controller_test.go @@ -11,7 +11,9 @@ import ( "testing" "github.com/stretchr/testify/require" + kerrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/types" "k8s.io/utils/ptr" "sigs.k8s.io/controller-runtime/pkg/client" @@ -1022,3 +1024,32 @@ func setupReferenceGrantReconciler(objs []client.Object) *gatewayAPIReconciler { Build() return r } + +func TestIsTransientError(t *testing.T) { + serverTimeoutErr := kerrors.NewServerTimeout( + schema.GroupResource{Group: "core", Resource: "pods"}, "list", 10) + timeoutErr := kerrors.NewTimeoutError("request timeout", 1) + tooManyRequestsErr := kerrors.NewTooManyRequests("too many requests", 1) + serviceUnavailableErr := kerrors.NewServiceUnavailable("service unavailable") + badRequestErr := kerrors.NewBadRequest("bad request") + + testCases := []struct { + name string + err error + expected bool + }{ + {"ServerTimeout", serverTimeoutErr, true}, + {"Timeout", timeoutErr, true}, + {"TooManyRequests", tooManyRequestsErr, true}, + {"ServiceUnavailable", serviceUnavailableErr, true}, + {"BadRequest", badRequestErr, false}, + {"NilError", nil, false}, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + actual := isTransientError(tc.err) + require.Equal(t, tc.expected, actual) + }) + } +} From dddfa10dce7de7e675454546f196508579179d78 Mon Sep 17 00:00:00 2001 From: "Huabing (Robin) Zhao" Date: Thu, 26 Jun 2025 02:39:38 +0000 Subject: [PATCH 02/12] add withErrors flag to Gateway API resources Signed-off-by: Huabing (Robin) Zhao remove debug info Signed-off-by: Huabing (Robin) Zhao minor change Signed-off-by: Huabing (Robin) Zhao minor change Signed-off-by: Huabing (Robin) Zhao minor change Signed-off-by: Huabing (Robin) Zhao fix lint Signed-off-by: Huabing (Robin) Zhao handle errors from processing BackendRefs Signed-off-by: Huabing (Robin) Zhao handle errors from processing ConfigMap Signed-off-by: Huabing (Robin) Zhao fix test Signed-off-by: Huabing (Robin) Zhao add test Signed-off-by: Huabing (Robin) Zhao --- internal/gatewayapi/resource/resource.go | 5 + internal/gatewayapi/runner/runner.go | 52 ++++-- internal/provider/kubernetes/controller.go | 165 +++++++++++++----- .../provider/kubernetes/controller_test.go | 5 +- test/e2e/testdata/provider-error.yaml | 12 ++ test/e2e/tests/provider_error.go | 103 +++++++++++ 6 files changed, 288 insertions(+), 54 deletions(-) create mode 100644 test/e2e/testdata/provider-error.yaml create mode 100644 test/e2e/tests/provider_error.go diff --git a/internal/gatewayapi/resource/resource.go b/internal/gatewayapi/resource/resource.go index c4f2757c7d..717cd692eb 100644 --- a/internal/gatewayapi/resource/resource.go +++ b/internal/gatewayapi/resource/resource.go @@ -33,6 +33,11 @@ type ( // resources that the translators needs as inputs. // +k8s:deepcopy-gen=true type Resources struct { + // HasErrors indicates whether the resources contain errors that may affect xDS generation. + // For example, if some resources are missing due to errors in the Provider layer, EG should not update the xDS cache. + // This ensures the xDS cache is not updated with incomplete or erroneous data and the Envoy Proxy can continue to serve requests + // with the last known good configuration. + HasErrors bool `json:"hasErrors,omitempty" yaml:"hasErrors,omitempty"` // This field is only used for marshalling/unmarshalling purposes and is not used by // the translator diff --git a/internal/gatewayapi/runner/runner.go b/internal/gatewayapi/runner/runner.go index 68b30e9e99..2d0c39c624 100644 --- a/internal/gatewayapi/runner/runner.go +++ b/internal/gatewayapi/runner/runner.go @@ -9,6 +9,7 @@ import ( "context" "crypto/tls" "encoding/json" + "errors" "fmt" "os" "path" @@ -180,30 +181,57 @@ func (r *Runner) subscribeAndTranslate(sub <-chan watchable.Snapshot[string, *re r.Logger.Error(err, "errors detected during translation") } + // Do not publish IRs if there are errors from the provider layer when previous IRs exist. + // This allows the Envoy Proxy to continue serving traffic with the last known good state. + // TODO: zhaohuabing should we also skip publishing IRs if there are errors in the Gateway API translation? + skippedIRs := map[string]struct{}{} + if resources.HasErrors { + for key := range result.XdsIR { + if _, exist := r.XdsIR.Load(key); exist { + skippedIRs[key] = struct{}{} + } + } + } + // Publish the IRs. // Also validate the ir before sending it. for key, val := range result.InfraIR { - r.Logger.V(1).WithValues("infra-ir", key).Info(val.JSONString()) - if err := val.Validate(); err != nil { - r.Logger.Error(err, "unable to validate infra ir, skipped sending it") - errChan <- err + // Preserve the existing IR even if there are validation or provider-layer errors. + // This ensures the Envoy Proxy can continue serving traffic using the last known good state. + newIRKeys = append(newIRKeys, key) + + if _, shouldSkip := skippedIRs[key]; !shouldSkip { + r.Logger.V(1).WithValues("infra-ir", key).Info(val.JSONString()) + if err := val.Validate(); err != nil { + r.Logger.Error(err, "skipped publishing infra ir due to validation errors", "key", key) + errChan <- err + } else { + r.InfraIR.Store(key, val) + } } else { - r.InfraIR.Store(key, val) - newIRKeys = append(newIRKeys, key) + r.Logger.Error(errors.New("skipped publishing infra ir due to errors in resources"), + "skipped sending infra ir", "key", key) } } for key, val := range result.XdsIR { - r.Logger.V(1).WithValues("xds-ir", key).Info(val.JSONString()) - if err := val.Validate(); err != nil { - r.Logger.Error(err, "unable to validate xds ir, skipped sending it") - errChan <- err + if _, shouldSkip := skippedIRs[key]; !shouldSkip { + r.Logger.V(1).WithValues("xds-ir", key).Info(val.JSONString()) + if err := val.Validate(); err != nil { + r.Logger.Error(err, "skipped publishing xds ir due to validation errors", "key", key) + errChan <- err + } else { + r.XdsIR.Store(key, val) + } } else { - r.XdsIR.Store(key, val) + r.Logger.Error( + errors.New("skipped publishing xds ir due to errors in resources, the last known good state will be used"), + "skipped sending xds ir", "key", key) } } - // Update Status + // Update Status regardless of errors in the resources or validation. + // This surfaces the errors to resource statuses to aid in debugging. for _, gateway := range result.Gateways { key := utils.NamespacedName(gateway) r.ProviderResources.GatewayStatuses.Store(key, &gateway.Status) diff --git a/internal/provider/kubernetes/controller.go b/internal/provider/kubernetes/controller.go index edad83f4f4..81d0feaa1c 100644 --- a/internal/provider/kubernetes/controller.go +++ b/internal/provider/kubernetes/controller.go @@ -226,52 +226,90 @@ func (r *gatewayAPIReconciler) Reconcile(ctx context.Context, _ reconcile.Reques gwcResources = append(gwcResources, gwcResource) resourceMappings := newResourceMapping() + // errors are non-transient errors while processing the Gateway and Envoy Gateway resources. + // If there are providerErrors, the gwcResource of the GatewayClass will be marked and the xDS cache will not be updated. + // If transient errors are encountered, the error will be returned and the reconcile loop will be retried. + var errs []error + // Process the parametersRef of the accepted GatewayClass. // This should run before processGateways and processBackendRefs if managedGC.Spec.ParametersRef != nil && managedGC.DeletionTimestamp == nil { - if err := r.processGatewayClassParamsRef(ctx, managedGC, resourceMappings, gwcResource); err != nil { + if err = r.processGatewayClassParamsRef(ctx, managedGC, resourceMappings, gwcResource); err != nil { if isTransientError(err) { + r.log.Error(err, "transient error processing GatewayClass parametersRef", "gatewayClass", managedGC.Name) return reconcile.Result{}, err } - msg := fmt.Sprintf("%s: %v", status.MsgGatewayClassInvalidParams, err) + r.log.Error(err, fmt.Sprintf("failed processGatewayClassParamsRef for gatewayClass %s, skipping it", managedGC.Name)) + errs = append(errs, err) gc := status.SetGatewayClassAccepted( managedGC.DeepCopy(), false, string(gwapiv1.GatewayClassReasonInvalidParameters), - msg) + fmt.Sprintf("%s: %v", status.MsgGatewayClassInvalidParams, err)) r.resources.GatewayClassStatuses.Store(utils.NamespacedName(gc), &gc.Status) - continue } } + // process EnvoyProxy secret refs if the GatewayClass has a EnvoyProxy configured + if err = r.processEnvoyProxySecretRef(ctx, gwcResource); err != nil { + if isTransientError(err) { + r.log.Error(err, "transient error processing EnvoyProxy SecretRef", "gatewayClass", managedGC.Name) + return reconcile.Result{}, err + } + + r.log.Error(err, fmt.Sprintf("failed to process EnvoyProxy SecretRef for gatewayClass %s, skipping it", managedGC.Name)) + errs = append(errs, err) + gc := status.SetGatewayClassAccepted( + managedGC.DeepCopy(), + false, + string(gwapiv1.GatewayClassReasonAccepted), + fmt.Sprintf("%s: %v", status.MsgGatewayClassInvalidParams, err)) + r.resources.GatewayClassStatuses.Store(utils.NamespacedName(gc), &gc.Status) + } else { + gc := status.SetGatewayClassAccepted( + managedGC.DeepCopy(), + true, + string(gwapiv1.GatewayClassReasonAccepted), + status.MsgValidGatewayClass) + r.resources.GatewayClassStatuses.Store(utils.NamespacedName(gc), &gc.Status) + } + // Add all Gateways, their associated Routes, and referenced resources to the resourceTree if err = r.processGateways(ctx, managedGC, resourceMappings, gwcResource); err != nil { if isTransientError(err) { + r.log.Error(err, "transient error processing Gateways", "gatewayClass", managedGC.Name) return reconcile.Result{}, err } + r.log.Error(err, fmt.Sprintf("failed processGateways for gatewayClass %s, skipping it", managedGC.Name)) - continue + errs = append(errs, err) } + // Add all xPolicies and their referenced resources to the resourceTree. if r.eppCRDExists { // Add all EnvoyPatchPolicies to the resourceTree if err = r.processEnvoyPatchPolicies(ctx, gwcResource, resourceMappings); err != nil { if isTransientError(err) { + r.log.Error(err, "transient error processing EnvoyPatchPolicies", "gatewayClass", managedGC.Name) return reconcile.Result{}, err } + r.log.Error(err, fmt.Sprintf("failed processEnvoyPatchPolicies for gatewayClass %s, skipping it", managedGC.Name)) - continue + errs = append(errs, err) } } + if r.ctpCRDExists { // Add all ClientTrafficPolicies and their referenced resources to the resourceTree if err = r.processClientTrafficPolicies(ctx, gwcResource, resourceMappings); err != nil { if isTransientError(err) { + r.log.Error(err, "transient error processing ClientTrafficPolicies", "gatewayClass", managedGC.Name) return reconcile.Result{}, err } + r.log.Error(err, fmt.Sprintf("failed processClientTrafficPolicies for gatewayClass %s, skipping it", managedGC.Name)) - continue + errs = append(errs, err) } } @@ -279,10 +317,12 @@ func (r *gatewayAPIReconciler) Reconcile(ctx context.Context, _ reconcile.Reques // Add all BackendTrafficPolicies to the resourceTree if err = r.processBackendTrafficPolicies(ctx, gwcResource, resourceMappings); err != nil { if isTransientError(err) { + r.log.Error(err, "transient error processing BackendTrafficPolicies", "gatewayClass", managedGC.Name) return reconcile.Result{}, err } + r.log.Error(err, fmt.Sprintf("failed processBackendTrafficPolicies for gatewayClass %s, skipping it", managedGC.Name)) - continue + errs = append(errs, err) } } @@ -290,10 +330,12 @@ func (r *gatewayAPIReconciler) Reconcile(ctx context.Context, _ reconcile.Reques // Add all SecurityPolicies and their referenced resources to the resourceTree if err = r.processSecurityPolicies(ctx, gwcResource, resourceMappings); err != nil { if isTransientError(err) { + r.log.Error(err, "transient error processing SecurityPolicies", "gatewayClass", managedGC.Name) return reconcile.Result{}, err } + r.log.Error(err, fmt.Sprintf("failed processSecurityPolicies for gatewayClass %s, skipping it", managedGC.Name)) - continue + errs = append(errs, err) } } @@ -301,10 +343,12 @@ func (r *gatewayAPIReconciler) Reconcile(ctx context.Context, _ reconcile.Reques // Add all BackendTLSPolies to the resourceTree if err = r.processBackendTLSPolicies(ctx, gwcResource, resourceMappings); err != nil { if isTransientError(err) { + r.log.Error(err, "transient error processing BackendTLSPolicies", "gatewayClass", managedGC.Name) return reconcile.Result{}, err } + r.log.Error(err, fmt.Sprintf("failed processBackendTLSPolicies for gatewayClass %s, skipping it", managedGC.Name)) - continue + errs = append(errs, err) } } @@ -312,35 +356,49 @@ func (r *gatewayAPIReconciler) Reconcile(ctx context.Context, _ reconcile.Reques // Add all EnvoyExtensionPolicies and their referenced resources to the resourceTree if err = r.processEnvoyExtensionPolicies(ctx, gwcResource, resourceMappings); err != nil { if isTransientError(err) { + r.log.Error(err, "transient error processing EnvoyExtensionPolicies", "gatewayClass", managedGC.Name) return reconcile.Result{}, err } + r.log.Error(err, fmt.Sprintf("failed processEnvoyExtensionPolicies for gatewayClass %s, skipping it", managedGC.Name)) - continue + errs = append(errs, err) } } if err = r.processExtensionServerPolicies(ctx, gwcResource); err != nil { if isTransientError(err) { + r.log.Error(err, "transient error processing ExtensionServerPolicies", "gatewayClass", managedGC.Name) return reconcile.Result{}, err } + r.log.Error(err, fmt.Sprintf("failed processExtensionServerPolicies for gatewayClass %s, skipping it", managedGC.Name)) - continue + errs = append(errs, err) } if r.backendCRDExists { if err = r.processBackends(ctx, gwcResource); err != nil { if isTransientError(err) { + r.log.Error(err, "transient error processing Backends", "gatewayClass", managedGC.Name) return reconcile.Result{}, err } + r.log.Error(err, fmt.Sprintf("failed processBackends for gatewayClass %s, skipping it", managedGC.Name)) - continue + errs = append(errs, err) } } // Add the referenced services, ServiceImports, and EndpointSlices in // the collected BackendRefs to the resourceTree. // BackendRefs are referred by various Route objects and the ExtAuth in SecurityPolicies. - r.processBackendRefs(ctx, gwcResource, resourceMappings) + if err = r.processBackendRefs(ctx, gwcResource, resourceMappings); err != nil { + if isTransientError(err) { + r.log.Error(err, "transient error processing BackendRefs", "gatewayClass", managedGC.Name) + return reconcile.Result{}, err + } + + r.log.Error(err, fmt.Sprintf("failed processBackendRefs for gatewayClass %s, skipping it", managedGC.Name)) + errs = append(errs, err) + } // For this particular Gateway, and all associated objects, check whether the // namespace exists. Add to the resourceTree. @@ -348,13 +406,12 @@ func (r *gatewayAPIReconciler) Reconcile(ctx context.Context, _ reconcile.Reques namespace, err := r.getNamespace(ctx, ns) if err != nil { if isTransientError(err) { + r.log.Error(err, "transient error getting namespace", "namespace", ns) return reconcile.Result{}, err } - r.log.Error(err, "unable to find the namespace") - if kerrors.IsNotFound(err) { - continue - } - continue + + r.log.Error(err, "failed to get namespace", "namespace", ns) + errs = append(errs, err) } gwcResource.Namespaces = append(gwcResource.Namespaces, namespace) @@ -368,15 +425,6 @@ func (r *gatewayAPIReconciler) Reconcile(ctx context.Context, _ reconcile.Reques } } - // process envoy gateway secret refs - r.processEnvoyProxySecretRef(ctx, gwcResource) - gc := status.SetGatewayClassAccepted( - managedGC.DeepCopy(), - true, - string(gwapiv1.GatewayClassReasonAccepted), - status.MsgValidGatewayClass) - r.resources.GatewayClassStatuses.Store(utils.NamespacedName(gc), &gc.Status) - if len(gwcResource.Gateways) == 0 { r.log.Info("No gateways found for accepted gatewayClass") @@ -385,9 +433,9 @@ func (r *gatewayAPIReconciler) Reconcile(ctx context.Context, _ reconcile.Reques if isTransientError(err) { return reconcile.Result{}, err } + r.log.Error(err, fmt.Sprintf("failed to remove finalizer from gatewayClass %s", managedGC.Name)) - continue } } else { // finalize the accepted GatewayClass. @@ -395,11 +443,17 @@ func (r *gatewayAPIReconciler) Reconcile(ctx context.Context, _ reconcile.Reques if isTransientError(err) { return reconcile.Result{}, err } + + // Failed to add finalizer should not affect the xDS cache update. + // Log the error and continue processing. r.log.Error(err, fmt.Sprintf("failed adding finalizer to gatewayClass %s", managedGC.Name)) - continue } } + + // Set HasErrors to true if there are any non-transient errors during processing of the Gateway and Envoy Gateway resources. + // This will be used to determine whether the xDS cache should be updated or not. + gwcResource.HasErrors = (len(errs) > 0) } // Store the Gateway Resources for the GatewayClass. @@ -412,10 +466,13 @@ func (r *gatewayAPIReconciler) Reconcile(ctx context.Context, _ reconcile.Reques return reconcile.Result{}, nil } -func (r *gatewayAPIReconciler) processEnvoyProxySecretRef(ctx context.Context, gwcResource *resource.Resources) { - if gwcResource.EnvoyProxyForGatewayClass == nil || gwcResource.EnvoyProxyForGatewayClass.Spec.BackendTLS == nil || gwcResource.EnvoyProxyForGatewayClass.Spec.BackendTLS.ClientCertificateRef == nil { - return +func (r *gatewayAPIReconciler) processEnvoyProxySecretRef(ctx context.Context, gwcResource *resource.Resources) error { + if gwcResource.EnvoyProxyForGatewayClass == nil || + gwcResource.EnvoyProxyForGatewayClass.Spec.BackendTLS == nil || + gwcResource.EnvoyProxyForGatewayClass.Spec.BackendTLS.ClientCertificateRef == nil { + return nil } + certRef := gwcResource.EnvoyProxyForGatewayClass.Spec.BackendTLS.ClientCertificateRef if refsSecret(certRef) { if err := r.processSecretRef( @@ -426,11 +483,15 @@ func (r *gatewayAPIReconciler) processEnvoyProxySecretRef(ctx context.Context, g gwcResource.EnvoyProxyForGatewayClass.Namespace, resource.KindEnvoyProxy, *certRef); err != nil { - r.log.Error(err, - "failed to process TLS SecretRef for EnvoyProxy", - "gateway", "issue", "secretRef", certRef) + return fmt.Errorf("failed to process EnvoyProxy SecretRef: %w", err) } + } else { + // only secret is supported for ClientCertificateRef + return fmt.Errorf("EnvoyProxy %s/%s has unsupported ClientCertificateRef %v", + gwcResource.EnvoyProxyForGatewayClass.Namespace, + gwcResource.EnvoyProxyForGatewayClass.Name, *certRef) } + return nil } // managedGatewayClasses returns a list of GatewayClass objects that are managed by the Envoy Gateway Controller. @@ -466,7 +527,11 @@ func (r *gatewayAPIReconciler) managedGatewayClasses(ctx context.Context) ([]*gw // - EndpointSlices // - Backends // - CACertificateRefs in the Backends -func (r *gatewayAPIReconciler) processBackendRefs(ctx context.Context, gwcResource *resource.Resources, resourceMappings *resourceMappings) { +func (r *gatewayAPIReconciler) processBackendRefs(ctx context.Context, gwcResource *resource.Resources, resourceMappings *resourceMappings) error { + // Only transient errors are returned from this function to allow Reconcile to retry. + // All other errors are just logged and ignored - these errors result in missing referenced backend resources + // in the resource tree, which is acceptable as the Gateway API translation layer will handle them. + // The Gateway API translation layer will surface these errors in the status of the resources referencing them. for backendRef := range resourceMappings.allAssociatedBackendRefs { backendRefKind := gatewayapi.KindDerefOr(backendRef.Kind, resource.KindService) r.log.Info("processing Backend", "kind", backendRefKind, "namespace", string(*backendRef.Namespace), @@ -478,6 +543,9 @@ func (r *gatewayAPIReconciler) processBackendRefs(ctx context.Context, gwcResour service := new(corev1.Service) err := r.client.Get(ctx, types.NamespacedName{Namespace: string(*backendRef.Namespace), Name: string(backendRef.Name)}, service) if err != nil { + if isTransientError(err) { + return err + } r.log.Error(err, "failed to get Service", "namespace", string(*backendRef.Namespace), "name", string(backendRef.Name)) } else { @@ -492,6 +560,9 @@ func (r *gatewayAPIReconciler) processBackendRefs(ctx context.Context, gwcResour serviceImport := new(mcsapiv1a1.ServiceImport) err := r.client.Get(ctx, types.NamespacedName{Namespace: string(*backendRef.Namespace), Name: string(backendRef.Name)}, serviceImport) if err != nil { + if isTransientError(err) { + return err + } r.log.Error(err, "failed to get ServiceImport", "namespace", string(*backendRef.Namespace), "name", string(backendRef.Name)) } else { @@ -510,6 +581,9 @@ func (r *gatewayAPIReconciler) processBackendRefs(ctx context.Context, gwcResour backend := new(egv1a1.Backend) err := r.client.Get(ctx, types.NamespacedName{Namespace: string(*backendRef.Namespace), Name: string(backendRef.Name)}, backend) if err != nil { + if isTransientError(err) { + return err + } r.log.Error(err, "failed to get Backend", "namespace", string(*backendRef.Namespace), "name", string(backendRef.Name)) } else { @@ -558,6 +632,9 @@ func (r *gatewayAPIReconciler) processBackendRefs(ctx context.Context, gwcResour caRefNew) } if err != nil { + if isTransientError(err) { + return err + } r.log.Error(err, "failed to process CACertificateRef for Backend", "backend", backend, "caCertificateRef", caCertRef.Name) @@ -577,6 +654,9 @@ func (r *gatewayAPIReconciler) processBackendRefs(ctx context.Context, gwcResour client.InNamespace(*backendRef.Namespace), } if err := r.client.List(ctx, endpointSliceList, opts...); err != nil { + if isTransientError(err) { + return err + } r.log.Error(err, "failed to get EndpointSlices", "namespace", string(*backendRef.Namespace), backendRefKind, string(backendRef.Name)) } else { @@ -593,6 +673,7 @@ func (r *gatewayAPIReconciler) processBackendRefs(ctx context.Context, gwcResour } } } + return nil } // processSecurityPolicyObjectRefs adds the referenced resources in SecurityPolicies @@ -1025,7 +1106,7 @@ func (r *gatewayAPIReconciler) processConfigMapRef( // to the resourceTree func (r *gatewayAPIReconciler) processBtpConfigMapRefs( ctx context.Context, resourceTree *resource.Resources, resourceMap *resourceMappings, -) { +) error { for _, policy := range resourceTree.BackendTrafficPolicies { for _, ro := range policy.Spec.ResponseOverride { if ro.Response.Body != nil && ro.Response.Body.ValueRef != nil && string(ro.Response.Body.ValueRef.Kind) == resource.KindConfigMap { @@ -1041,6 +1122,10 @@ func (r *gatewayAPIReconciler) processBtpConfigMapRefs( // when translating to IR because the referenced configmap can't be // found. if err != nil { + // Only transient errors are returned from this function to allow Reconcile to retry. + if isTransientError(err) { + return err + } r.log.Error(err, "failed to process ResponseOverride ValueRef for BackendTrafficPolicy", "policy", policy, "ValueRef", ro.Response.Body.ValueRef.Name) @@ -1055,6 +1140,7 @@ func (r *gatewayAPIReconciler) processBtpConfigMapRefs( } } } + return nil } func (r *gatewayAPIReconciler) getNamespace(ctx context.Context, name string) (*corev1.Namespace, error) { @@ -1293,8 +1379,7 @@ func (r *gatewayAPIReconciler) processBackendTrafficPolicies(ctx context.Context resourceTree.BackendTrafficPolicies = append(resourceTree.BackendTrafficPolicies, &backendTrafficPolicy) } } - r.processBtpConfigMapRefs(ctx, resourceTree, resourceMap) - return nil + return r.processBtpConfigMapRefs(ctx, resourceTree, resourceMap) } // processSecurityPolicies adds SecurityPolicies and their referenced resources to the resourceTree diff --git a/internal/provider/kubernetes/controller_test.go b/internal/provider/kubernetes/controller_test.go index c0e2d61155..f374f04680 100644 --- a/internal/provider/kubernetes/controller_test.go +++ b/internal/provider/kubernetes/controller_test.go @@ -7,6 +7,7 @@ package kubernetes import ( "context" + "fmt" "os" "testing" @@ -1028,7 +1029,7 @@ func setupReferenceGrantReconciler(objs []client.Object) *gatewayAPIReconciler { func TestIsTransientError(t *testing.T) { serverTimeoutErr := kerrors.NewServerTimeout( schema.GroupResource{Group: "core", Resource: "pods"}, "list", 10) - timeoutErr := kerrors.NewTimeoutError("request timeout", 1) + timeoutErr := fmt.Errorf("wrapped request timeout: %w", kerrors.NewTimeoutError("request timeout", 1)) tooManyRequestsErr := kerrors.NewTooManyRequests("too many requests", 1) serviceUnavailableErr := kerrors.NewServiceUnavailable("service unavailable") badRequestErr := kerrors.NewBadRequest("bad request") @@ -1039,7 +1040,7 @@ func TestIsTransientError(t *testing.T) { expected bool }{ {"ServerTimeout", serverTimeoutErr, true}, - {"Timeout", timeoutErr, true}, + {"Wrapped Timeout", timeoutErr, true}, {"TooManyRequests", tooManyRequestsErr, true}, {"ServiceUnavailable", serviceUnavailableErr, true}, {"BadRequest", badRequestErr, false}, diff --git a/test/e2e/testdata/provider-error.yaml b/test/e2e/testdata/provider-error.yaml new file mode 100644 index 0000000000..26ccbfb017 --- /dev/null +++ b/test/e2e/testdata/provider-error.yaml @@ -0,0 +1,12 @@ +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute +metadata: + name: simple-http-route + namespace: gateway-conformance-infra +spec: + parentRefs: + - name: same-namespace + rules: + - backendRefs: + - name: infra-backend-v1 + port: 8080 diff --git a/test/e2e/tests/provider_error.go b/test/e2e/tests/provider_error.go new file mode 100644 index 0000000000..ce7665b9eb --- /dev/null +++ b/test/e2e/tests/provider_error.go @@ -0,0 +1,103 @@ +// Copyright Envoy Gateway Authors +// SPDX-License-Identifier: Apache-2.0 +// The full text of the Apache license is available in the LICENSE file at +// the root of the repo. + +//go:build e2e + +package tests + +import ( + "context" + "testing" + "time" + + "k8s.io/apimachinery/pkg/types" + "k8s.io/utils/ptr" + "sigs.k8s.io/controller-runtime/pkg/client" + gwapiv1 "sigs.k8s.io/gateway-api/apis/v1" + "sigs.k8s.io/gateway-api/conformance/utils/http" + "sigs.k8s.io/gateway-api/conformance/utils/kubernetes" + "sigs.k8s.io/gateway-api/conformance/utils/suite" + + egv1a1 "github.com/envoyproxy/gateway/api/v1alpha1" + "github.com/envoyproxy/gateway/internal/gatewayapi" +) + +func init() { + ConformanceTests = append(ConformanceTests, ProviderErrorTest) +} + +var ProviderErrorTest = suite.ConformanceTest{ + ShortName: "ProviderError", + Description: "Test error handling in the provider", + Manifests: []string{"testdata/provider-error.yaml"}, + Test: func(t *testing.T, suite *suite.ConformanceTestSuite) { + t.Run("Continue serving requests with last known good configuration when errors are present in the provider", func(t *testing.T) { + ns := "gateway-conformance-infra" + routeNN := types.NamespacedName{Name: "simple-http-route", Namespace: ns} + gwNN := types.NamespacedName{Name: "same-namespace", Namespace: ns} + gwAddr := kubernetes.GatewayAndHTTPRoutesMustBeAccepted(t, suite.Client, suite.TimeoutConfig, suite.ControllerName, kubernetes.NewGatewayRef(gwNN), routeNN) + + // send a request to the gateway + expectedResponse := http.ExpectedResponse{ + Request: http.Request{ + Path: "/", + }, + Response: http.Response{ + StatusCode: 200, + }, + Namespace: ns, + } + + http.MakeRequestAndExpectEventuallyConsistentResponse(t, suite.RoundTripper, suite.TimeoutConfig, gwAddr, expectedResponse) + + // Update the Envoy Proxy client TLS settings to an invalid value + // This causes the provider to return an error when processing the resources + proxyNN := types.NamespacedName{Name: "proxy-config", Namespace: "envoy-gateway-system"} + config := &egv1a1.BackendTLSConfig{ + ClientCertificateRef: &gwapiv1.SecretObjectReference{ + Kind: gatewayapi.KindPtr("Secret"), + Name: "client-tls-certificate", + Namespace: gatewayapi.NamespacePtr("envoy-gateway-system"), + }, + } + // We set replicas to 0, if EG doesn't skip the IRs with errors, the envoy proxy pods will be scaled down to 0 + // and the gateway will stop serving traffic. + err := UpdateEnvoyProxy(suite.Client, proxyNN, config, 0) + if err != nil { + t.Error(err) + } + + // Wait for a short period to allow the provider to process the resources and encounter the error + time.Sleep(5 * time.Second) + // We expect the gateway to continue serving requests with the last known good configuration + http.MakeRequestAndExpectEventuallyConsistentResponse(t, suite.RoundTripper, suite.TimeoutConfig, gwAddr, expectedResponse) + + // Set the Envoy Proxy client TLS settings back to the valid value + err = UpdateEnvoyProxy(suite.Client, proxyNN, &egv1a1.BackendTLSConfig{ + ClientCertificateRef: nil, + TLSSettings: egv1a1.TLSSettings{}, + }, 1) + if err != nil { + t.Error(err) + } + }) + }, +} + +func UpdateEnvoyProxy(client client.Client, proxyNN types.NamespacedName, config *egv1a1.BackendTLSConfig, replica int32) error { + proxyConfig := &egv1a1.EnvoyProxy{} + err := client.Get(context.Background(), proxyNN, proxyConfig) + if err != nil { + return err + } + + proxyConfig.Spec.BackendTLS = config + proxyConfig.Spec.Provider.Kubernetes.EnvoyDeployment.Replicas = ptr.To(replica) + err = client.Update(context.Background(), proxyConfig) + if err != nil { + return err + } + return nil +} From 0b0d29ade006081f9f776d065ebb4ecaa51f95f4 Mon Sep 17 00:00:00 2001 From: "Huabing (Robin) Zhao" Date: Wed, 2 Jul 2025 04:56:46 +0000 Subject: [PATCH 03/12] Revert "add withErrors flag to Gateway API resources" This reverts commit dddfa10dce7de7e675454546f196508579179d78. --- internal/gatewayapi/resource/resource.go | 5 - internal/gatewayapi/runner/runner.go | 52 ++---- internal/provider/kubernetes/controller.go | 165 +++++------------- .../provider/kubernetes/controller_test.go | 5 +- test/e2e/testdata/provider-error.yaml | 12 -- test/e2e/tests/provider_error.go | 103 ----------- 6 files changed, 54 insertions(+), 288 deletions(-) delete mode 100644 test/e2e/testdata/provider-error.yaml delete mode 100644 test/e2e/tests/provider_error.go diff --git a/internal/gatewayapi/resource/resource.go b/internal/gatewayapi/resource/resource.go index 717cd692eb..c4f2757c7d 100644 --- a/internal/gatewayapi/resource/resource.go +++ b/internal/gatewayapi/resource/resource.go @@ -33,11 +33,6 @@ type ( // resources that the translators needs as inputs. // +k8s:deepcopy-gen=true type Resources struct { - // HasErrors indicates whether the resources contain errors that may affect xDS generation. - // For example, if some resources are missing due to errors in the Provider layer, EG should not update the xDS cache. - // This ensures the xDS cache is not updated with incomplete or erroneous data and the Envoy Proxy can continue to serve requests - // with the last known good configuration. - HasErrors bool `json:"hasErrors,omitempty" yaml:"hasErrors,omitempty"` // This field is only used for marshalling/unmarshalling purposes and is not used by // the translator diff --git a/internal/gatewayapi/runner/runner.go b/internal/gatewayapi/runner/runner.go index 2d0c39c624..68b30e9e99 100644 --- a/internal/gatewayapi/runner/runner.go +++ b/internal/gatewayapi/runner/runner.go @@ -9,7 +9,6 @@ import ( "context" "crypto/tls" "encoding/json" - "errors" "fmt" "os" "path" @@ -181,57 +180,30 @@ func (r *Runner) subscribeAndTranslate(sub <-chan watchable.Snapshot[string, *re r.Logger.Error(err, "errors detected during translation") } - // Do not publish IRs if there are errors from the provider layer when previous IRs exist. - // This allows the Envoy Proxy to continue serving traffic with the last known good state. - // TODO: zhaohuabing should we also skip publishing IRs if there are errors in the Gateway API translation? - skippedIRs := map[string]struct{}{} - if resources.HasErrors { - for key := range result.XdsIR { - if _, exist := r.XdsIR.Load(key); exist { - skippedIRs[key] = struct{}{} - } - } - } - // Publish the IRs. // Also validate the ir before sending it. for key, val := range result.InfraIR { - // Preserve the existing IR even if there are validation or provider-layer errors. - // This ensures the Envoy Proxy can continue serving traffic using the last known good state. - newIRKeys = append(newIRKeys, key) - - if _, shouldSkip := skippedIRs[key]; !shouldSkip { - r.Logger.V(1).WithValues("infra-ir", key).Info(val.JSONString()) - if err := val.Validate(); err != nil { - r.Logger.Error(err, "skipped publishing infra ir due to validation errors", "key", key) - errChan <- err - } else { - r.InfraIR.Store(key, val) - } + r.Logger.V(1).WithValues("infra-ir", key).Info(val.JSONString()) + if err := val.Validate(); err != nil { + r.Logger.Error(err, "unable to validate infra ir, skipped sending it") + errChan <- err } else { - r.Logger.Error(errors.New("skipped publishing infra ir due to errors in resources"), - "skipped sending infra ir", "key", key) + r.InfraIR.Store(key, val) + newIRKeys = append(newIRKeys, key) } } for key, val := range result.XdsIR { - if _, shouldSkip := skippedIRs[key]; !shouldSkip { - r.Logger.V(1).WithValues("xds-ir", key).Info(val.JSONString()) - if err := val.Validate(); err != nil { - r.Logger.Error(err, "skipped publishing xds ir due to validation errors", "key", key) - errChan <- err - } else { - r.XdsIR.Store(key, val) - } + r.Logger.V(1).WithValues("xds-ir", key).Info(val.JSONString()) + if err := val.Validate(); err != nil { + r.Logger.Error(err, "unable to validate xds ir, skipped sending it") + errChan <- err } else { - r.Logger.Error( - errors.New("skipped publishing xds ir due to errors in resources, the last known good state will be used"), - "skipped sending xds ir", "key", key) + r.XdsIR.Store(key, val) } } - // Update Status regardless of errors in the resources or validation. - // This surfaces the errors to resource statuses to aid in debugging. + // Update Status for _, gateway := range result.Gateways { key := utils.NamespacedName(gateway) r.ProviderResources.GatewayStatuses.Store(key, &gateway.Status) diff --git a/internal/provider/kubernetes/controller.go b/internal/provider/kubernetes/controller.go index 81d0feaa1c..edad83f4f4 100644 --- a/internal/provider/kubernetes/controller.go +++ b/internal/provider/kubernetes/controller.go @@ -226,90 +226,52 @@ func (r *gatewayAPIReconciler) Reconcile(ctx context.Context, _ reconcile.Reques gwcResources = append(gwcResources, gwcResource) resourceMappings := newResourceMapping() - // errors are non-transient errors while processing the Gateway and Envoy Gateway resources. - // If there are providerErrors, the gwcResource of the GatewayClass will be marked and the xDS cache will not be updated. - // If transient errors are encountered, the error will be returned and the reconcile loop will be retried. - var errs []error - // Process the parametersRef of the accepted GatewayClass. // This should run before processGateways and processBackendRefs if managedGC.Spec.ParametersRef != nil && managedGC.DeletionTimestamp == nil { - if err = r.processGatewayClassParamsRef(ctx, managedGC, resourceMappings, gwcResource); err != nil { + if err := r.processGatewayClassParamsRef(ctx, managedGC, resourceMappings, gwcResource); err != nil { if isTransientError(err) { - r.log.Error(err, "transient error processing GatewayClass parametersRef", "gatewayClass", managedGC.Name) return reconcile.Result{}, err } - r.log.Error(err, fmt.Sprintf("failed processGatewayClassParamsRef for gatewayClass %s, skipping it", managedGC.Name)) - errs = append(errs, err) + msg := fmt.Sprintf("%s: %v", status.MsgGatewayClassInvalidParams, err) gc := status.SetGatewayClassAccepted( managedGC.DeepCopy(), false, string(gwapiv1.GatewayClassReasonInvalidParameters), - fmt.Sprintf("%s: %v", status.MsgGatewayClassInvalidParams, err)) + msg) r.resources.GatewayClassStatuses.Store(utils.NamespacedName(gc), &gc.Status) + continue } } - // process EnvoyProxy secret refs if the GatewayClass has a EnvoyProxy configured - if err = r.processEnvoyProxySecretRef(ctx, gwcResource); err != nil { - if isTransientError(err) { - r.log.Error(err, "transient error processing EnvoyProxy SecretRef", "gatewayClass", managedGC.Name) - return reconcile.Result{}, err - } - - r.log.Error(err, fmt.Sprintf("failed to process EnvoyProxy SecretRef for gatewayClass %s, skipping it", managedGC.Name)) - errs = append(errs, err) - gc := status.SetGatewayClassAccepted( - managedGC.DeepCopy(), - false, - string(gwapiv1.GatewayClassReasonAccepted), - fmt.Sprintf("%s: %v", status.MsgGatewayClassInvalidParams, err)) - r.resources.GatewayClassStatuses.Store(utils.NamespacedName(gc), &gc.Status) - } else { - gc := status.SetGatewayClassAccepted( - managedGC.DeepCopy(), - true, - string(gwapiv1.GatewayClassReasonAccepted), - status.MsgValidGatewayClass) - r.resources.GatewayClassStatuses.Store(utils.NamespacedName(gc), &gc.Status) - } - // Add all Gateways, their associated Routes, and referenced resources to the resourceTree if err = r.processGateways(ctx, managedGC, resourceMappings, gwcResource); err != nil { if isTransientError(err) { - r.log.Error(err, "transient error processing Gateways", "gatewayClass", managedGC.Name) return reconcile.Result{}, err } - r.log.Error(err, fmt.Sprintf("failed processGateways for gatewayClass %s, skipping it", managedGC.Name)) - errs = append(errs, err) + continue } - // Add all xPolicies and their referenced resources to the resourceTree. if r.eppCRDExists { // Add all EnvoyPatchPolicies to the resourceTree if err = r.processEnvoyPatchPolicies(ctx, gwcResource, resourceMappings); err != nil { if isTransientError(err) { - r.log.Error(err, "transient error processing EnvoyPatchPolicies", "gatewayClass", managedGC.Name) return reconcile.Result{}, err } - r.log.Error(err, fmt.Sprintf("failed processEnvoyPatchPolicies for gatewayClass %s, skipping it", managedGC.Name)) - errs = append(errs, err) + continue } } - if r.ctpCRDExists { // Add all ClientTrafficPolicies and their referenced resources to the resourceTree if err = r.processClientTrafficPolicies(ctx, gwcResource, resourceMappings); err != nil { if isTransientError(err) { - r.log.Error(err, "transient error processing ClientTrafficPolicies", "gatewayClass", managedGC.Name) return reconcile.Result{}, err } - r.log.Error(err, fmt.Sprintf("failed processClientTrafficPolicies for gatewayClass %s, skipping it", managedGC.Name)) - errs = append(errs, err) + continue } } @@ -317,12 +279,10 @@ func (r *gatewayAPIReconciler) Reconcile(ctx context.Context, _ reconcile.Reques // Add all BackendTrafficPolicies to the resourceTree if err = r.processBackendTrafficPolicies(ctx, gwcResource, resourceMappings); err != nil { if isTransientError(err) { - r.log.Error(err, "transient error processing BackendTrafficPolicies", "gatewayClass", managedGC.Name) return reconcile.Result{}, err } - r.log.Error(err, fmt.Sprintf("failed processBackendTrafficPolicies for gatewayClass %s, skipping it", managedGC.Name)) - errs = append(errs, err) + continue } } @@ -330,12 +290,10 @@ func (r *gatewayAPIReconciler) Reconcile(ctx context.Context, _ reconcile.Reques // Add all SecurityPolicies and their referenced resources to the resourceTree if err = r.processSecurityPolicies(ctx, gwcResource, resourceMappings); err != nil { if isTransientError(err) { - r.log.Error(err, "transient error processing SecurityPolicies", "gatewayClass", managedGC.Name) return reconcile.Result{}, err } - r.log.Error(err, fmt.Sprintf("failed processSecurityPolicies for gatewayClass %s, skipping it", managedGC.Name)) - errs = append(errs, err) + continue } } @@ -343,12 +301,10 @@ func (r *gatewayAPIReconciler) Reconcile(ctx context.Context, _ reconcile.Reques // Add all BackendTLSPolies to the resourceTree if err = r.processBackendTLSPolicies(ctx, gwcResource, resourceMappings); err != nil { if isTransientError(err) { - r.log.Error(err, "transient error processing BackendTLSPolicies", "gatewayClass", managedGC.Name) return reconcile.Result{}, err } - r.log.Error(err, fmt.Sprintf("failed processBackendTLSPolicies for gatewayClass %s, skipping it", managedGC.Name)) - errs = append(errs, err) + continue } } @@ -356,49 +312,35 @@ func (r *gatewayAPIReconciler) Reconcile(ctx context.Context, _ reconcile.Reques // Add all EnvoyExtensionPolicies and their referenced resources to the resourceTree if err = r.processEnvoyExtensionPolicies(ctx, gwcResource, resourceMappings); err != nil { if isTransientError(err) { - r.log.Error(err, "transient error processing EnvoyExtensionPolicies", "gatewayClass", managedGC.Name) return reconcile.Result{}, err } - r.log.Error(err, fmt.Sprintf("failed processEnvoyExtensionPolicies for gatewayClass %s, skipping it", managedGC.Name)) - errs = append(errs, err) + continue } } if err = r.processExtensionServerPolicies(ctx, gwcResource); err != nil { if isTransientError(err) { - r.log.Error(err, "transient error processing ExtensionServerPolicies", "gatewayClass", managedGC.Name) return reconcile.Result{}, err } - r.log.Error(err, fmt.Sprintf("failed processExtensionServerPolicies for gatewayClass %s, skipping it", managedGC.Name)) - errs = append(errs, err) + continue } if r.backendCRDExists { if err = r.processBackends(ctx, gwcResource); err != nil { if isTransientError(err) { - r.log.Error(err, "transient error processing Backends", "gatewayClass", managedGC.Name) return reconcile.Result{}, err } - r.log.Error(err, fmt.Sprintf("failed processBackends for gatewayClass %s, skipping it", managedGC.Name)) - errs = append(errs, err) + continue } } // Add the referenced services, ServiceImports, and EndpointSlices in // the collected BackendRefs to the resourceTree. // BackendRefs are referred by various Route objects and the ExtAuth in SecurityPolicies. - if err = r.processBackendRefs(ctx, gwcResource, resourceMappings); err != nil { - if isTransientError(err) { - r.log.Error(err, "transient error processing BackendRefs", "gatewayClass", managedGC.Name) - return reconcile.Result{}, err - } - - r.log.Error(err, fmt.Sprintf("failed processBackendRefs for gatewayClass %s, skipping it", managedGC.Name)) - errs = append(errs, err) - } + r.processBackendRefs(ctx, gwcResource, resourceMappings) // For this particular Gateway, and all associated objects, check whether the // namespace exists. Add to the resourceTree. @@ -406,12 +348,13 @@ func (r *gatewayAPIReconciler) Reconcile(ctx context.Context, _ reconcile.Reques namespace, err := r.getNamespace(ctx, ns) if err != nil { if isTransientError(err) { - r.log.Error(err, "transient error getting namespace", "namespace", ns) return reconcile.Result{}, err } - - r.log.Error(err, "failed to get namespace", "namespace", ns) - errs = append(errs, err) + r.log.Error(err, "unable to find the namespace") + if kerrors.IsNotFound(err) { + continue + } + continue } gwcResource.Namespaces = append(gwcResource.Namespaces, namespace) @@ -425,6 +368,15 @@ func (r *gatewayAPIReconciler) Reconcile(ctx context.Context, _ reconcile.Reques } } + // process envoy gateway secret refs + r.processEnvoyProxySecretRef(ctx, gwcResource) + gc := status.SetGatewayClassAccepted( + managedGC.DeepCopy(), + true, + string(gwapiv1.GatewayClassReasonAccepted), + status.MsgValidGatewayClass) + r.resources.GatewayClassStatuses.Store(utils.NamespacedName(gc), &gc.Status) + if len(gwcResource.Gateways) == 0 { r.log.Info("No gateways found for accepted gatewayClass") @@ -433,9 +385,9 @@ func (r *gatewayAPIReconciler) Reconcile(ctx context.Context, _ reconcile.Reques if isTransientError(err) { return reconcile.Result{}, err } - r.log.Error(err, fmt.Sprintf("failed to remove finalizer from gatewayClass %s", managedGC.Name)) + continue } } else { // finalize the accepted GatewayClass. @@ -443,17 +395,11 @@ func (r *gatewayAPIReconciler) Reconcile(ctx context.Context, _ reconcile.Reques if isTransientError(err) { return reconcile.Result{}, err } - - // Failed to add finalizer should not affect the xDS cache update. - // Log the error and continue processing. r.log.Error(err, fmt.Sprintf("failed adding finalizer to gatewayClass %s", managedGC.Name)) + continue } } - - // Set HasErrors to true if there are any non-transient errors during processing of the Gateway and Envoy Gateway resources. - // This will be used to determine whether the xDS cache should be updated or not. - gwcResource.HasErrors = (len(errs) > 0) } // Store the Gateway Resources for the GatewayClass. @@ -466,13 +412,10 @@ func (r *gatewayAPIReconciler) Reconcile(ctx context.Context, _ reconcile.Reques return reconcile.Result{}, nil } -func (r *gatewayAPIReconciler) processEnvoyProxySecretRef(ctx context.Context, gwcResource *resource.Resources) error { - if gwcResource.EnvoyProxyForGatewayClass == nil || - gwcResource.EnvoyProxyForGatewayClass.Spec.BackendTLS == nil || - gwcResource.EnvoyProxyForGatewayClass.Spec.BackendTLS.ClientCertificateRef == nil { - return nil +func (r *gatewayAPIReconciler) processEnvoyProxySecretRef(ctx context.Context, gwcResource *resource.Resources) { + if gwcResource.EnvoyProxyForGatewayClass == nil || gwcResource.EnvoyProxyForGatewayClass.Spec.BackendTLS == nil || gwcResource.EnvoyProxyForGatewayClass.Spec.BackendTLS.ClientCertificateRef == nil { + return } - certRef := gwcResource.EnvoyProxyForGatewayClass.Spec.BackendTLS.ClientCertificateRef if refsSecret(certRef) { if err := r.processSecretRef( @@ -483,15 +426,11 @@ func (r *gatewayAPIReconciler) processEnvoyProxySecretRef(ctx context.Context, g gwcResource.EnvoyProxyForGatewayClass.Namespace, resource.KindEnvoyProxy, *certRef); err != nil { - return fmt.Errorf("failed to process EnvoyProxy SecretRef: %w", err) + r.log.Error(err, + "failed to process TLS SecretRef for EnvoyProxy", + "gateway", "issue", "secretRef", certRef) } - } else { - // only secret is supported for ClientCertificateRef - return fmt.Errorf("EnvoyProxy %s/%s has unsupported ClientCertificateRef %v", - gwcResource.EnvoyProxyForGatewayClass.Namespace, - gwcResource.EnvoyProxyForGatewayClass.Name, *certRef) } - return nil } // managedGatewayClasses returns a list of GatewayClass objects that are managed by the Envoy Gateway Controller. @@ -527,11 +466,7 @@ func (r *gatewayAPIReconciler) managedGatewayClasses(ctx context.Context) ([]*gw // - EndpointSlices // - Backends // - CACertificateRefs in the Backends -func (r *gatewayAPIReconciler) processBackendRefs(ctx context.Context, gwcResource *resource.Resources, resourceMappings *resourceMappings) error { - // Only transient errors are returned from this function to allow Reconcile to retry. - // All other errors are just logged and ignored - these errors result in missing referenced backend resources - // in the resource tree, which is acceptable as the Gateway API translation layer will handle them. - // The Gateway API translation layer will surface these errors in the status of the resources referencing them. +func (r *gatewayAPIReconciler) processBackendRefs(ctx context.Context, gwcResource *resource.Resources, resourceMappings *resourceMappings) { for backendRef := range resourceMappings.allAssociatedBackendRefs { backendRefKind := gatewayapi.KindDerefOr(backendRef.Kind, resource.KindService) r.log.Info("processing Backend", "kind", backendRefKind, "namespace", string(*backendRef.Namespace), @@ -543,9 +478,6 @@ func (r *gatewayAPIReconciler) processBackendRefs(ctx context.Context, gwcResour service := new(corev1.Service) err := r.client.Get(ctx, types.NamespacedName{Namespace: string(*backendRef.Namespace), Name: string(backendRef.Name)}, service) if err != nil { - if isTransientError(err) { - return err - } r.log.Error(err, "failed to get Service", "namespace", string(*backendRef.Namespace), "name", string(backendRef.Name)) } else { @@ -560,9 +492,6 @@ func (r *gatewayAPIReconciler) processBackendRefs(ctx context.Context, gwcResour serviceImport := new(mcsapiv1a1.ServiceImport) err := r.client.Get(ctx, types.NamespacedName{Namespace: string(*backendRef.Namespace), Name: string(backendRef.Name)}, serviceImport) if err != nil { - if isTransientError(err) { - return err - } r.log.Error(err, "failed to get ServiceImport", "namespace", string(*backendRef.Namespace), "name", string(backendRef.Name)) } else { @@ -581,9 +510,6 @@ func (r *gatewayAPIReconciler) processBackendRefs(ctx context.Context, gwcResour backend := new(egv1a1.Backend) err := r.client.Get(ctx, types.NamespacedName{Namespace: string(*backendRef.Namespace), Name: string(backendRef.Name)}, backend) if err != nil { - if isTransientError(err) { - return err - } r.log.Error(err, "failed to get Backend", "namespace", string(*backendRef.Namespace), "name", string(backendRef.Name)) } else { @@ -632,9 +558,6 @@ func (r *gatewayAPIReconciler) processBackendRefs(ctx context.Context, gwcResour caRefNew) } if err != nil { - if isTransientError(err) { - return err - } r.log.Error(err, "failed to process CACertificateRef for Backend", "backend", backend, "caCertificateRef", caCertRef.Name) @@ -654,9 +577,6 @@ func (r *gatewayAPIReconciler) processBackendRefs(ctx context.Context, gwcResour client.InNamespace(*backendRef.Namespace), } if err := r.client.List(ctx, endpointSliceList, opts...); err != nil { - if isTransientError(err) { - return err - } r.log.Error(err, "failed to get EndpointSlices", "namespace", string(*backendRef.Namespace), backendRefKind, string(backendRef.Name)) } else { @@ -673,7 +593,6 @@ func (r *gatewayAPIReconciler) processBackendRefs(ctx context.Context, gwcResour } } } - return nil } // processSecurityPolicyObjectRefs adds the referenced resources in SecurityPolicies @@ -1106,7 +1025,7 @@ func (r *gatewayAPIReconciler) processConfigMapRef( // to the resourceTree func (r *gatewayAPIReconciler) processBtpConfigMapRefs( ctx context.Context, resourceTree *resource.Resources, resourceMap *resourceMappings, -) error { +) { for _, policy := range resourceTree.BackendTrafficPolicies { for _, ro := range policy.Spec.ResponseOverride { if ro.Response.Body != nil && ro.Response.Body.ValueRef != nil && string(ro.Response.Body.ValueRef.Kind) == resource.KindConfigMap { @@ -1122,10 +1041,6 @@ func (r *gatewayAPIReconciler) processBtpConfigMapRefs( // when translating to IR because the referenced configmap can't be // found. if err != nil { - // Only transient errors are returned from this function to allow Reconcile to retry. - if isTransientError(err) { - return err - } r.log.Error(err, "failed to process ResponseOverride ValueRef for BackendTrafficPolicy", "policy", policy, "ValueRef", ro.Response.Body.ValueRef.Name) @@ -1140,7 +1055,6 @@ func (r *gatewayAPIReconciler) processBtpConfigMapRefs( } } } - return nil } func (r *gatewayAPIReconciler) getNamespace(ctx context.Context, name string) (*corev1.Namespace, error) { @@ -1379,7 +1293,8 @@ func (r *gatewayAPIReconciler) processBackendTrafficPolicies(ctx context.Context resourceTree.BackendTrafficPolicies = append(resourceTree.BackendTrafficPolicies, &backendTrafficPolicy) } } - return r.processBtpConfigMapRefs(ctx, resourceTree, resourceMap) + r.processBtpConfigMapRefs(ctx, resourceTree, resourceMap) + return nil } // processSecurityPolicies adds SecurityPolicies and their referenced resources to the resourceTree diff --git a/internal/provider/kubernetes/controller_test.go b/internal/provider/kubernetes/controller_test.go index f374f04680..c0e2d61155 100644 --- a/internal/provider/kubernetes/controller_test.go +++ b/internal/provider/kubernetes/controller_test.go @@ -7,7 +7,6 @@ package kubernetes import ( "context" - "fmt" "os" "testing" @@ -1029,7 +1028,7 @@ func setupReferenceGrantReconciler(objs []client.Object) *gatewayAPIReconciler { func TestIsTransientError(t *testing.T) { serverTimeoutErr := kerrors.NewServerTimeout( schema.GroupResource{Group: "core", Resource: "pods"}, "list", 10) - timeoutErr := fmt.Errorf("wrapped request timeout: %w", kerrors.NewTimeoutError("request timeout", 1)) + timeoutErr := kerrors.NewTimeoutError("request timeout", 1) tooManyRequestsErr := kerrors.NewTooManyRequests("too many requests", 1) serviceUnavailableErr := kerrors.NewServiceUnavailable("service unavailable") badRequestErr := kerrors.NewBadRequest("bad request") @@ -1040,7 +1039,7 @@ func TestIsTransientError(t *testing.T) { expected bool }{ {"ServerTimeout", serverTimeoutErr, true}, - {"Wrapped Timeout", timeoutErr, true}, + {"Timeout", timeoutErr, true}, {"TooManyRequests", tooManyRequestsErr, true}, {"ServiceUnavailable", serviceUnavailableErr, true}, {"BadRequest", badRequestErr, false}, diff --git a/test/e2e/testdata/provider-error.yaml b/test/e2e/testdata/provider-error.yaml deleted file mode 100644 index 26ccbfb017..0000000000 --- a/test/e2e/testdata/provider-error.yaml +++ /dev/null @@ -1,12 +0,0 @@ -apiVersion: gateway.networking.k8s.io/v1 -kind: HTTPRoute -metadata: - name: simple-http-route - namespace: gateway-conformance-infra -spec: - parentRefs: - - name: same-namespace - rules: - - backendRefs: - - name: infra-backend-v1 - port: 8080 diff --git a/test/e2e/tests/provider_error.go b/test/e2e/tests/provider_error.go deleted file mode 100644 index ce7665b9eb..0000000000 --- a/test/e2e/tests/provider_error.go +++ /dev/null @@ -1,103 +0,0 @@ -// Copyright Envoy Gateway Authors -// SPDX-License-Identifier: Apache-2.0 -// The full text of the Apache license is available in the LICENSE file at -// the root of the repo. - -//go:build e2e - -package tests - -import ( - "context" - "testing" - "time" - - "k8s.io/apimachinery/pkg/types" - "k8s.io/utils/ptr" - "sigs.k8s.io/controller-runtime/pkg/client" - gwapiv1 "sigs.k8s.io/gateway-api/apis/v1" - "sigs.k8s.io/gateway-api/conformance/utils/http" - "sigs.k8s.io/gateway-api/conformance/utils/kubernetes" - "sigs.k8s.io/gateway-api/conformance/utils/suite" - - egv1a1 "github.com/envoyproxy/gateway/api/v1alpha1" - "github.com/envoyproxy/gateway/internal/gatewayapi" -) - -func init() { - ConformanceTests = append(ConformanceTests, ProviderErrorTest) -} - -var ProviderErrorTest = suite.ConformanceTest{ - ShortName: "ProviderError", - Description: "Test error handling in the provider", - Manifests: []string{"testdata/provider-error.yaml"}, - Test: func(t *testing.T, suite *suite.ConformanceTestSuite) { - t.Run("Continue serving requests with last known good configuration when errors are present in the provider", func(t *testing.T) { - ns := "gateway-conformance-infra" - routeNN := types.NamespacedName{Name: "simple-http-route", Namespace: ns} - gwNN := types.NamespacedName{Name: "same-namespace", Namespace: ns} - gwAddr := kubernetes.GatewayAndHTTPRoutesMustBeAccepted(t, suite.Client, suite.TimeoutConfig, suite.ControllerName, kubernetes.NewGatewayRef(gwNN), routeNN) - - // send a request to the gateway - expectedResponse := http.ExpectedResponse{ - Request: http.Request{ - Path: "/", - }, - Response: http.Response{ - StatusCode: 200, - }, - Namespace: ns, - } - - http.MakeRequestAndExpectEventuallyConsistentResponse(t, suite.RoundTripper, suite.TimeoutConfig, gwAddr, expectedResponse) - - // Update the Envoy Proxy client TLS settings to an invalid value - // This causes the provider to return an error when processing the resources - proxyNN := types.NamespacedName{Name: "proxy-config", Namespace: "envoy-gateway-system"} - config := &egv1a1.BackendTLSConfig{ - ClientCertificateRef: &gwapiv1.SecretObjectReference{ - Kind: gatewayapi.KindPtr("Secret"), - Name: "client-tls-certificate", - Namespace: gatewayapi.NamespacePtr("envoy-gateway-system"), - }, - } - // We set replicas to 0, if EG doesn't skip the IRs with errors, the envoy proxy pods will be scaled down to 0 - // and the gateway will stop serving traffic. - err := UpdateEnvoyProxy(suite.Client, proxyNN, config, 0) - if err != nil { - t.Error(err) - } - - // Wait for a short period to allow the provider to process the resources and encounter the error - time.Sleep(5 * time.Second) - // We expect the gateway to continue serving requests with the last known good configuration - http.MakeRequestAndExpectEventuallyConsistentResponse(t, suite.RoundTripper, suite.TimeoutConfig, gwAddr, expectedResponse) - - // Set the Envoy Proxy client TLS settings back to the valid value - err = UpdateEnvoyProxy(suite.Client, proxyNN, &egv1a1.BackendTLSConfig{ - ClientCertificateRef: nil, - TLSSettings: egv1a1.TLSSettings{}, - }, 1) - if err != nil { - t.Error(err) - } - }) - }, -} - -func UpdateEnvoyProxy(client client.Client, proxyNN types.NamespacedName, config *egv1a1.BackendTLSConfig, replica int32) error { - proxyConfig := &egv1a1.EnvoyProxy{} - err := client.Get(context.Background(), proxyNN, proxyConfig) - if err != nil { - return err - } - - proxyConfig.Spec.BackendTLS = config - proxyConfig.Spec.Provider.Kubernetes.EnvoyDeployment.Replicas = ptr.To(replica) - err = client.Update(context.Background(), proxyConfig) - if err != nil { - return err - } - return nil -} From 4ce38024a9c9d34eea5b55d443debe3a0646102f Mon Sep 17 00:00:00 2001 From: "Huabing (Robin) Zhao" Date: Wed, 2 Jul 2025 06:16:38 +0000 Subject: [PATCH 04/12] add log Signed-off-by: Huabing (Robin) Zhao --- internal/provider/kubernetes/controller.go | 80 ++++++++++++++++++---- 1 file changed, 66 insertions(+), 14 deletions(-) diff --git a/internal/provider/kubernetes/controller.go b/internal/provider/kubernetes/controller.go index edad83f4f4..ee1a8497f1 100644 --- a/internal/provider/kubernetes/controller.go +++ b/internal/provider/kubernetes/controller.go @@ -231,6 +231,7 @@ func (r *gatewayAPIReconciler) Reconcile(ctx context.Context, _ reconcile.Reques if managedGC.Spec.ParametersRef != nil && managedGC.DeletionTimestamp == nil { if err := r.processGatewayClassParamsRef(ctx, managedGC, resourceMappings, gwcResource); err != nil { if isTransientError(err) { + r.log.Error(err, "transient error processing GatewayClass parametersRef", "gatewayClass", managedGC.Name) return reconcile.Result{}, err } @@ -248,6 +249,7 @@ func (r *gatewayAPIReconciler) Reconcile(ctx context.Context, _ reconcile.Reques // Add all Gateways, their associated Routes, and referenced resources to the resourceTree if err = r.processGateways(ctx, managedGC, resourceMappings, gwcResource); err != nil { if isTransientError(err) { + r.log.Error(err, "transient error processing gateways", "gatewayClass", managedGC.Name) return reconcile.Result{}, err } r.log.Error(err, fmt.Sprintf("failed processGateways for gatewayClass %s, skipping it", managedGC.Name)) @@ -258,6 +260,7 @@ func (r *gatewayAPIReconciler) Reconcile(ctx context.Context, _ reconcile.Reques // Add all EnvoyPatchPolicies to the resourceTree if err = r.processEnvoyPatchPolicies(ctx, gwcResource, resourceMappings); err != nil { if isTransientError(err) { + r.log.Error(err, "transient error processing EnvoyPatchPolicies", "gatewayClass", managedGC.Name) return reconcile.Result{}, err } r.log.Error(err, fmt.Sprintf("failed processEnvoyPatchPolicies for gatewayClass %s, skipping it", managedGC.Name)) @@ -268,6 +271,7 @@ func (r *gatewayAPIReconciler) Reconcile(ctx context.Context, _ reconcile.Reques // Add all ClientTrafficPolicies and their referenced resources to the resourceTree if err = r.processClientTrafficPolicies(ctx, gwcResource, resourceMappings); err != nil { if isTransientError(err) { + r.log.Error(err, "transient error processing ClientTrafficPolicies", "gatewayClass", managedGC.Name) return reconcile.Result{}, err } r.log.Error(err, fmt.Sprintf("failed processClientTrafficPolicies for gatewayClass %s, skipping it", managedGC.Name)) @@ -279,6 +283,7 @@ func (r *gatewayAPIReconciler) Reconcile(ctx context.Context, _ reconcile.Reques // Add all BackendTrafficPolicies to the resourceTree if err = r.processBackendTrafficPolicies(ctx, gwcResource, resourceMappings); err != nil { if isTransientError(err) { + r.log.Error(err, "transient error processing BackendTrafficPolicies", "gatewayClass", managedGC.Name) return reconcile.Result{}, err } r.log.Error(err, fmt.Sprintf("failed processBackendTrafficPolicies for gatewayClass %s, skipping it", managedGC.Name)) @@ -290,6 +295,7 @@ func (r *gatewayAPIReconciler) Reconcile(ctx context.Context, _ reconcile.Reques // Add all SecurityPolicies and their referenced resources to the resourceTree if err = r.processSecurityPolicies(ctx, gwcResource, resourceMappings); err != nil { if isTransientError(err) { + r.log.Error(err, "transient error processing SecurityPolicies", "gatewayClass", managedGC.Name) return reconcile.Result{}, err } r.log.Error(err, fmt.Sprintf("failed processSecurityPolicies for gatewayClass %s, skipping it", managedGC.Name)) @@ -301,6 +307,7 @@ func (r *gatewayAPIReconciler) Reconcile(ctx context.Context, _ reconcile.Reques // Add all BackendTLSPolies to the resourceTree if err = r.processBackendTLSPolicies(ctx, gwcResource, resourceMappings); err != nil { if isTransientError(err) { + r.log.Error(err, "transient error processing BackendTLSPolicies", "gatewayClass", managedGC.Name) return reconcile.Result{}, err } r.log.Error(err, fmt.Sprintf("failed processBackendTLSPolicies for gatewayClass %s, skipping it", managedGC.Name)) @@ -312,6 +319,7 @@ func (r *gatewayAPIReconciler) Reconcile(ctx context.Context, _ reconcile.Reques // Add all EnvoyExtensionPolicies and their referenced resources to the resourceTree if err = r.processEnvoyExtensionPolicies(ctx, gwcResource, resourceMappings); err != nil { if isTransientError(err) { + r.log.Error(err, "transient error processing EnvoyExtensionPolicies", "gatewayClass", managedGC.Name) return reconcile.Result{}, err } r.log.Error(err, fmt.Sprintf("failed processEnvoyExtensionPolicies for gatewayClass %s, skipping it", managedGC.Name)) @@ -321,6 +329,7 @@ func (r *gatewayAPIReconciler) Reconcile(ctx context.Context, _ reconcile.Reques if err = r.processExtensionServerPolicies(ctx, gwcResource); err != nil { if isTransientError(err) { + r.log.Error(err, "transient error processing ExtensionServerPolicies", "gatewayClass", managedGC.Name) return reconcile.Result{}, err } r.log.Error(err, fmt.Sprintf("failed processExtensionServerPolicies for gatewayClass %s, skipping it", managedGC.Name)) @@ -330,6 +339,7 @@ func (r *gatewayAPIReconciler) Reconcile(ctx context.Context, _ reconcile.Reques if r.backendCRDExists { if err = r.processBackends(ctx, gwcResource); err != nil { if isTransientError(err) { + r.log.Error(err, "transient error processing Backends", "gatewayClass", managedGC.Name) return reconcile.Result{}, err } r.log.Error(err, fmt.Sprintf("failed processBackends for gatewayClass %s, skipping it", managedGC.Name)) @@ -340,7 +350,14 @@ func (r *gatewayAPIReconciler) Reconcile(ctx context.Context, _ reconcile.Reques // Add the referenced services, ServiceImports, and EndpointSlices in // the collected BackendRefs to the resourceTree. // BackendRefs are referred by various Route objects and the ExtAuth in SecurityPolicies. - r.processBackendRefs(ctx, gwcResource, resourceMappings) + if err = r.processBackendRefs(ctx, gwcResource, resourceMappings); err != nil { + if isTransientError(err) { + r.log.Error(err, "transient error processing BackendRefs", "gatewayClass", managedGC.Name) + return reconcile.Result{}, err + } + + r.log.Error(err, fmt.Sprintf("failed processBackendRefs for gatewayClass %s, skipping it", managedGC.Name)) + } // For this particular Gateway, and all associated objects, check whether the // namespace exists. Add to the resourceTree. @@ -348,6 +365,7 @@ func (r *gatewayAPIReconciler) Reconcile(ctx context.Context, _ reconcile.Reques namespace, err := r.getNamespace(ctx, ns) if err != nil { if isTransientError(err) { + r.log.Error(err, "transient error getting namespace", "namespace", ns, "gatewayClass", managedGC.Name) return reconcile.Result{}, err } r.log.Error(err, "unable to find the namespace") @@ -369,13 +387,26 @@ func (r *gatewayAPIReconciler) Reconcile(ctx context.Context, _ reconcile.Reques } // process envoy gateway secret refs - r.processEnvoyProxySecretRef(ctx, gwcResource) - gc := status.SetGatewayClassAccepted( - managedGC.DeepCopy(), - true, - string(gwapiv1.GatewayClassReasonAccepted), - status.MsgValidGatewayClass) - r.resources.GatewayClassStatuses.Store(utils.NamespacedName(gc), &gc.Status) + if err := r.processEnvoyProxySecretRef(ctx, gwcResource); err != nil { + if isTransientError(err) { + r.log.Error(err, "transient error processing TLS SecretRef for EnvoyProxy", "gatewayClass", managedGC.Name) + return reconcile.Result{}, err + } + gc := status.SetGatewayClassAccepted( + managedGC.DeepCopy(), + false, + string(gwapiv1.GatewayClassReasonAccepted), + fmt.Sprintf("%s: %v", status.MsgGatewayClassInvalidParams, err)) + r.resources.GatewayClassStatuses.Store(utils.NamespacedName(gc), &gc.Status) + r.log.Error(err, fmt.Sprintf("failed process TLS SecretRef for EnvoyProxy for gatewayClass %s", managedGC.Name)) + } else { + gc := status.SetGatewayClassAccepted( + managedGC.DeepCopy(), + true, + string(gwapiv1.GatewayClassReasonAccepted), + status.MsgValidGatewayClass) + r.resources.GatewayClassStatuses.Store(utils.NamespacedName(gc), &gc.Status) + } if len(gwcResource.Gateways) == 0 { r.log.Info("No gateways found for accepted gatewayClass") @@ -383,6 +414,7 @@ func (r *gatewayAPIReconciler) Reconcile(ctx context.Context, _ reconcile.Reques // If needed, remove the finalizer from the accepted GatewayClass. if err := r.removeFinalizer(ctx, managedGC); err != nil { if isTransientError(err) { + r.log.Error(err, "transient error removing finalizer from gatewayClass", "gatewayClass", managedGC.Name) return reconcile.Result{}, err } r.log.Error(err, fmt.Sprintf("failed to remove finalizer from gatewayClass %s", @@ -393,6 +425,7 @@ func (r *gatewayAPIReconciler) Reconcile(ctx context.Context, _ reconcile.Reques // finalize the accepted GatewayClass. if err := r.addFinalizer(ctx, managedGC); err != nil { if isTransientError(err) { + r.log.Error(err, "transient error adding finalizer to gatewayClass", "gatewayClass", managedGC.Name) return reconcile.Result{}, err } r.log.Error(err, fmt.Sprintf("failed adding finalizer to gatewayClass %s", @@ -412,9 +445,9 @@ func (r *gatewayAPIReconciler) Reconcile(ctx context.Context, _ reconcile.Reques return reconcile.Result{}, nil } -func (r *gatewayAPIReconciler) processEnvoyProxySecretRef(ctx context.Context, gwcResource *resource.Resources) { +func (r *gatewayAPIReconciler) processEnvoyProxySecretRef(ctx context.Context, gwcResource *resource.Resources) error { if gwcResource.EnvoyProxyForGatewayClass == nil || gwcResource.EnvoyProxyForGatewayClass.Spec.BackendTLS == nil || gwcResource.EnvoyProxyForGatewayClass.Spec.BackendTLS.ClientCertificateRef == nil { - return + return nil } certRef := gwcResource.EnvoyProxyForGatewayClass.Spec.BackendTLS.ClientCertificateRef if refsSecret(certRef) { @@ -426,11 +459,10 @@ func (r *gatewayAPIReconciler) processEnvoyProxySecretRef(ctx context.Context, g gwcResource.EnvoyProxyForGatewayClass.Namespace, resource.KindEnvoyProxy, *certRef); err != nil { - r.log.Error(err, - "failed to process TLS SecretRef for EnvoyProxy", - "gateway", "issue", "secretRef", certRef) + return err } } + return nil } // managedGatewayClasses returns a list of GatewayClass objects that are managed by the Envoy Gateway Controller. @@ -466,7 +498,11 @@ func (r *gatewayAPIReconciler) managedGatewayClasses(ctx context.Context) ([]*gw // - EndpointSlices // - Backends // - CACertificateRefs in the Backends -func (r *gatewayAPIReconciler) processBackendRefs(ctx context.Context, gwcResource *resource.Resources, resourceMappings *resourceMappings) { +func (r *gatewayAPIReconciler) processBackendRefs(ctx context.Context, gwcResource *resource.Resources, resourceMappings *resourceMappings) error { + // Only transient errors are returned from this function to allow Reconcile to retry. + // All other errors are just logged and ignored - these errors result in missing referenced backend resources + // in the resource tree, which is acceptable as the Gateway API translation layer will handle them. + // The Gateway API translation layer will surface these errors in the status of the resources referencing them. for backendRef := range resourceMappings.allAssociatedBackendRefs { backendRefKind := gatewayapi.KindDerefOr(backendRef.Kind, resource.KindService) r.log.Info("processing Backend", "kind", backendRefKind, "namespace", string(*backendRef.Namespace), @@ -478,6 +514,9 @@ func (r *gatewayAPIReconciler) processBackendRefs(ctx context.Context, gwcResour service := new(corev1.Service) err := r.client.Get(ctx, types.NamespacedName{Namespace: string(*backendRef.Namespace), Name: string(backendRef.Name)}, service) if err != nil { + if isTransientError(err) { + return err + } r.log.Error(err, "failed to get Service", "namespace", string(*backendRef.Namespace), "name", string(backendRef.Name)) } else { @@ -492,6 +531,9 @@ func (r *gatewayAPIReconciler) processBackendRefs(ctx context.Context, gwcResour serviceImport := new(mcsapiv1a1.ServiceImport) err := r.client.Get(ctx, types.NamespacedName{Namespace: string(*backendRef.Namespace), Name: string(backendRef.Name)}, serviceImport) if err != nil { + if isTransientError(err) { + return err + } r.log.Error(err, "failed to get ServiceImport", "namespace", string(*backendRef.Namespace), "name", string(backendRef.Name)) } else { @@ -510,6 +552,9 @@ func (r *gatewayAPIReconciler) processBackendRefs(ctx context.Context, gwcResour backend := new(egv1a1.Backend) err := r.client.Get(ctx, types.NamespacedName{Namespace: string(*backendRef.Namespace), Name: string(backendRef.Name)}, backend) if err != nil { + if isTransientError(err) { + return err + } r.log.Error(err, "failed to get Backend", "namespace", string(*backendRef.Namespace), "name", string(backendRef.Name)) } else { @@ -558,6 +603,9 @@ func (r *gatewayAPIReconciler) processBackendRefs(ctx context.Context, gwcResour caRefNew) } if err != nil { + if isTransientError(err) { + return err + } r.log.Error(err, "failed to process CACertificateRef for Backend", "backend", backend, "caCertificateRef", caCertRef.Name) @@ -577,6 +625,9 @@ func (r *gatewayAPIReconciler) processBackendRefs(ctx context.Context, gwcResour client.InNamespace(*backendRef.Namespace), } if err := r.client.List(ctx, endpointSliceList, opts...); err != nil { + if isTransientError(err) { + return err + } r.log.Error(err, "failed to get EndpointSlices", "namespace", string(*backendRef.Namespace), backendRefKind, string(backendRef.Name)) } else { @@ -593,6 +644,7 @@ func (r *gatewayAPIReconciler) processBackendRefs(ctx context.Context, gwcResour } } } + return nil } // processSecurityPolicyObjectRefs adds the referenced resources in SecurityPolicies From bba7eeda1513fdfb042254944dbd89b91d7771ee Mon Sep 17 00:00:00 2001 From: "Huabing (Robin) Zhao" Date: Wed, 2 Jul 2025 06:40:45 +0000 Subject: [PATCH 05/12] don't skip resources when errors occur Signed-off-by: Huabing (Robin) Zhao --- internal/provider/kubernetes/controller.go | 9 --------- 1 file changed, 9 deletions(-) diff --git a/internal/provider/kubernetes/controller.go b/internal/provider/kubernetes/controller.go index feaec4558e..9aded12ca9 100644 --- a/internal/provider/kubernetes/controller.go +++ b/internal/provider/kubernetes/controller.go @@ -263,7 +263,6 @@ func (r *gatewayAPIReconciler) Reconcile(ctx context.Context, _ reconcile.Reques return reconcile.Result{}, err } r.log.Error(err, fmt.Sprintf("failed processGateways for gatewayClass %s, skipping it", managedGC.Name)) - continue } if r.eppCRDExists { @@ -274,7 +273,6 @@ func (r *gatewayAPIReconciler) Reconcile(ctx context.Context, _ reconcile.Reques return reconcile.Result{}, err } r.log.Error(err, fmt.Sprintf("failed processEnvoyPatchPolicies for gatewayClass %s, skipping it", managedGC.Name)) - continue } } @@ -310,7 +308,6 @@ func (r *gatewayAPIReconciler) Reconcile(ctx context.Context, _ reconcile.Reques return reconcile.Result{}, err } r.log.Error(err, fmt.Sprintf("failed processSecurityPolicies for gatewayClass %s, skipping it", managedGC.Name)) - continue } } @@ -322,7 +319,6 @@ func (r *gatewayAPIReconciler) Reconcile(ctx context.Context, _ reconcile.Reques return reconcile.Result{}, err } r.log.Error(err, fmt.Sprintf("failed processBackendTLSPolicies for gatewayClass %s, skipping it", managedGC.Name)) - continue } } @@ -334,7 +330,6 @@ func (r *gatewayAPIReconciler) Reconcile(ctx context.Context, _ reconcile.Reques return reconcile.Result{}, err } r.log.Error(err, fmt.Sprintf("failed processEnvoyExtensionPolicies for gatewayClass %s, skipping it", managedGC.Name)) - continue } } @@ -344,7 +339,6 @@ func (r *gatewayAPIReconciler) Reconcile(ctx context.Context, _ reconcile.Reques return reconcile.Result{}, err } r.log.Error(err, fmt.Sprintf("failed processExtensionServerPolicies for gatewayClass %s, skipping it", managedGC.Name)) - continue } if r.backendCRDExists { @@ -354,7 +348,6 @@ func (r *gatewayAPIReconciler) Reconcile(ctx context.Context, _ reconcile.Reques return reconcile.Result{}, err } r.log.Error(err, fmt.Sprintf("failed processBackends for gatewayClass %s, skipping it", managedGC.Name)) - continue } } @@ -430,7 +423,6 @@ func (r *gatewayAPIReconciler) Reconcile(ctx context.Context, _ reconcile.Reques } r.log.Error(err, fmt.Sprintf("failed to remove finalizer from gatewayClass %s", managedGC.Name)) - continue } } else { // finalize the accepted GatewayClass. @@ -441,7 +433,6 @@ func (r *gatewayAPIReconciler) Reconcile(ctx context.Context, _ reconcile.Reques } r.log.Error(err, fmt.Sprintf("failed adding finalizer to gatewayClass %s", managedGC.Name)) - continue } } } From efa613340d72a858a2407a3e277d00773a9ef6dd Mon Sep 17 00:00:00 2001 From: "Huabing (Robin) Zhao" Date: Wed, 2 Jul 2025 06:52:04 +0000 Subject: [PATCH 06/12] skip invalid GatewayClass Signed-off-by: Huabing (Robin) Zhao --- internal/provider/kubernetes/controller.go | 49 ++++++++++++---------- 1 file changed, 26 insertions(+), 23 deletions(-) diff --git a/internal/provider/kubernetes/controller.go b/internal/provider/kubernetes/controller.go index 9aded12ca9..ebe3420e61 100644 --- a/internal/provider/kubernetes/controller.go +++ b/internal/provider/kubernetes/controller.go @@ -230,7 +230,6 @@ func (r *gatewayAPIReconciler) Reconcile(ctx context.Context, _ reconcile.Reques // Process the parametersRef of the accepted GatewayClass. // This should run before processGateways and processBackendRefs if managedGC.Spec.ParametersRef != nil && managedGC.DeletionTimestamp == nil { - if err := r.processGatewayClassParamsRef(ctx, managedGC, gwcResourceMapping, gwcResource); err != nil { if isTransientError(err) { r.log.Error(err, "transient error processing GatewayClass parametersRef", "gatewayClass", managedGC.Name) @@ -248,6 +247,32 @@ func (r *gatewayAPIReconciler) Reconcile(ctx context.Context, _ reconcile.Reques continue } } + + // process envoy gateway secret refs + if err := r.processEnvoyProxySecretRef(ctx, gwcResource); err != nil { + if isTransientError(err) { + r.log.Error(err, "transient error processing TLS SecretRef for EnvoyProxy", "gatewayClass", managedGC.Name) + return reconcile.Result{}, err + } + + r.log.Error(err, fmt.Sprintf("failed process TLS SecretRef for EnvoyProxy for gatewayClass %s, skipping it", managedGC.Name)) + gc := status.SetGatewayClassAccepted( + managedGC.DeepCopy(), + false, + string(gwapiv1.GatewayClassReasonAccepted), + fmt.Sprintf("%s: %v", status.MsgGatewayClassInvalidParams, err)) + r.resources.GatewayClassStatuses.Store(utils.NamespacedName(gc), &gc.Status) + continue + } + + // GatewayClass is valid so far, mark it as accepted. + gc := status.SetGatewayClassAccepted( + managedGC.DeepCopy(), + true, + string(gwapiv1.GatewayClassReasonAccepted), + status.MsgValidGatewayClass) + r.resources.GatewayClassStatuses.Store(utils.NamespacedName(gc), &gc.Status) + // it's safe here to append gwcResource to gwcResources gwcResources = append(gwcResources, gwcResource) // process global resources @@ -390,28 +415,6 @@ func (r *gatewayAPIReconciler) Reconcile(ctx context.Context, _ reconcile.Reques } } - // process envoy gateway secret refs - if err := r.processEnvoyProxySecretRef(ctx, gwcResource); err != nil { - if isTransientError(err) { - r.log.Error(err, "transient error processing TLS SecretRef for EnvoyProxy", "gatewayClass", managedGC.Name) - return reconcile.Result{}, err - } - gc := status.SetGatewayClassAccepted( - managedGC.DeepCopy(), - false, - string(gwapiv1.GatewayClassReasonAccepted), - fmt.Sprintf("%s: %v", status.MsgGatewayClassInvalidParams, err)) - r.resources.GatewayClassStatuses.Store(utils.NamespacedName(gc), &gc.Status) - r.log.Error(err, fmt.Sprintf("failed process TLS SecretRef for EnvoyProxy for gatewayClass %s", managedGC.Name)) - } else { - gc := status.SetGatewayClassAccepted( - managedGC.DeepCopy(), - true, - string(gwapiv1.GatewayClassReasonAccepted), - status.MsgValidGatewayClass) - r.resources.GatewayClassStatuses.Store(utils.NamespacedName(gc), &gc.Status) - } - if len(gwcResource.Gateways) == 0 { r.log.Info("No gateways found for accepted gatewayClass") From fc736a8009955d89c1850c31b1844b9e94c11a24 Mon Sep 17 00:00:00 2001 From: "Huabing (Robin) Zhao" Date: Wed, 2 Jul 2025 07:14:43 +0000 Subject: [PATCH 07/12] address comment Signed-off-by: Huabing (Robin) Zhao --- internal/provider/kubernetes/controller.go | 1 + internal/provider/kubernetes/controller_test.go | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/internal/provider/kubernetes/controller.go b/internal/provider/kubernetes/controller.go index ebe3420e61..7c525dd56f 100644 --- a/internal/provider/kubernetes/controller.go +++ b/internal/provider/kubernetes/controller.go @@ -186,6 +186,7 @@ func isTransientError(err error) bool { kerrors.IsTooManyRequests(err) || kerrors.IsServiceUnavailable(err) || kerrors.IsStoreReadError(err) || + kerrors.IsInternalError(err) || kerrors.IsUnexpectedServerError(err) } diff --git a/internal/provider/kubernetes/controller_test.go b/internal/provider/kubernetes/controller_test.go index e71dd4a4da..a8b33af98a 100644 --- a/internal/provider/kubernetes/controller_test.go +++ b/internal/provider/kubernetes/controller_test.go @@ -7,6 +7,7 @@ package kubernetes import ( "context" + "fmt" "os" "testing" @@ -1029,7 +1030,7 @@ func TestIsTransientError(t *testing.T) { serverTimeoutErr := kerrors.NewServerTimeout( schema.GroupResource{Group: "core", Resource: "pods"}, "list", 10) timeoutErr := kerrors.NewTimeoutError("request timeout", 1) - tooManyRequestsErr := kerrors.NewTooManyRequests("too many requests", 1) + wrappedTooManyRequestsErr := fmt.Errorf("wrapping: %w", kerrors.NewTooManyRequests("too many requests", 1)) serviceUnavailableErr := kerrors.NewServiceUnavailable("service unavailable") badRequestErr := kerrors.NewBadRequest("bad request") @@ -1040,7 +1041,7 @@ func TestIsTransientError(t *testing.T) { }{ {"ServerTimeout", serverTimeoutErr, true}, {"Timeout", timeoutErr, true}, - {"TooManyRequests", tooManyRequestsErr, true}, + {"TooManyRequests", wrappedTooManyRequestsErr, true}, {"ServiceUnavailable", serviceUnavailableErr, true}, {"BadRequest", badRequestErr, false}, {"NilError", nil, false}, From 2d1edd9e7aa48d824521f939be6b82d4764ae321 Mon Sep 17 00:00:00 2001 From: "Huabing (Robin) Zhao" Date: Thu, 3 Jul 2025 01:05:15 +0000 Subject: [PATCH 08/12] handle all transient errors Signed-off-by: Huabing (Robin) Zhao --- examples/redis/redis.yaml | 11 +- .../translate/in/envoy-patch-policy.yaml | 2 + internal/provider/kubernetes/controller.go | 166 +++++++++++------- internal/utils/jsonpatch/patch.go | 1 + 4 files changed, 108 insertions(+), 72 deletions(-) diff --git a/examples/redis/redis.yaml b/examples/redis/redis.yaml index cdeb576100..2f5a8e941c 100644 --- a/examples/redis/redis.yaml +++ b/examples/redis/redis.yaml @@ -1,13 +1,8 @@ -kind: Namespace -apiVersion: v1 -metadata: - name: redis-system ---- + apiVersion: apps/v1 kind: Deployment metadata: name: redis - namespace: redis-system labels: app: redis spec: @@ -36,9 +31,9 @@ apiVersion: v1 kind: Service metadata: name: redis - namespace: redis-system labels: app: redis + istio.io/use-waypoint: reviews-waypoint annotations: spec: ports: @@ -66,4 +61,4 @@ data: backend: type: Redis redis: - url: redis.redis-system.svc.cluster.local:6379 + url: redis.default.svc.cluster.local:6379 diff --git a/internal/cmd/egctl/testdata/translate/in/envoy-patch-policy.yaml b/internal/cmd/egctl/testdata/translate/in/envoy-patch-policy.yaml index be98f705f7..84387e46f0 100644 --- a/internal/cmd/egctl/testdata/translate/in/envoy-patch-policy.yaml +++ b/internal/cmd/egctl/testdata/translate/in/envoy-patch-policy.yaml @@ -156,3 +156,5 @@ spec: socket_address: address: ratelimit.svc.cluster.local port_value: 8081 + + diff --git a/internal/provider/kubernetes/controller.go b/internal/provider/kubernetes/controller.go index 7c525dd56f..afa322ba5b 100644 --- a/internal/provider/kubernetes/controller.go +++ b/internal/provider/kubernetes/controller.go @@ -278,9 +278,21 @@ func (r *gatewayAPIReconciler) Reconcile(ctx context.Context, _ reconcile.Reques gwcResources = append(gwcResources, gwcResource) // process global resources // add the OIDC HMAC Secret to the resourceTree - r.processOIDCHMACSecret(ctx, gwcResource, gwcResourceMapping) + if err = r.processOIDCHMACSecret(ctx, gwcResource, gwcResourceMapping); err != nil { + if isTransientError(err) { + r.log.Error(err, "transient error processing OIDC HMAC Secret", "gatewayClass", managedGC.Name) + } + r.log.Error(err, fmt.Sprintf("failed processOIDCHMACSecret for gatewayClass %s, skipping it", managedGC.Name)) + } + // add the Envoy TLS Secret to the resourceTree - r.processEnvoyTLSSecret(ctx, gwcResource, gwcResourceMapping) + if err = r.processEnvoyTLSSecret(ctx, gwcResource, gwcResourceMapping); err != nil { + if isTransientError(err) { + r.log.Error(err, "transient error processing Envoy TLS Secret", "gatewayClass", managedGC.Name) + return reconcile.Result{}, err + } + r.log.Error(err, fmt.Sprintf("failed processEnvoyTLSSecret for gatewayClass %s, skipping it", managedGC.Name)) + } // Add all Gateways, their associated Routes, and referenced resources to the resourceTree if err = r.processGateways(ctx, managedGC, gwcResourceMapping, gwcResource); err != nil { @@ -310,7 +322,6 @@ func (r *gatewayAPIReconciler) Reconcile(ctx context.Context, _ reconcile.Reques return reconcile.Result{}, err } r.log.Error(err, fmt.Sprintf("failed processClientTrafficPolicies for gatewayClass %s, skipping it", managedGC.Name)) - continue } } @@ -322,7 +333,6 @@ func (r *gatewayAPIReconciler) Reconcile(ctx context.Context, _ reconcile.Reques return reconcile.Result{}, err } r.log.Error(err, fmt.Sprintf("failed processBackendTrafficPolicies for gatewayClass %s, skipping it", managedGC.Name)) - continue } } @@ -609,6 +619,7 @@ func (r *gatewayAPIReconciler) processBackendRefs(ctx context.Context, gwcResour caRefNew) } if err != nil { + // If the error is transient, we return it to allow Reconcile to retry. if isTransientError(err) { return err } @@ -659,7 +670,7 @@ func (r *gatewayAPIReconciler) processBackendRefs(ctx context.Context, gwcResour // - BackendRefs for ExAuth func (r *gatewayAPIReconciler) processSecurityPolicyObjectRefs( ctx context.Context, resourceTree *resource.Resources, resourceMap *resourceMappings, -) { +) error { // we don't return errors from this method, because we want to continue reconciling // the rest of the SecurityPolicies despite that one reference is invalid. This // allows Envoy Gateway to continue serving traffic even if some SecurityPolicies @@ -680,6 +691,10 @@ func (r *gatewayAPIReconciler) processSecurityPolicyObjectRefs( policy.Namespace, policy.Name, oidc.ClientSecret); err != nil { + // If the error is transient, we return it to allow Reconcile to retry. + if isTransientError(err) { + return err + } r.log.Error(err, "failed to process OIDC SecretRef for SecurityPolicy", "policy", policy, "secretRef", oidc.ClientSecret) @@ -698,6 +713,10 @@ func (r *gatewayAPIReconciler) processSecurityPolicyObjectRefs( policy.Namespace, policy.Name, credRef); err != nil { + // If the error is transient, we return it to allow Reconcile to retry. + if isTransientError(err) { + return err + } r.log.Error(err, "failed to process APIKeyAuth SecretRef for SecurityPolicy", "policy", policy, "secretRef", apiKeyAuth.CredentialRefs) @@ -716,6 +735,10 @@ func (r *gatewayAPIReconciler) processSecurityPolicyObjectRefs( policy.Namespace, policy.Name, basicAuth.Users); err != nil { + // If the error is transient, we return it to allow Reconcile to retry. + if isTransientError(err) { + return err + } r.log.Error(err, "failed to process BasicAuth SecretRef for SecurityPolicy", "policy", policy, "secretRef", basicAuth.Users) @@ -776,6 +799,10 @@ func (r *gatewayAPIReconciler) processSecurityPolicyObjectRefs( Kind: &provider.LocalJWKS.ValueRef.Kind, Name: provider.LocalJWKS.ValueRef.Name, }); err != nil { + // If the error is transient, we return it to allow Reconcile to retry. + if isTransientError(err) { + return err + } r.log.Error(err, "failed to process LocalJWKS ConfigMap", "policy", policy, "localJWKS", provider.LocalJWKS) } } else if provider.RemoteJWKS != nil { @@ -788,6 +815,10 @@ func (r *gatewayAPIReconciler) processSecurityPolicyObjectRefs( policy.Namespace, policy.Name, br.BackendObjectReference); err != nil { + // If the error is transient, we return it to allow Reconcile to retry. + if isTransientError(err) { + return err + } r.log.Error(err, "failed to process RemoteJWKS BackendRef for SecurityPolicy", "policy", policy, "backendRef", br.BackendObjectReference) @@ -797,6 +828,7 @@ func (r *gatewayAPIReconciler) processSecurityPolicyObjectRefs( } } } + return nil } // processBackendRef adds the referenced BackendRef to the resourceMap for later processBackendRefs. @@ -853,25 +885,16 @@ func (r *gatewayAPIReconciler) processBackendRef( // processOIDCHMACSecret adds the OIDC HMAC Secret to the resourceTree. // The OIDC HMAC Secret is created by the CertGen job and is used by SecurityPolicy // to configure OAuth2 filters. -func (r *gatewayAPIReconciler) processOIDCHMACSecret(ctx context.Context, resourceTree *resource.Resources, resourceMap *resourceMappings) { +func (r *gatewayAPIReconciler) processOIDCHMACSecret(ctx context.Context, resourceTree *resource.Resources, resourceMap *resourceMappings) error { var ( secret corev1.Secret err error ) - err = r.client.Get(ctx, + if err = r.client.Get(ctx, types.NamespacedName{Namespace: r.namespace, Name: oidcHMACSecretName}, - &secret, - ) - // We don't return an error here, because we want to continue reconciling - // despite that the OIDC HMAC secret can't be found. - // If the OIDC HMAC Secret is missing, the SecurityPolicy with OIDC will be - // marked as invalid in its status when translating to IR. - if err != nil { - r.log.Error(err, - "failed to process OIDC HMAC Secret", - "namespace", r.namespace, "name", oidcHMACSecretName) - return + &secret); err != nil { + return err } key := utils.NamespacedName(&secret).String() @@ -880,26 +903,20 @@ func (r *gatewayAPIReconciler) processOIDCHMACSecret(ctx context.Context, resour resourceTree.Secrets = append(resourceTree.Secrets, &secret) r.log.Info("processing OIDC HMAC Secret", "namespace", r.namespace, "name", oidcHMACSecretName) } + return nil } // processEnvoyTLSSecret adds the Envoy TLS Secret to the resourceTree. // The Envoy TLS Secret is created by the CertGen job and is used by envoy to establish // TLS connections to the rate limit service. -func (r *gatewayAPIReconciler) processEnvoyTLSSecret(ctx context.Context, resourceTree *resource.Resources, resourceMap *resourceMappings) { - var ( - secret corev1.Secret - err error - ) +func (r *gatewayAPIReconciler) processEnvoyTLSSecret(ctx context.Context, resourceTree *resource.Resources, resourceMap *resourceMappings) error { + var secret corev1.Secret - err = r.client.Get(ctx, + if err := r.client.Get(ctx, types.NamespacedName{Namespace: r.namespace, Name: envoyTLSSecretName}, &secret, - ) - if err != nil { - r.log.Error(err, - "failed to process Envoy TLS Secret", - "namespace", r.namespace, "name", envoyTLSSecretName) - return + ); err != nil { + return err } key := utils.NamespacedName(&secret).String() @@ -908,6 +925,7 @@ func (r *gatewayAPIReconciler) processEnvoyTLSSecret(ctx context.Context, resour resourceTree.Secrets = append(resourceTree.Secrets, &secret) r.log.Info("processing Envoy TLS Secret", "namespace", r.namespace, "name", envoyTLSSecretName) } + return nil } // processSecretRef adds the referenced Secret to the resourceTree if it's valid. @@ -924,12 +942,10 @@ func (r *gatewayAPIReconciler) processSecretRef( ) error { secret := new(corev1.Secret) secretNS := gatewayapi.NamespaceDerefOr(secretRef.Namespace, ownerNS) - err := r.client.Get(ctx, + if err := r.client.Get(ctx, types.NamespacedName{Namespace: secretNS, Name: string(secretRef.Name)}, - secret, - ) - if err != nil && kerrors.IsNotFound(err) { - return fmt.Errorf("unable to find the Secret: %s/%s", secretNS, string(secretRef.Name)) + secret); err != nil { + return err } if secretNS != ownerNS { @@ -975,7 +991,7 @@ func (r *gatewayAPIReconciler) processSecretRef( // to the resourceTree func (r *gatewayAPIReconciler) processCtpConfigMapRefs( ctx context.Context, resourceTree *resource.Resources, resourceMap *resourceMappings, -) { +) error { for _, policy := range resourceTree.ClientTrafficPolicies { tls := policy.Spec.TLS @@ -990,6 +1006,10 @@ func (r *gatewayAPIReconciler) processCtpConfigMapRefs( policy.Namespace, policy.Name, caCertRef); err != nil { + // If the error is transient, we return it to allow Reconcile to retry. + if isTransientError(err) { + return err + } // we don't return an error here, because we want to continue // reconciling the rest of the ClientTrafficPolicies despite that this // reference is invalid. @@ -1009,6 +1029,9 @@ func (r *gatewayAPIReconciler) processCtpConfigMapRefs( policy.Namespace, policy.Name, caCertRef); err != nil { + if isTransientError(err) { + return err + } r.log.Error(err, "failed to process CACertificateRef for ClientTrafficPolicy", "policy", policy, "caCertificateRef", caCertRef.Name) @@ -1017,6 +1040,7 @@ func (r *gatewayAPIReconciler) processCtpConfigMapRefs( } } } + return nil } // processConfigMapRef adds the referenced ConfigMap to the resourceTree if it's valid. @@ -1033,12 +1057,10 @@ func (r *gatewayAPIReconciler) processConfigMapRef( ) error { configMap := new(corev1.ConfigMap) configMapNS := gatewayapi.NamespaceDerefOr(configMapRef.Namespace, ownerNS) - err := r.client.Get(ctx, + if err := r.client.Get(ctx, types.NamespacedName{Namespace: configMapNS, Name: string(configMapRef.Name)}, - configMap, - ) - if err != nil && kerrors.IsNotFound(err) { - return fmt.Errorf("unable to find the ConfigMap: %s/%s", configMapNS, string(configMapRef.Name)) + configMap); err != nil { + return err } if configMapNS != ownerNS { @@ -1083,7 +1105,7 @@ func (r *gatewayAPIReconciler) processConfigMapRef( // to the resourceTree func (r *gatewayAPIReconciler) processBtpConfigMapRefs( ctx context.Context, resourceTree *resource.Resources, resourceMap *resourceMappings, -) { +) error { for _, policy := range resourceTree.BackendTrafficPolicies { for _, ro := range policy.Spec.ResponseOverride { if ro.Response.Body != nil && ro.Response.Body.ValueRef != nil && string(ro.Response.Body.ValueRef.Kind) == resource.KindConfigMap { @@ -1099,6 +1121,10 @@ func (r *gatewayAPIReconciler) processBtpConfigMapRefs( // when translating to IR because the referenced configmap can't be // found. if err != nil { + // If the error is transient, we return it to allow Reconcile to retry. + if isTransientError(err) { + return err + } r.log.Error(err, "failed to process ResponseOverride ValueRef for BackendTrafficPolicy", "policy", policy, "ValueRef", ro.Response.Body.ValueRef.Name) @@ -1113,13 +1139,13 @@ func (r *gatewayAPIReconciler) processBtpConfigMapRefs( } } } + return nil } func (r *gatewayAPIReconciler) getNamespace(ctx context.Context, name string) (*corev1.Namespace, error) { nsKey := types.NamespacedName{Name: name} ns := new(corev1.Namespace) if err := r.client.Get(ctx, nsKey, ns); err != nil { - r.log.Error(err, "unable to get Namespace") return nil, err } return ns, nil @@ -1198,6 +1224,10 @@ func (r *gatewayAPIReconciler) processGateways(ctx context.Context, managedGC *g gtw := gtw //nolint:copyloopvar if r.namespaceLabel != nil { if ok, err := r.checkObjectNamespaceLabels(>w); err != nil { + // If the error is transient, we return it to allow Reconcile to retry. + if isTransientError(err) { + return err + } r.log.Error(err, "failed to check namespace labels for gateway %s in namespace %s: %w", gtw.GetName(), gtw.GetNamespace()) continue } else if !ok { @@ -1221,6 +1251,9 @@ func (r *gatewayAPIReconciler) processGateways(ctx context.Context, managedGC *g gtw.Namespace, gtw.Name, certRef); err != nil { + if isTransientError(err) { + return err + } r.log.Error(err, "failed to process TLS SecretRef for gateway", "gateway", gtw, "secretRef", certRef) @@ -1271,6 +1304,10 @@ func (r *gatewayAPIReconciler) processGateways(ctx context.Context, managedGC *g gtw.Status = gwapiv1.GatewayStatus{} if err := r.processGatewayParamsRef(ctx, >w, resourceMap, resourceTree); err != nil { + // If the error is transient, we return it to allow Reconcile to retry. + if isTransientError(err) { + return err + } // Update the Gateway status to not accepted if there is an error processing the parametersRef. // These not-accepted gateways will not be processed by the gateway-api layer, but their status will be // updated in the gateway-api layer along with other gateways. This is to avoid the potential race condition @@ -1328,9 +1365,7 @@ func (r *gatewayAPIReconciler) processClientTrafficPolicies( } } - r.processCtpConfigMapRefs(ctx, resourceTree, resourceMap) - - return nil + return r.processCtpConfigMapRefs(ctx, resourceTree, resourceMap) } // processBackendTrafficPolicies adds BackendTrafficPolicies to the resourceTree @@ -1351,8 +1386,7 @@ func (r *gatewayAPIReconciler) processBackendTrafficPolicies(ctx context.Context resourceTree.BackendTrafficPolicies = append(resourceTree.BackendTrafficPolicies, &backendTrafficPolicy) } } - r.processBtpConfigMapRefs(ctx, resourceTree, resourceMap) - return nil + return r.processBtpConfigMapRefs(ctx, resourceTree, resourceMap) } // processSecurityPolicies adds SecurityPolicies and their referenced resources to the resourceTree @@ -1376,9 +1410,7 @@ func (r *gatewayAPIReconciler) processSecurityPolicies( } // Add the referenced Resources in SecurityPolicies to the resourceTree - r.processSecurityPolicyObjectRefs(ctx, resourceTree, resourceMap) - - return nil + return r.processSecurityPolicyObjectRefs(ctx, resourceTree, resourceMap) } // processBackendTLSPolicies adds BackendTLSPolicies and their referenced resources to the resourceTree @@ -1402,8 +1434,7 @@ func (r *gatewayAPIReconciler) processBackendTLSPolicies( } // Add the referenced Secrets and ConfigMaps in BackendTLSPolicies to the resourceTree. - r.processBackendTLSPolicyRefs(ctx, resourceTree, resourceMap) - return nil + return r.processBackendTLSPolicyRefs(ctx, resourceTree, resourceMap) } // processBackends adds Backends to the resourceTree @@ -2120,9 +2151,7 @@ func (r *gatewayAPIReconciler) processGatewayParamsRef(ctx context.Context, gtw gtw.Namespace, gtw.Name, *certRef); err != nil { - r.log.Error(err, - "failed to process TLS SecretRef for gateway", - "gateway", utils.NamespacedName(gtw).String(), "secretRef", certRef) + return fmt.Errorf("failed to process TLS SecretRef for gateway %s/%s: %w", gtw.Namespace, gtw.Name, err) } } } @@ -2139,9 +2168,6 @@ func (r *gatewayAPIReconciler) processGatewayClassParamsRef(ctx context.Context, ep := new(egv1a1.EnvoyProxy) if err := r.client.Get(ctx, types.NamespacedName{Namespace: string(*gc.Spec.ParametersRef.Namespace), Name: gc.Spec.ParametersRef.Name}, ep); err != nil { - if kerrors.IsNotFound(err) { - return fmt.Errorf("envoyproxy referenced by gatewayclass is not found: %w", err) - } return fmt.Errorf("failed to find envoyproxy %s/%s: %w", r.namespace, gc.Spec.ParametersRef.Name, err) } @@ -2245,7 +2271,7 @@ func (r *gatewayAPIReconciler) processBackendTLSPolicyRefs( ctx context.Context, resourceTree *resource.Resources, resourceMap *resourceMappings, -) { +) error { for _, policy := range resourceTree.BackendTLSPolicies { tls := policy.Spec.Validation @@ -2284,6 +2310,10 @@ func (r *gatewayAPIReconciler) processBackendTLSPolicyRefs( caRefNew) } if err != nil { + // if the error is transient, we return it to retry later + if isTransientError(err) { + return err + } // we don't return an error here, because we want to continue // reconciling the rest of the ClientTrafficPolicies despite that this // reference is invalid. @@ -2298,6 +2328,7 @@ func (r *gatewayAPIReconciler) processBackendTLSPolicyRefs( } } } + return nil } // processEnvoyExtensionPolicies adds EnvoyExtensionPolicies and their referenced resources to the resourceTree @@ -2322,9 +2353,7 @@ func (r *gatewayAPIReconciler) processEnvoyExtensionPolicies( } // Add the referenced Resources in EnvoyExtensionPolicies to the resourceTree - r.processEnvoyExtensionPolicyObjectRefs(ctx, resourceTree, resourceMap) - - return nil + return r.processEnvoyExtensionPolicyObjectRefs(ctx, resourceTree, resourceMap) } // processExtensionServerPolicies adds directly attached policies intended for the extension server @@ -2367,7 +2396,7 @@ func (r *gatewayAPIReconciler) processExtensionServerPolicies( // - ValueRefs for Luas func (r *gatewayAPIReconciler) processEnvoyExtensionPolicyObjectRefs( ctx context.Context, resourceTree *resource.Resources, resourceMap *resourceMappings, -) { +) error { // we don't return errors from this method, because we want to continue reconciling // the rest of the EnvoyExtensionPolicies despite that one reference is invalid. This // allows Envoy Gateway to continue serving traffic even if some EnvoyExtensionPolicies @@ -2405,6 +2434,10 @@ func (r *gatewayAPIReconciler) processEnvoyExtensionPolicyObjectRefs( policy.Namespace, policy.Name, *wasm.Code.Image.PullSecretRef); err != nil { + // If the error is transient, we return it to retry later + if isTransientError(err) { + return err + } r.log.Error(err, "failed to process Wasm Image PullSecretRef for EnvoyExtensionPolicy", "policy", policy, "secretRef", wasm.Code.Image.PullSecretRef) @@ -2422,6 +2455,10 @@ func (r *gatewayAPIReconciler) processEnvoyExtensionPolicyObjectRefs( configMap, ) if err != nil { + // If the error is transient, we return it to retry later + if isTransientError(err) { + return err + } r.log.Error(err, "failed to process Lua ValueRef for EnvoyExtensionPolicy", "policy", policy, "ValueRef", lua.ValueRef.Name) @@ -2437,4 +2474,5 @@ func (r *gatewayAPIReconciler) processEnvoyExtensionPolicyObjectRefs( } } } + return nil } diff --git a/internal/utils/jsonpatch/patch.go b/internal/utils/jsonpatch/patch.go index 8c14ae19f4..79070c8cf5 100644 --- a/internal/utils/jsonpatch/patch.go +++ b/internal/utils/jsonpatch/patch.go @@ -87,6 +87,7 @@ func ApplyJSONPatches(document json.RawMessage, patches ...ir.JSONPatchOperation } // Apply patch + fmt.Println("xxxx applying patch:", string(jsonBytes), "to document:", string(document)) document, err = patchObj.ApplyWithOptions(document, opts) if err != nil { tErr := fmt.Errorf("unable to apply patch:\n%s on resource:\n%s, err: %s", string(jsonBytes), string(document), err.Error()) From 8704fd862e6800401365d0ca1ced9bafc32902a9 Mon Sep 17 00:00:00 2001 From: "Huabing (Robin) Zhao" Date: Thu, 3 Jul 2025 01:06:50 +0000 Subject: [PATCH 09/12] remove debug info Signed-off-by: Huabing (Robin) Zhao --- examples/redis/redis.yaml | 11 ++++++++--- .../testdata/translate/in/envoy-patch-policy.yaml | 2 -- internal/utils/jsonpatch/patch.go | 1 - 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/examples/redis/redis.yaml b/examples/redis/redis.yaml index 2f5a8e941c..cdeb576100 100644 --- a/examples/redis/redis.yaml +++ b/examples/redis/redis.yaml @@ -1,8 +1,13 @@ - +kind: Namespace +apiVersion: v1 +metadata: + name: redis-system +--- apiVersion: apps/v1 kind: Deployment metadata: name: redis + namespace: redis-system labels: app: redis spec: @@ -31,9 +36,9 @@ apiVersion: v1 kind: Service metadata: name: redis + namespace: redis-system labels: app: redis - istio.io/use-waypoint: reviews-waypoint annotations: spec: ports: @@ -61,4 +66,4 @@ data: backend: type: Redis redis: - url: redis.default.svc.cluster.local:6379 + url: redis.redis-system.svc.cluster.local:6379 diff --git a/internal/cmd/egctl/testdata/translate/in/envoy-patch-policy.yaml b/internal/cmd/egctl/testdata/translate/in/envoy-patch-policy.yaml index 84387e46f0..be98f705f7 100644 --- a/internal/cmd/egctl/testdata/translate/in/envoy-patch-policy.yaml +++ b/internal/cmd/egctl/testdata/translate/in/envoy-patch-policy.yaml @@ -156,5 +156,3 @@ spec: socket_address: address: ratelimit.svc.cluster.local port_value: 8081 - - diff --git a/internal/utils/jsonpatch/patch.go b/internal/utils/jsonpatch/patch.go index 79070c8cf5..8c14ae19f4 100644 --- a/internal/utils/jsonpatch/patch.go +++ b/internal/utils/jsonpatch/patch.go @@ -87,7 +87,6 @@ func ApplyJSONPatches(document json.RawMessage, patches ...ir.JSONPatchOperation } // Apply patch - fmt.Println("xxxx applying patch:", string(jsonBytes), "to document:", string(document)) document, err = patchObj.ApplyWithOptions(document, opts) if err != nil { tErr := fmt.Errorf("unable to apply patch:\n%s on resource:\n%s, err: %s", string(jsonBytes), string(document), err.Error()) From 24d37d6133adfab17011f29c1473f7289c4e0a28 Mon Sep 17 00:00:00 2001 From: "Huabing (Robin) Zhao" Date: Thu, 3 Jul 2025 01:23:25 +0000 Subject: [PATCH 10/12] address comment Signed-off-by: Huabing (Robin) Zhao --- internal/provider/kubernetes/controller.go | 1 + 1 file changed, 1 insertion(+) diff --git a/internal/provider/kubernetes/controller.go b/internal/provider/kubernetes/controller.go index afa322ba5b..815481dde1 100644 --- a/internal/provider/kubernetes/controller.go +++ b/internal/provider/kubernetes/controller.go @@ -281,6 +281,7 @@ func (r *gatewayAPIReconciler) Reconcile(ctx context.Context, _ reconcile.Reques if err = r.processOIDCHMACSecret(ctx, gwcResource, gwcResourceMapping); err != nil { if isTransientError(err) { r.log.Error(err, "transient error processing OIDC HMAC Secret", "gatewayClass", managedGC.Name) + return reconcile.Result{}, err } r.log.Error(err, fmt.Sprintf("failed processOIDCHMACSecret for gatewayClass %s, skipping it", managedGC.Name)) } From c40c4cc31ef0d904d876f9027626ab3d3332d916 Mon Sep 17 00:00:00 2001 From: "Huabing (Robin) Zhao" Date: Thu, 3 Jul 2025 04:43:48 +0000 Subject: [PATCH 11/12] don't skip failed GCs Signed-off-by: Huabing (Robin) Zhao --- internal/provider/kubernetes/controller.go | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/internal/provider/kubernetes/controller.go b/internal/provider/kubernetes/controller.go index 815481dde1..822fe14a70 100644 --- a/internal/provider/kubernetes/controller.go +++ b/internal/provider/kubernetes/controller.go @@ -245,7 +245,6 @@ func (r *gatewayAPIReconciler) Reconcile(ctx context.Context, _ reconcile.Reques string(gwapiv1.GatewayClassReasonInvalidParameters), msg) r.resources.GatewayClassStatuses.Store(utils.NamespacedName(gc), &gc.Status) - continue } } @@ -263,16 +262,17 @@ func (r *gatewayAPIReconciler) Reconcile(ctx context.Context, _ reconcile.Reques string(gwapiv1.GatewayClassReasonAccepted), fmt.Sprintf("%s: %v", status.MsgGatewayClassInvalidParams, err)) r.resources.GatewayClassStatuses.Store(utils.NamespacedName(gc), &gc.Status) - continue } - // GatewayClass is valid so far, mark it as accepted. - gc := status.SetGatewayClassAccepted( - managedGC.DeepCopy(), - true, - string(gwapiv1.GatewayClassReasonAccepted), - status.MsgValidGatewayClass) - r.resources.GatewayClassStatuses.Store(utils.NamespacedName(gc), &gc.Status) + if _, ok := r.resources.GatewayClassStatuses.Load(utils.NamespacedName(managedGC)); !ok { + // GatewayClass is valid so far, mark it as accepted. + gc := status.SetGatewayClassAccepted( + managedGC.DeepCopy(), + true, + string(gwapiv1.GatewayClassReasonAccepted), + status.MsgValidGatewayClass) + r.resources.GatewayClassStatuses.Store(utils.NamespacedName(gc), &gc.Status) + } // it's safe here to append gwcResource to gwcResources gwcResources = append(gwcResources, gwcResource) From 80b953efd1232c464b1ba059a3046aa7264a35c6 Mon Sep 17 00:00:00 2001 From: "Huabing (Robin) Zhao" Date: Thu, 3 Jul 2025 04:49:45 +0000 Subject: [PATCH 12/12] minor change Signed-off-by: Huabing (Robin) Zhao --- internal/provider/kubernetes/controller.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/internal/provider/kubernetes/controller.go b/internal/provider/kubernetes/controller.go index 822fe14a70..afaba76c07 100644 --- a/internal/provider/kubernetes/controller.go +++ b/internal/provider/kubernetes/controller.go @@ -230,6 +230,7 @@ func (r *gatewayAPIReconciler) Reconcile(ctx context.Context, _ reconcile.Reques // Process the parametersRef of the accepted GatewayClass. // This should run before processGateways and processBackendRefs + failToProcessGCParamsRef := false if managedGC.Spec.ParametersRef != nil && managedGC.DeletionTimestamp == nil { if err := r.processGatewayClassParamsRef(ctx, managedGC, gwcResourceMapping, gwcResource); err != nil { if isTransientError(err) { @@ -245,6 +246,7 @@ func (r *gatewayAPIReconciler) Reconcile(ctx context.Context, _ reconcile.Reques string(gwapiv1.GatewayClassReasonInvalidParameters), msg) r.resources.GatewayClassStatuses.Store(utils.NamespacedName(gc), &gc.Status) + failToProcessGCParamsRef = true } } @@ -262,9 +264,10 @@ func (r *gatewayAPIReconciler) Reconcile(ctx context.Context, _ reconcile.Reques string(gwapiv1.GatewayClassReasonAccepted), fmt.Sprintf("%s: %v", status.MsgGatewayClassInvalidParams, err)) r.resources.GatewayClassStatuses.Store(utils.NamespacedName(gc), &gc.Status) + failToProcessGCParamsRef = true } - if _, ok := r.resources.GatewayClassStatuses.Load(utils.NamespacedName(managedGC)); !ok { + if !failToProcessGCParamsRef { // GatewayClass is valid so far, mark it as accepted. gc := status.SetGatewayClassAccepted( managedGC.DeepCopy(),