diff --git a/internal/provider/kubernetes/controller.go b/internal/provider/kubernetes/controller.go index c7fb7845e9..afaba76c07 100644 --- a/internal/provider/kubernetes/controller.go +++ b/internal/provider/kubernetes/controller.go @@ -180,6 +180,16 @@ func byNamespaceSelectorEnabled(eg *egv1a1.EnvoyGateway) bool { } } +func isTransientError(err error) bool { + return kerrors.IsServerTimeout(err) || + kerrors.IsTimeout(err) || + kerrors.IsTooManyRequests(err) || + kerrors.IsServiceUnavailable(err) || + kerrors.IsStoreReadError(err) || + kerrors.IsInternalError(err) || + kerrors.IsUnexpectedServerError(err) +} + // Reconcile handles reconciling all resources in a single call. Any resource event should enqueue the // same reconcile.Request containing the gateway controller name. This allows multiple resource updates to // be handled by a single call to Reconcile. The reconcile.Request DOES NOT map to a specific resource. @@ -220,8 +230,14 @@ func (r *gatewayAPIReconciler) Reconcile(ctx context.Context, _ reconcile.Reques // Process the parametersRef of the accepted GatewayClass. // This should run before processGateways and processBackendRefs + failToProcessGCParamsRef := false if managedGC.Spec.ParametersRef != nil && managedGC.DeletionTimestamp == nil { if err := r.processGatewayClassParamsRef(ctx, managedGC, gwcResourceMapping, gwcResource); err != nil { + if isTransientError(err) { + r.log.Error(err, "transient error processing GatewayClass parametersRef", "gatewayClass", managedGC.Name) + return reconcile.Result{}, err + } + r.log.Error(err, fmt.Sprintf("failed processGatewayClassParamsRef for gatewayClass %s, skipping it", managedGC.Name)) msg := fmt.Sprintf("%s: %v", status.MsgGatewayClassInvalidParams, err) gc := status.SetGatewayClassAccepted( @@ -230,93 +246,172 @@ func (r *gatewayAPIReconciler) Reconcile(ctx context.Context, _ reconcile.Reques string(gwapiv1.GatewayClassReasonInvalidParameters), msg) r.resources.GatewayClassStatuses.Store(utils.NamespacedName(gc), &gc.Status) - continue + failToProcessGCParamsRef = true } } + + // process envoy gateway secret refs + if err := r.processEnvoyProxySecretRef(ctx, gwcResource); err != nil { + if isTransientError(err) { + r.log.Error(err, "transient error processing TLS SecretRef for EnvoyProxy", "gatewayClass", managedGC.Name) + return reconcile.Result{}, err + } + + r.log.Error(err, fmt.Sprintf("failed process TLS SecretRef for EnvoyProxy for gatewayClass %s, skipping it", managedGC.Name)) + gc := status.SetGatewayClassAccepted( + managedGC.DeepCopy(), + false, + string(gwapiv1.GatewayClassReasonAccepted), + fmt.Sprintf("%s: %v", status.MsgGatewayClassInvalidParams, err)) + r.resources.GatewayClassStatuses.Store(utils.NamespacedName(gc), &gc.Status) + failToProcessGCParamsRef = true + } + + if !failToProcessGCParamsRef { + // GatewayClass is valid so far, mark it as accepted. + gc := status.SetGatewayClassAccepted( + managedGC.DeepCopy(), + true, + string(gwapiv1.GatewayClassReasonAccepted), + status.MsgValidGatewayClass) + r.resources.GatewayClassStatuses.Store(utils.NamespacedName(gc), &gc.Status) + } + // it's safe here to append gwcResource to gwcResources gwcResources = append(gwcResources, gwcResource) // process global resources // add the OIDC HMAC Secret to the resourceTree - r.processOIDCHMACSecret(ctx, gwcResource, gwcResourceMapping) + if err = r.processOIDCHMACSecret(ctx, gwcResource, gwcResourceMapping); err != nil { + if isTransientError(err) { + r.log.Error(err, "transient error processing OIDC HMAC Secret", "gatewayClass", managedGC.Name) + return reconcile.Result{}, err + } + r.log.Error(err, fmt.Sprintf("failed processOIDCHMACSecret for gatewayClass %s, skipping it", managedGC.Name)) + } + // add the Envoy TLS Secret to the resourceTree - r.processEnvoyTLSSecret(ctx, gwcResource, gwcResourceMapping) + if err = r.processEnvoyTLSSecret(ctx, gwcResource, gwcResourceMapping); err != nil { + if isTransientError(err) { + r.log.Error(err, "transient error processing Envoy TLS Secret", "gatewayClass", managedGC.Name) + return reconcile.Result{}, err + } + r.log.Error(err, fmt.Sprintf("failed processEnvoyTLSSecret for gatewayClass %s, skipping it", managedGC.Name)) + } // Add all Gateways, their associated Routes, and referenced resources to the resourceTree if err = r.processGateways(ctx, managedGC, gwcResourceMapping, gwcResource); err != nil { + if isTransientError(err) { + r.log.Error(err, "transient error processing gateways", "gatewayClass", managedGC.Name) + return reconcile.Result{}, err + } r.log.Error(err, fmt.Sprintf("failed processGateways for gatewayClass %s, skipping it", managedGC.Name)) - continue } if r.eppCRDExists { // Add all EnvoyPatchPolicies to the resourceTree if err = r.processEnvoyPatchPolicies(ctx, gwcResource, gwcResourceMapping); err != nil { + if isTransientError(err) { + r.log.Error(err, "transient error processing EnvoyPatchPolicies", "gatewayClass", managedGC.Name) + return reconcile.Result{}, err + } r.log.Error(err, fmt.Sprintf("failed processEnvoyPatchPolicies for gatewayClass %s, skipping it", managedGC.Name)) - continue } } if r.ctpCRDExists { // Add all ClientTrafficPolicies and their referenced resources to the resourceTree if err = r.processClientTrafficPolicies(ctx, gwcResource, gwcResourceMapping); err != nil { + if isTransientError(err) { + r.log.Error(err, "transient error processing ClientTrafficPolicies", "gatewayClass", managedGC.Name) + return reconcile.Result{}, err + } r.log.Error(err, fmt.Sprintf("failed processClientTrafficPolicies for gatewayClass %s, skipping it", managedGC.Name)) - continue } } if r.btpCRDExists { // Add all BackendTrafficPolicies to the resourceTree if err = r.processBackendTrafficPolicies(ctx, gwcResource, gwcResourceMapping); err != nil { + if isTransientError(err) { + r.log.Error(err, "transient error processing BackendTrafficPolicies", "gatewayClass", managedGC.Name) + return reconcile.Result{}, err + } r.log.Error(err, fmt.Sprintf("failed processBackendTrafficPolicies for gatewayClass %s, skipping it", managedGC.Name)) - continue } } if r.spCRDExists { // Add all SecurityPolicies and their referenced resources to the resourceTree if err = r.processSecurityPolicies(ctx, gwcResource, gwcResourceMapping); err != nil { + if isTransientError(err) { + r.log.Error(err, "transient error processing SecurityPolicies", "gatewayClass", managedGC.Name) + return reconcile.Result{}, err + } r.log.Error(err, fmt.Sprintf("failed processSecurityPolicies for gatewayClass %s, skipping it", managedGC.Name)) - continue } } if r.bTLSPolicyCRDExists { // Add all BackendTLSPolies to the resourceTree if err = r.processBackendTLSPolicies(ctx, gwcResource, gwcResourceMapping); err != nil { + if isTransientError(err) { + r.log.Error(err, "transient error processing BackendTLSPolicies", "gatewayClass", managedGC.Name) + return reconcile.Result{}, err + } r.log.Error(err, fmt.Sprintf("failed processBackendTLSPolicies for gatewayClass %s, skipping it", managedGC.Name)) - continue } } if r.eepCRDExists { // Add all EnvoyExtensionPolicies and their referenced resources to the resourceTree if err = r.processEnvoyExtensionPolicies(ctx, gwcResource, gwcResourceMapping); err != nil { + if isTransientError(err) { + r.log.Error(err, "transient error processing EnvoyExtensionPolicies", "gatewayClass", managedGC.Name) + return reconcile.Result{}, err + } r.log.Error(err, fmt.Sprintf("failed processEnvoyExtensionPolicies for gatewayClass %s, skipping it", managedGC.Name)) - continue } } if err = r.processExtensionServerPolicies(ctx, gwcResource); err != nil { + if isTransientError(err) { + r.log.Error(err, "transient error processing ExtensionServerPolicies", "gatewayClass", managedGC.Name) + return reconcile.Result{}, err + } r.log.Error(err, fmt.Sprintf("failed processExtensionServerPolicies for gatewayClass %s, skipping it", managedGC.Name)) - continue } if r.backendCRDExists { if err = r.processBackends(ctx, gwcResource); err != nil { + if isTransientError(err) { + r.log.Error(err, "transient error processing Backends", "gatewayClass", managedGC.Name) + return reconcile.Result{}, err + } r.log.Error(err, fmt.Sprintf("failed processBackends for gatewayClass %s, skipping it", managedGC.Name)) - continue } } // Add the referenced services, ServiceImports, and EndpointSlices in // the collected BackendRefs to the resourceTree. // BackendRefs are referred by various Route objects and the ExtAuth in SecurityPolicies. - r.processBackendRefs(ctx, gwcResource, gwcResourceMapping) + if err = r.processBackendRefs(ctx, gwcResource, gwcResourceMapping); err != nil { + if isTransientError(err) { + r.log.Error(err, "transient error processing BackendRefs", "gatewayClass", managedGC.Name) + return reconcile.Result{}, err + } + + r.log.Error(err, fmt.Sprintf("failed processBackendRefs for gatewayClass %s, skipping it", managedGC.Name)) + } // For this particular Gateway, and all associated objects, check whether the // namespace exists. Add to the resourceTree. for ns := range gwcResourceMapping.allAssociatedNamespaces { namespace, err := r.getNamespace(ctx, ns) if err != nil { + if isTransientError(err) { + r.log.Error(err, "transient error getting namespace", "namespace", ns, "gatewayClass", managedGC.Name) + return reconcile.Result{}, err + } r.log.Error(err, "unable to find the namespace") if kerrors.IsNotFound(err) { continue @@ -335,30 +430,27 @@ func (r *gatewayAPIReconciler) Reconcile(ctx context.Context, _ reconcile.Reques } } - // process envoy gateway secret refs - r.processEnvoyProxySecretRef(ctx, gwcResource) - gc := status.SetGatewayClassAccepted( - managedGC.DeepCopy(), - true, - string(gwapiv1.GatewayClassReasonAccepted), - status.MsgValidGatewayClass) - r.resources.GatewayClassStatuses.Store(utils.NamespacedName(gc), &gc.Status) - if len(gwcResource.Gateways) == 0 { r.log.Info("No gateways found for accepted gatewayClass") // If needed, remove the finalizer from the accepted GatewayClass. if err := r.removeFinalizer(ctx, managedGC); err != nil { + if isTransientError(err) { + r.log.Error(err, "transient error removing finalizer from gatewayClass", "gatewayClass", managedGC.Name) + return reconcile.Result{}, err + } r.log.Error(err, fmt.Sprintf("failed to remove finalizer from gatewayClass %s", managedGC.Name)) - continue } } else { // finalize the accepted GatewayClass. if err := r.addFinalizer(ctx, managedGC); err != nil { + if isTransientError(err) { + r.log.Error(err, "transient error adding finalizer to gatewayClass", "gatewayClass", managedGC.Name) + return reconcile.Result{}, err + } r.log.Error(err, fmt.Sprintf("failed adding finalizer to gatewayClass %s", managedGC.Name)) - continue } } } @@ -373,9 +465,9 @@ func (r *gatewayAPIReconciler) Reconcile(ctx context.Context, _ reconcile.Reques return reconcile.Result{}, nil } -func (r *gatewayAPIReconciler) processEnvoyProxySecretRef(ctx context.Context, gwcResource *resource.Resources) { +func (r *gatewayAPIReconciler) processEnvoyProxySecretRef(ctx context.Context, gwcResource *resource.Resources) error { if gwcResource.EnvoyProxyForGatewayClass == nil || gwcResource.EnvoyProxyForGatewayClass.Spec.BackendTLS == nil || gwcResource.EnvoyProxyForGatewayClass.Spec.BackendTLS.ClientCertificateRef == nil { - return + return nil } certRef := gwcResource.EnvoyProxyForGatewayClass.Spec.BackendTLS.ClientCertificateRef if refsSecret(certRef) { @@ -387,11 +479,10 @@ func (r *gatewayAPIReconciler) processEnvoyProxySecretRef(ctx context.Context, g gwcResource.EnvoyProxyForGatewayClass.Namespace, resource.KindEnvoyProxy, *certRef); err != nil { - r.log.Error(err, - "failed to process TLS SecretRef for EnvoyProxy", - "gateway", "issue", "secretRef", certRef) + return err } } + return nil } // managedGatewayClasses returns a list of GatewayClass objects that are managed by the Envoy Gateway Controller. @@ -427,7 +518,11 @@ func (r *gatewayAPIReconciler) managedGatewayClasses(ctx context.Context) ([]*gw // - EndpointSlices // - Backends // - CACertificateRefs in the Backends -func (r *gatewayAPIReconciler) processBackendRefs(ctx context.Context, gwcResource *resource.Resources, resourceMappings *resourceMappings) { +func (r *gatewayAPIReconciler) processBackendRefs(ctx context.Context, gwcResource *resource.Resources, resourceMappings *resourceMappings) error { + // Only transient errors are returned from this function to allow Reconcile to retry. + // All other errors are just logged and ignored - these errors result in missing referenced backend resources + // in the resource tree, which is acceptable as the Gateway API translation layer will handle them. + // The Gateway API translation layer will surface these errors in the status of the resources referencing them. for backendRef := range resourceMappings.allAssociatedBackendRefs { backendRefKind := gatewayapi.KindDerefOr(backendRef.Kind, resource.KindService) r.log.Info("processing Backend", "kind", backendRefKind, "namespace", string(*backendRef.Namespace), @@ -439,6 +534,9 @@ func (r *gatewayAPIReconciler) processBackendRefs(ctx context.Context, gwcResour service := new(corev1.Service) err := r.client.Get(ctx, types.NamespacedName{Namespace: string(*backendRef.Namespace), Name: string(backendRef.Name)}, service) if err != nil { + if isTransientError(err) { + return err + } r.log.Error(err, "failed to get Service", "namespace", string(*backendRef.Namespace), "name", string(backendRef.Name)) } else { @@ -453,6 +551,9 @@ func (r *gatewayAPIReconciler) processBackendRefs(ctx context.Context, gwcResour serviceImport := new(mcsapiv1a1.ServiceImport) err := r.client.Get(ctx, types.NamespacedName{Namespace: string(*backendRef.Namespace), Name: string(backendRef.Name)}, serviceImport) if err != nil { + if isTransientError(err) { + return err + } r.log.Error(err, "failed to get ServiceImport", "namespace", string(*backendRef.Namespace), "name", string(backendRef.Name)) } else { @@ -471,6 +572,9 @@ func (r *gatewayAPIReconciler) processBackendRefs(ctx context.Context, gwcResour backend := new(egv1a1.Backend) err := r.client.Get(ctx, types.NamespacedName{Namespace: string(*backendRef.Namespace), Name: string(backendRef.Name)}, backend) if err != nil { + if isTransientError(err) { + return err + } r.log.Error(err, "failed to get Backend", "namespace", string(*backendRef.Namespace), "name", string(backendRef.Name)) } else { @@ -519,6 +623,10 @@ func (r *gatewayAPIReconciler) processBackendRefs(ctx context.Context, gwcResour caRefNew) } if err != nil { + // If the error is transient, we return it to allow Reconcile to retry. + if isTransientError(err) { + return err + } r.log.Error(err, "failed to process CACertificateRef for Backend", "backend", backend, "caCertificateRef", caCertRef.Name) @@ -538,6 +646,9 @@ func (r *gatewayAPIReconciler) processBackendRefs(ctx context.Context, gwcResour client.InNamespace(*backendRef.Namespace), } if err := r.client.List(ctx, endpointSliceList, opts...); err != nil { + if isTransientError(err) { + return err + } r.log.Error(err, "failed to get EndpointSlices", "namespace", string(*backendRef.Namespace), backendRefKind, string(backendRef.Name)) } else { @@ -554,6 +665,7 @@ func (r *gatewayAPIReconciler) processBackendRefs(ctx context.Context, gwcResour } } } + return nil } // processSecurityPolicyObjectRefs adds the referenced resources in SecurityPolicies @@ -562,7 +674,7 @@ func (r *gatewayAPIReconciler) processBackendRefs(ctx context.Context, gwcResour // - BackendRefs for ExAuth func (r *gatewayAPIReconciler) processSecurityPolicyObjectRefs( ctx context.Context, resourceTree *resource.Resources, resourceMap *resourceMappings, -) { +) error { // we don't return errors from this method, because we want to continue reconciling // the rest of the SecurityPolicies despite that one reference is invalid. This // allows Envoy Gateway to continue serving traffic even if some SecurityPolicies @@ -583,6 +695,10 @@ func (r *gatewayAPIReconciler) processSecurityPolicyObjectRefs( policy.Namespace, policy.Name, oidc.ClientSecret); err != nil { + // If the error is transient, we return it to allow Reconcile to retry. + if isTransientError(err) { + return err + } r.log.Error(err, "failed to process OIDC SecretRef for SecurityPolicy", "policy", policy, "secretRef", oidc.ClientSecret) @@ -601,6 +717,10 @@ func (r *gatewayAPIReconciler) processSecurityPolicyObjectRefs( policy.Namespace, policy.Name, credRef); err != nil { + // If the error is transient, we return it to allow Reconcile to retry. + if isTransientError(err) { + return err + } r.log.Error(err, "failed to process APIKeyAuth SecretRef for SecurityPolicy", "policy", policy, "secretRef", apiKeyAuth.CredentialRefs) @@ -619,6 +739,10 @@ func (r *gatewayAPIReconciler) processSecurityPolicyObjectRefs( policy.Namespace, policy.Name, basicAuth.Users); err != nil { + // If the error is transient, we return it to allow Reconcile to retry. + if isTransientError(err) { + return err + } r.log.Error(err, "failed to process BasicAuth SecretRef for SecurityPolicy", "policy", policy, "secretRef", basicAuth.Users) @@ -679,6 +803,10 @@ func (r *gatewayAPIReconciler) processSecurityPolicyObjectRefs( Kind: &provider.LocalJWKS.ValueRef.Kind, Name: provider.LocalJWKS.ValueRef.Name, }); err != nil { + // If the error is transient, we return it to allow Reconcile to retry. + if isTransientError(err) { + return err + } r.log.Error(err, "failed to process LocalJWKS ConfigMap", "policy", policy, "localJWKS", provider.LocalJWKS) } } else if provider.RemoteJWKS != nil { @@ -691,6 +819,10 @@ func (r *gatewayAPIReconciler) processSecurityPolicyObjectRefs( policy.Namespace, policy.Name, br.BackendObjectReference); err != nil { + // If the error is transient, we return it to allow Reconcile to retry. + if isTransientError(err) { + return err + } r.log.Error(err, "failed to process RemoteJWKS BackendRef for SecurityPolicy", "policy", policy, "backendRef", br.BackendObjectReference) @@ -700,6 +832,7 @@ func (r *gatewayAPIReconciler) processSecurityPolicyObjectRefs( } } } + return nil } // processBackendRef adds the referenced BackendRef to the resourceMap for later processBackendRefs. @@ -756,25 +889,16 @@ func (r *gatewayAPIReconciler) processBackendRef( // processOIDCHMACSecret adds the OIDC HMAC Secret to the resourceTree. // The OIDC HMAC Secret is created by the CertGen job and is used by SecurityPolicy // to configure OAuth2 filters. -func (r *gatewayAPIReconciler) processOIDCHMACSecret(ctx context.Context, resourceTree *resource.Resources, resourceMap *resourceMappings) { +func (r *gatewayAPIReconciler) processOIDCHMACSecret(ctx context.Context, resourceTree *resource.Resources, resourceMap *resourceMappings) error { var ( secret corev1.Secret err error ) - err = r.client.Get(ctx, + if err = r.client.Get(ctx, types.NamespacedName{Namespace: r.namespace, Name: oidcHMACSecretName}, - &secret, - ) - // We don't return an error here, because we want to continue reconciling - // despite that the OIDC HMAC secret can't be found. - // If the OIDC HMAC Secret is missing, the SecurityPolicy with OIDC will be - // marked as invalid in its status when translating to IR. - if err != nil { - r.log.Error(err, - "failed to process OIDC HMAC Secret", - "namespace", r.namespace, "name", oidcHMACSecretName) - return + &secret); err != nil { + return err } key := utils.NamespacedName(&secret).String() @@ -783,26 +907,20 @@ func (r *gatewayAPIReconciler) processOIDCHMACSecret(ctx context.Context, resour resourceTree.Secrets = append(resourceTree.Secrets, &secret) r.log.Info("processing OIDC HMAC Secret", "namespace", r.namespace, "name", oidcHMACSecretName) } + return nil } // processEnvoyTLSSecret adds the Envoy TLS Secret to the resourceTree. // The Envoy TLS Secret is created by the CertGen job and is used by envoy to establish // TLS connections to the rate limit service. -func (r *gatewayAPIReconciler) processEnvoyTLSSecret(ctx context.Context, resourceTree *resource.Resources, resourceMap *resourceMappings) { - var ( - secret corev1.Secret - err error - ) +func (r *gatewayAPIReconciler) processEnvoyTLSSecret(ctx context.Context, resourceTree *resource.Resources, resourceMap *resourceMappings) error { + var secret corev1.Secret - err = r.client.Get(ctx, + if err := r.client.Get(ctx, types.NamespacedName{Namespace: r.namespace, Name: envoyTLSSecretName}, &secret, - ) - if err != nil { - r.log.Error(err, - "failed to process Envoy TLS Secret", - "namespace", r.namespace, "name", envoyTLSSecretName) - return + ); err != nil { + return err } key := utils.NamespacedName(&secret).String() @@ -811,6 +929,7 @@ func (r *gatewayAPIReconciler) processEnvoyTLSSecret(ctx context.Context, resour resourceTree.Secrets = append(resourceTree.Secrets, &secret) r.log.Info("processing Envoy TLS Secret", "namespace", r.namespace, "name", envoyTLSSecretName) } + return nil } // processSecretRef adds the referenced Secret to the resourceTree if it's valid. @@ -827,12 +946,10 @@ func (r *gatewayAPIReconciler) processSecretRef( ) error { secret := new(corev1.Secret) secretNS := gatewayapi.NamespaceDerefOr(secretRef.Namespace, ownerNS) - err := r.client.Get(ctx, + if err := r.client.Get(ctx, types.NamespacedName{Namespace: secretNS, Name: string(secretRef.Name)}, - secret, - ) - if err != nil && kerrors.IsNotFound(err) { - return fmt.Errorf("unable to find the Secret: %s/%s", secretNS, string(secretRef.Name)) + secret); err != nil { + return err } if secretNS != ownerNS { @@ -878,7 +995,7 @@ func (r *gatewayAPIReconciler) processSecretRef( // to the resourceTree func (r *gatewayAPIReconciler) processCtpConfigMapRefs( ctx context.Context, resourceTree *resource.Resources, resourceMap *resourceMappings, -) { +) error { for _, policy := range resourceTree.ClientTrafficPolicies { tls := policy.Spec.TLS @@ -893,6 +1010,10 @@ func (r *gatewayAPIReconciler) processCtpConfigMapRefs( policy.Namespace, policy.Name, caCertRef); err != nil { + // If the error is transient, we return it to allow Reconcile to retry. + if isTransientError(err) { + return err + } // we don't return an error here, because we want to continue // reconciling the rest of the ClientTrafficPolicies despite that this // reference is invalid. @@ -912,6 +1033,9 @@ func (r *gatewayAPIReconciler) processCtpConfigMapRefs( policy.Namespace, policy.Name, caCertRef); err != nil { + if isTransientError(err) { + return err + } r.log.Error(err, "failed to process CACertificateRef for ClientTrafficPolicy", "policy", policy, "caCertificateRef", caCertRef.Name) @@ -920,6 +1044,7 @@ func (r *gatewayAPIReconciler) processCtpConfigMapRefs( } } } + return nil } // processConfigMapRef adds the referenced ConfigMap to the resourceTree if it's valid. @@ -936,12 +1061,10 @@ func (r *gatewayAPIReconciler) processConfigMapRef( ) error { configMap := new(corev1.ConfigMap) configMapNS := gatewayapi.NamespaceDerefOr(configMapRef.Namespace, ownerNS) - err := r.client.Get(ctx, + if err := r.client.Get(ctx, types.NamespacedName{Namespace: configMapNS, Name: string(configMapRef.Name)}, - configMap, - ) - if err != nil && kerrors.IsNotFound(err) { - return fmt.Errorf("unable to find the ConfigMap: %s/%s", configMapNS, string(configMapRef.Name)) + configMap); err != nil { + return err } if configMapNS != ownerNS { @@ -986,7 +1109,7 @@ func (r *gatewayAPIReconciler) processConfigMapRef( // to the resourceTree func (r *gatewayAPIReconciler) processBtpConfigMapRefs( ctx context.Context, resourceTree *resource.Resources, resourceMap *resourceMappings, -) { +) error { for _, policy := range resourceTree.BackendTrafficPolicies { for _, ro := range policy.Spec.ResponseOverride { if ro.Response.Body != nil && ro.Response.Body.ValueRef != nil && string(ro.Response.Body.ValueRef.Kind) == resource.KindConfigMap { @@ -1002,6 +1125,10 @@ func (r *gatewayAPIReconciler) processBtpConfigMapRefs( // when translating to IR because the referenced configmap can't be // found. if err != nil { + // If the error is transient, we return it to allow Reconcile to retry. + if isTransientError(err) { + return err + } r.log.Error(err, "failed to process ResponseOverride ValueRef for BackendTrafficPolicy", "policy", policy, "ValueRef", ro.Response.Body.ValueRef.Name) @@ -1016,13 +1143,13 @@ func (r *gatewayAPIReconciler) processBtpConfigMapRefs( } } } + return nil } func (r *gatewayAPIReconciler) getNamespace(ctx context.Context, name string) (*corev1.Namespace, error) { nsKey := types.NamespacedName{Name: name} ns := new(corev1.Namespace) if err := r.client.Get(ctx, nsKey, ns); err != nil { - r.log.Error(err, "unable to get Namespace") return nil, err } return ns, nil @@ -1101,6 +1228,10 @@ func (r *gatewayAPIReconciler) processGateways(ctx context.Context, managedGC *g gtw := gtw //nolint:copyloopvar if r.namespaceLabel != nil { if ok, err := r.checkObjectNamespaceLabels(>w); err != nil { + // If the error is transient, we return it to allow Reconcile to retry. + if isTransientError(err) { + return err + } r.log.Error(err, "failed to check namespace labels for gateway %s in namespace %s: %w", gtw.GetName(), gtw.GetNamespace()) continue } else if !ok { @@ -1124,6 +1255,9 @@ func (r *gatewayAPIReconciler) processGateways(ctx context.Context, managedGC *g gtw.Namespace, gtw.Name, certRef); err != nil { + if isTransientError(err) { + return err + } r.log.Error(err, "failed to process TLS SecretRef for gateway", "gateway", gtw, "secretRef", certRef) @@ -1174,6 +1308,10 @@ func (r *gatewayAPIReconciler) processGateways(ctx context.Context, managedGC *g gtw.Status = gwapiv1.GatewayStatus{} if err := r.processGatewayParamsRef(ctx, >w, resourceMap, resourceTree); err != nil { + // If the error is transient, we return it to allow Reconcile to retry. + if isTransientError(err) { + return err + } // Update the Gateway status to not accepted if there is an error processing the parametersRef. // These not-accepted gateways will not be processed by the gateway-api layer, but their status will be // updated in the gateway-api layer along with other gateways. This is to avoid the potential race condition @@ -1231,9 +1369,7 @@ func (r *gatewayAPIReconciler) processClientTrafficPolicies( } } - r.processCtpConfigMapRefs(ctx, resourceTree, resourceMap) - - return nil + return r.processCtpConfigMapRefs(ctx, resourceTree, resourceMap) } // processBackendTrafficPolicies adds BackendTrafficPolicies to the resourceTree @@ -1254,8 +1390,7 @@ func (r *gatewayAPIReconciler) processBackendTrafficPolicies(ctx context.Context resourceTree.BackendTrafficPolicies = append(resourceTree.BackendTrafficPolicies, &backendTrafficPolicy) } } - r.processBtpConfigMapRefs(ctx, resourceTree, resourceMap) - return nil + return r.processBtpConfigMapRefs(ctx, resourceTree, resourceMap) } // processSecurityPolicies adds SecurityPolicies and their referenced resources to the resourceTree @@ -1279,9 +1414,7 @@ func (r *gatewayAPIReconciler) processSecurityPolicies( } // Add the referenced Resources in SecurityPolicies to the resourceTree - r.processSecurityPolicyObjectRefs(ctx, resourceTree, resourceMap) - - return nil + return r.processSecurityPolicyObjectRefs(ctx, resourceTree, resourceMap) } // processBackendTLSPolicies adds BackendTLSPolicies and their referenced resources to the resourceTree @@ -1305,8 +1438,7 @@ func (r *gatewayAPIReconciler) processBackendTLSPolicies( } // Add the referenced Secrets and ConfigMaps in BackendTLSPolicies to the resourceTree. - r.processBackendTLSPolicyRefs(ctx, resourceTree, resourceMap) - return nil + return r.processBackendTLSPolicyRefs(ctx, resourceTree, resourceMap) } // processBackends adds Backends to the resourceTree @@ -2023,9 +2155,7 @@ func (r *gatewayAPIReconciler) processGatewayParamsRef(ctx context.Context, gtw gtw.Namespace, gtw.Name, *certRef); err != nil { - r.log.Error(err, - "failed to process TLS SecretRef for gateway", - "gateway", utils.NamespacedName(gtw).String(), "secretRef", certRef) + return fmt.Errorf("failed to process TLS SecretRef for gateway %s/%s: %w", gtw.Namespace, gtw.Name, err) } } } @@ -2042,9 +2172,6 @@ func (r *gatewayAPIReconciler) processGatewayClassParamsRef(ctx context.Context, ep := new(egv1a1.EnvoyProxy) if err := r.client.Get(ctx, types.NamespacedName{Namespace: string(*gc.Spec.ParametersRef.Namespace), Name: gc.Spec.ParametersRef.Name}, ep); err != nil { - if kerrors.IsNotFound(err) { - return fmt.Errorf("envoyproxy referenced by gatewayclass is not found: %w", err) - } return fmt.Errorf("failed to find envoyproxy %s/%s: %w", r.namespace, gc.Spec.ParametersRef.Name, err) } @@ -2148,7 +2275,7 @@ func (r *gatewayAPIReconciler) processBackendTLSPolicyRefs( ctx context.Context, resourceTree *resource.Resources, resourceMap *resourceMappings, -) { +) error { for _, policy := range resourceTree.BackendTLSPolicies { tls := policy.Spec.Validation @@ -2187,6 +2314,10 @@ func (r *gatewayAPIReconciler) processBackendTLSPolicyRefs( caRefNew) } if err != nil { + // if the error is transient, we return it to retry later + if isTransientError(err) { + return err + } // we don't return an error here, because we want to continue // reconciling the rest of the ClientTrafficPolicies despite that this // reference is invalid. @@ -2201,6 +2332,7 @@ func (r *gatewayAPIReconciler) processBackendTLSPolicyRefs( } } } + return nil } // processEnvoyExtensionPolicies adds EnvoyExtensionPolicies and their referenced resources to the resourceTree @@ -2225,9 +2357,7 @@ func (r *gatewayAPIReconciler) processEnvoyExtensionPolicies( } // Add the referenced Resources in EnvoyExtensionPolicies to the resourceTree - r.processEnvoyExtensionPolicyObjectRefs(ctx, resourceTree, resourceMap) - - return nil + return r.processEnvoyExtensionPolicyObjectRefs(ctx, resourceTree, resourceMap) } // processExtensionServerPolicies adds directly attached policies intended for the extension server @@ -2270,7 +2400,7 @@ func (r *gatewayAPIReconciler) processExtensionServerPolicies( // - ValueRefs for Luas func (r *gatewayAPIReconciler) processEnvoyExtensionPolicyObjectRefs( ctx context.Context, resourceTree *resource.Resources, resourceMap *resourceMappings, -) { +) error { // we don't return errors from this method, because we want to continue reconciling // the rest of the EnvoyExtensionPolicies despite that one reference is invalid. This // allows Envoy Gateway to continue serving traffic even if some EnvoyExtensionPolicies @@ -2308,6 +2438,10 @@ func (r *gatewayAPIReconciler) processEnvoyExtensionPolicyObjectRefs( policy.Namespace, policy.Name, *wasm.Code.Image.PullSecretRef); err != nil { + // If the error is transient, we return it to retry later + if isTransientError(err) { + return err + } r.log.Error(err, "failed to process Wasm Image PullSecretRef for EnvoyExtensionPolicy", "policy", policy, "secretRef", wasm.Code.Image.PullSecretRef) @@ -2325,6 +2459,10 @@ func (r *gatewayAPIReconciler) processEnvoyExtensionPolicyObjectRefs( configMap, ) if err != nil { + // If the error is transient, we return it to retry later + if isTransientError(err) { + return err + } r.log.Error(err, "failed to process Lua ValueRef for EnvoyExtensionPolicy", "policy", policy, "ValueRef", lua.ValueRef.Name) @@ -2340,4 +2478,5 @@ func (r *gatewayAPIReconciler) processEnvoyExtensionPolicyObjectRefs( } } } + return nil } diff --git a/internal/provider/kubernetes/controller_test.go b/internal/provider/kubernetes/controller_test.go index f01716ed63..a8b33af98a 100644 --- a/internal/provider/kubernetes/controller_test.go +++ b/internal/provider/kubernetes/controller_test.go @@ -7,11 +7,14 @@ package kubernetes import ( "context" + "fmt" "os" "testing" "github.com/stretchr/testify/require" + kerrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/types" "k8s.io/utils/ptr" "sigs.k8s.io/controller-runtime/pkg/client" @@ -1022,3 +1025,32 @@ func setupReferenceGrantReconciler(objs []client.Object) *gatewayAPIReconciler { Build() return r } + +func TestIsTransientError(t *testing.T) { + serverTimeoutErr := kerrors.NewServerTimeout( + schema.GroupResource{Group: "core", Resource: "pods"}, "list", 10) + timeoutErr := kerrors.NewTimeoutError("request timeout", 1) + wrappedTooManyRequestsErr := fmt.Errorf("wrapping: %w", kerrors.NewTooManyRequests("too many requests", 1)) + serviceUnavailableErr := kerrors.NewServiceUnavailable("service unavailable") + badRequestErr := kerrors.NewBadRequest("bad request") + + testCases := []struct { + name string + err error + expected bool + }{ + {"ServerTimeout", serverTimeoutErr, true}, + {"Timeout", timeoutErr, true}, + {"TooManyRequests", wrappedTooManyRequestsErr, true}, + {"ServiceUnavailable", serviceUnavailableErr, true}, + {"BadRequest", badRequestErr, false}, + {"NilError", nil, false}, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + actual := isTransientError(tc.err) + require.Equal(t, tc.expected, actual) + }) + } +}