diff --git a/internal/provider/kubernetes/kubernetes.go b/internal/provider/kubernetes/kubernetes.go index b99a486d3f..57084b84a4 100644 --- a/internal/provider/kubernetes/kubernetes.go +++ b/internal/provider/kubernetes/kubernetes.go @@ -10,6 +10,8 @@ import ( "fmt" "time" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/labels" "k8s.io/client-go/rest" "k8s.io/klog/v2" "k8s.io/utils/ptr" @@ -26,6 +28,7 @@ import ( egv1a1 "github.com/envoyproxy/gateway/api/v1alpha1" "github.com/envoyproxy/gateway/internal/envoygateway" ec "github.com/envoyproxy/gateway/internal/envoygateway/config" + "github.com/envoyproxy/gateway/internal/infrastructure/kubernetes/proxy" "github.com/envoyproxy/gateway/internal/message" ) @@ -112,6 +115,16 @@ func New(ctx context.Context, restCfg *rest.Config, svrCfg *ec.Server, resources mgrOpts.Cache.SyncPeriod = ptr.To(csp) } + // Limit the cache to only Envoy proxy Pods to reduce memory and sync churn. + // ProxyTopologyInjector is the only component that interacts with Pods. 
+ if mgrOpts.Cache.ByObject == nil { + mgrOpts.Cache.ByObject = map[client.Object]cache.ByObject{} + } + + mgrOpts.Cache.ByObject[&corev1.Pod{}] = cache.ByObject{ + Label: labels.SelectorFromSet(proxy.EnvoyAppLabel()), + } + if svrCfg.EnvoyGateway.NamespaceMode() { mgrOpts.Cache.DefaultNamespaces = make(map[string]cache.Config) for _, watchNS := range svrCfg.EnvoyGateway.Provider.Kubernetes.Watch.Namespaces { @@ -134,9 +147,10 @@ func New(ctx context.Context, restCfg *rest.Config, svrCfg *ec.Server, resources if svrCfg.EnvoyGateway.Provider.Kubernetes.TopologyInjector == nil || !ptr.Deref(svrCfg.EnvoyGateway.Provider.Kubernetes.TopologyInjector.Disable, false) { mgr.GetWebhookServer().Register("/inject-pod-topology", &webhook.Admission{ Handler: &ProxyTopologyInjector{ - Client: mgr.GetClient(), - Logger: svrCfg.Logger.WithName("proxy-topology-injector"), - Decoder: admission.NewDecoder(mgr.GetScheme()), + Client: mgr.GetClient(), + APIReader: mgr.GetAPIReader(), + Logger: svrCfg.Logger.WithName("proxy-topology-injector"), + Decoder: admission.NewDecoder(mgr.GetScheme()), }, }) } diff --git a/internal/provider/kubernetes/topology_injector.go b/internal/provider/kubernetes/topology_injector.go index 9a4edea229..c928dfe2f7 100644 --- a/internal/provider/kubernetes/topology_injector.go +++ b/internal/provider/kubernetes/topology_injector.go @@ -22,9 +22,9 @@ import ( type ProxyTopologyInjector struct { client.Client - Decoder admission.Decoder - - Logger logging.Logger + APIReader client.Reader + Decoder admission.Decoder + Logger logging.Logger } func (m *ProxyTopologyInjector) Handle(ctx context.Context, req admission.Request) admission.Response { @@ -50,9 +50,13 @@ func (m *ProxyTopologyInjector) Handle(ctx context.Context, req admission.Reques pod := &corev1.Pod{} if err := m.Get(ctx, podName, pod); err != nil { - logger.Error(err, "get pod failed", "pod", podName.String()) - topologyInjectorEventsTotal.WithFailure(metrics.ReasonError).Increment() - return 
admission.Allowed("internal error, skipped") + // Cache isn't guaranteed to be updated yet so if m.Get() fails + // try getting the pod from API server directly. + if err = m.APIReader.Get(ctx, podName, pod); err != nil { + logger.Error(err, "get pod failed", "pod", podName.String()) + topologyInjectorEventsTotal.WithFailure(metrics.ReasonError).Increment() + return admission.Allowed("internal error, skipped") + } } // Skip non-proxy pods diff --git a/release-notes/current.yaml b/release-notes/current.yaml index e8207d54f6..8678032eee 100644 --- a/release-notes/current.yaml +++ b/release-notes/current.yaml @@ -21,6 +21,7 @@ bug fixes: | Fixed handling of millisecond-level retry durations and token TTLs in OIDC authn. Fixed indexer and controller crashing when BackendTrafficPolicy has a redirect response override. Fixed Lua validator log level to be suppressed by default. + Fixed ProxyTopologyInjector cache sync race condition that caused injection failures. # Enhancements that improve performance. performance improvements: |